# check-http/server.py

import re
import time
import asyncio
import aiodns
import aiohttp
import validators
import socket

from sanic import Sanic
from sanic.log import logger
from sanic.response import html, json as json_response
from sanic.exceptions import InvalidUsage

# The Sanic application on which the routes below are registered
app = Sanic()

# In-memory rate limit store: maps a key (the client IP or the requested
# domain) to the timestamp, in seconds, of its last accepted attempt.
# Nothing is persisted, so the content is lost when the process stops.
RATE_LIMIT_DB = {}

# Minimum delay, in seconds, between two attempts for the same key.
# Meant to prevent DDoS, bounce attack attempts or something like that.
RATE_LIMIT_SECONDS = 5


def clear_rate_limit_db(now):
    """Remove rate limit entries that are too old to matter.

    An entry is dropped once strictly more than RATE_LIMIT_SECONDS separate
    its recorded timestamp from ``now``. RATE_LIMIT_DB is modified in place.

    :param now: current time in seconds, on the same clock as the timestamps
        stored in RATE_LIMIT_DB
    """
    # A dictionary can't be modified while it is being iterated over, so the
    # expired keys are collected first and deleted afterwards.
    expired_keys = [
        key
        for key, last_attempt in RATE_LIMIT_DB.items()
        if now - last_attempt > RATE_LIMIT_SECONDS
    ]

    for key in expired_keys:
        del RATE_LIMIT_DB[key]


def check_rate_limit(key, now):
    """Enforce the per-key rate limit and record the current attempt.

    :param key: identifier being limited (the routes pass the client IP or
        the requested domain)
    :param now: current time in seconds, as returned by ``time.time()``
    :return: a JSON error response (HTTP 400, code ``error_rate_limit``) when
        ``key`` was already seen less than RATE_LIMIT_SECONDS ago; otherwise
        ``None``, after ``now`` has been recorded as the last attempt
    """
    last_attempt = RATE_LIMIT_DB.get(key)

    if last_attempt is not None:
        since_last_attempt = now - last_attempt
        if since_last_attempt < RATE_LIMIT_SECONDS:
            retry_in = int(RATE_LIMIT_SECONDS - since_last_attempt)
            logger.info(f"Rate limit reached for {key}, can retry in {retry_in} seconds")
            # A refused attempt does not refresh the stored timestamp
            return json_response({
                "status": "error",
                "code": "error_rate_limit",
                "content": f"Rate limit reached for this domain or ip, retry in {retry_in} seconds",
            }, status=400)

    # Store the timestamp supplied by the caller rather than reading the clock
    # a second time: every comparison (here and in clear_rate_limit_db) is
    # made against ``now``, so the stored values must come from the same
    # reading to stay consistent.
    RATE_LIMIT_DB[key] = now
    return None


async def check_port_is_open(ip, port):
    """Tell whether a TCP connection to ``ip``:``port`` can be established.

    The connection attempt goes through the asyncio event loop, so other
    requests keep being served while it is pending, and the connection is
    closed as soon as it succeeds so that no file descriptor is leaked.

    :param ip: address to connect to
    :param port: TCP port number
    :return: True if the connection was established within 2 seconds, False
        if it was refused, unreachable, timed out or failed in any other
        OS-level way
    """
    try:
        _reader, writer = await asyncio.wait_for(
            asyncio.open_connection(ip, port), timeout=2
        )
    except (OSError, asyncio.TimeoutError):
        # Refused, unreachable, unresolvable or too slow: the port is not
        # reachable from here.
        return False

    # Only reachability matters; release the connection right away.
    writer.close()
    return True


# FIXME : remove it ? not used anymore...
async def query_dns(host, dns_entry_type):
    """Query the DNS for ``dns_entry_type`` records of ``host`` using aiodns.

    :param host: name to look up
    :param dns_entry_type: record type forwarded as-is to the resolver
    :return: whatever the resolver's ``query`` produces, an empty list when
        aiodns raises a DNSError, or None (after printing the traceback and
        logging an error) when any other exception occurs
    """
    resolver = aiodns.DNSResolver(loop=asyncio.get_event_loop())

    try:
        answer = await resolver.query(host, dns_entry_type)
    except aiodns.error.DNSError:
        # The lookup itself failed: report "no record"
        return []
    except Exception:
        import traceback
        traceback.print_exc()
        logger.error("Unhandled error while resolving DNS entry")
        return None

    return answer

@app.route("/check-http/", methods=["POST"])
async def check_http(request):
    """
    This function receives an HTTP request from a YunoHost instance while this
    server is hosted on our infrastructure. The request is expected to be a
    POST request with a body like {"domain": "domain-to-check.tld",
                                   "nonce": "1234567890abcdef" }

    The nonce value is a single-use ID, and we will try to reach
    http://{ip}/.well-known/ynh-diagnosis/{nonce} (with the domain sent as
    the Host header), which should return 200 if we are indeed reaching the
    right server.

    The general workflow is the following:

    - grab the ip from the request
    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
    - get json from body and domain from it
    - check for domain based rate limit (see RATE_LIMIT_SECONDS value)
    - check domain and nonce are in valid format
    - try to do an http request on the ip (using the domain as target host) for the page /.well-known/ynh-diagnosis/{nonce}
    - answer saying if the domain can be reached

    Returns a JSON response: {"status": "ok"} on success, otherwise
    {"status": "error", "code": ..., "content": ...} with HTTP status 400
    (or 418 when the connection itself could not be established).
    """

    # this is supposed to be a fast operation if run often enough
    now = time.time()
    clear_rate_limit_db(now)

    # ############################################# #
    #  Validate request and extract the parameters  #
    # ############################################# #

    ip = request.ip

    check_rate_limit_ip = check_rate_limit(ip, now)
    if check_rate_limit_ip:
        return check_rate_limit_ip

    try:
        data = request.json
    except InvalidUsage:
        logger.info(f"Invalid json in request, body is: {request.body}")
        return json_response({
            "status": "error",
            "code": "error_bad_json",
            "content": "Invalid usage, body isn't proper json",
        }, status=400)

    # The body must be a JSON object: a list, string or number is valid JSON
    # too, but indexing it by key below would raise instead of answering 400.
    if not isinstance(data, dict) or "domain" not in data or "nonce" not in data:
        logger.info(f"Invalid request: didn't specified a domain and a nonce id (body is: {request.body}")
        return json_response({
            "status": "error",
            "code": "error_no_domain_",
            "content": "Request must specify a domain and a nonce",
        }, status=400)

    domain = data["domain"]
    domain_is_string = isinstance(domain, str)

    # Since now we are only checking the IP itself, it seems
    # unnecessary to also have a rate limit on domains since the
    # rate limit on IP will be hit first ...
    # That would simplify some code, for example we could add the
    # rate limit check in a decorator for each route/check
    #
    # Only strings are used as rate limit keys: a JSON list or object is
    # unhashable and would make the lookup raise.
    if domain_is_string:
        check_rate_limit_domain = check_rate_limit(domain, now)
        if check_rate_limit_domain:
            return check_rate_limit_domain

    if not domain_is_string or not validators.domain(domain):
        logger.info(f"Invalid request, is not in the right format (domain is: {domain})")
        return json_response({
            "status": "error",
            "code": "error_domain_bad_format",
            "content": "domain is not in the right format (do not include http:// or https://)",
        }, status=400)

    nonce = data["nonce"]

    # nonce id is arbitrarily defined to be a
    # 16-digit hexadecimal string
    # fullmatch is used because "$" in re.match also matches right before a
    # trailing newline, which would let "…\n" through into the URL.
    if not isinstance(nonce, str) or not re.fullmatch(r"[a-f0-9]{16}", nonce):
        logger.info(f"Invalid request, is not in the right format (nonce is: {nonce})")
        return json_response({
            "status": "error",
            "code": "error_nonce_bad_format",
            "content": "nonce is not in the right format (it should be a 16-digit hexadecimal string)",
        }, status=400)

    # ############################################# #
    #  Run the actual check                         #
    # ############################################# #

    url = "http://" + ip + "/.well-known/ynh-diagnosis/" + nonce
    unknown_error_body = {
        "status": "error",
        "code": "error_http_check_unknown_error",
        "content": "An error happened while trying to reach your domain, it's very likely unreachable",
    }

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url,
                                   headers={"Host": domain},
                                   timeout=aiohttp.ClientTimeout(total=30)) as response:
                # XXX in the future try to do a double check with the server
                # to see if the correct content is returned
                await response.text()
                status = response.status
        # TODO various kind of errors
        except aiohttp.client_exceptions.ClientConnectorError:
            return json_response({
                "status": "error",
                "code": "error_http_check_connection_error",
                "content": "Connection error: could not connect to the requested domain, it's very likely unreachable",
            }, status=418)
        except Exception:
            logger.exception(f"Unexpected error when checking http access for {domain} asked by {ip}")
            return json_response(unknown_error_body, status=400)

    # Explicit comparison instead of an assert: asserts are stripped when
    # Python runs with -O, which would turn every status into a success.
    if status != 200:
        logger.info(f"Unexpected HTTP status {status} when checking http access for {domain} asked by {ip}")
        return json_response(unknown_error_body, status=400)

    logger.info(f"Success when checking http access for {domain} asked by {ip}")
    return json_response({"status": "ok"})


@app.route("/check-ports/", methods=["POST"])
async def check_ports(request):
    """
    This function receives an HTTP request from a YunoHost instance while this
    server is hosted on our infrastructure. The request is expected to be a
    POST request with a body like {"ports": [80,443,22,25]}

    The general workflow is the following:

    - grab the ip from the request
    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
    - get json from body and ports list from it
    - check ports are opened or closed
    - answer the list of opened / closed ports

    Returns a JSON response: {"status": "ok", "ports": {port: bool, ...}} on
    success, otherwise {"status": "error", "code": ..., "content": ...} with
    HTTP status 400.
    """

    # this is supposed to be a fast operation if run often enough
    now = time.time()
    clear_rate_limit_db(now)

    # ############################################# #
    #  Validate request and extract the parameters  #
    # ############################################# #

    ip = request.ip

    check_rate_limit_ip = check_rate_limit(ip, now)
    if check_rate_limit_ip:
        return check_rate_limit_ip

    try:
        data = request.json
    except InvalidUsage:
        logger.info(f"Invalid json in request, body is: {request.body}")
        return json_response({
            "status": "error",
            "code": "error_bad_json",
            "content": "Invalid usage: body isn't proper json",
        }, status=400)

    def is_port_number(p):
        # bool is a subclass of int, so JSON true/false must be excluded
        # explicitly; 65535 is the highest valid TCP port and is included.
        return isinstance(p, int) and not isinstance(p, bool) and 0 < p <= 65535

    # Check "ports" exist in request and is a list of port
    # (the body must be a JSON object, otherwise indexing it by key would
    # raise instead of answering 400)
    if not isinstance(data, dict) or "ports" not in data:
        logger.info(f"Invalid request didn't specified a ports list (body is: {request.body}")
        return json_response({
            "status": "error",
            "code": "error_no_ports_list",
            "content": "Request must specify a list of ports to check",
        }, status=400)

    requested_ports = data["ports"]
    is_acceptable = (
        isinstance(requested_ports, list)
        and 0 < len(requested_ports) <= 30
        and all(is_port_number(p) for p in requested_ports)
    )
    if not is_acceptable:
        logger.info(f"Invalid request, ports list is not an actual list of ports, or is too long: {request.body}")
        return json_response({
            "status": "error",
            "code": "error_invalid_ports_list",
            "content": "This is not an acceptable port list: ports must be between 1 and 65535 and at most 30 ports can be checked",
        }, status=400)

    # Deduplicate so that each port is only probed once
    ports = sorted(set(requested_ports))

    # ############################################# #
    #  Run the actual check                         #
    # ############################################# #

    # Launch all the probes together instead of awaiting them one by one, so
    # that the total duration is not the sum of the individual timeouts.
    statuses = await asyncio.gather(*(check_port_is_open(ip, port) for port in ports))
    result = dict(zip(ports, statuses))

    return json_response({"status": "ok", "ports": result})


@app.route("/check-smtp/", methods=["POST"])
async def check_smtp(request):
    """
    Placeholder for the future SMTP check.

    Always answers with a JSON error whose code is
    "error_not_implemented_yet".
    """

    # TODO

    # NOTE: the helper is json_response (imported from sanic.response); the
    # previous spelling "json_reponse" raised a NameError on every call.
    return json_response({"status": "error",
                          "code": "error_not_implemented_yet",
                          "content": "This is not yet implemented"})


@app.route("/")
async def main(request):
    """Serve a short HTML notice to people opening the site in a browser."""
    notice = "You aren't really supposed to use this website using your browser.<br><br>It's a small server with an API to check if a services running on YunoHost instance can be reached from 'the global internet'."
    return html(notice)


# Start the server only when this file is executed as a script (not when it
# is imported), listening on 0.0.0.0 (all IPv4 interfaces), port 7000.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7000)
Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`import re`
[enh] implement rate limit and finish project 2019-01-19 09:58:39 +01:00			`import time`
[enh] do ipv4 dns check 2019-01-19 08:47:56 +01:00			`import asyncio`
			`import aiodns`
[enh] first working version 2019-01-19 07:54:11 +01:00			`import aiohttp`
[enh] check domain validity 2019-01-19 07:58:37 +01:00			`import validators`
Add API to check ports 2019-07-29 22:23:14 +02:00			`import socket`
[enh] progress 2019-01-19 07:38:13 +01:00
init 2019-01-19 07:25:23 +01:00			`from sanic import Sanic`
[enh] first working version 2019-01-19 07:54:11 +01:00			`from sanic.log import logger`
[enh] progress 2019-01-19 07:38:13 +01:00			`from sanic.response import html, json as json_response`
			`from sanic.exceptions import InvalidUsage`
init 2019-01-19 07:25:23 +01:00
			`app = Sanic()`

[enh] implement rate limit and finish project 2019-01-19 09:58:39 +01:00			`# keep that in memory`
			`RATE_LIMIT_DB = {}`

			`# to prevent DDoS or bounce attack attempt or something like that`
			`RATE_LIMIT_SECONDS = 5`


			`def clear_rate_limit_db(now):`
			`to_delete = []`

			`"Remove too old rate limit values"`
			`for key, value in RATE_LIMIT_DB.items():`
			`if now - value > RATE_LIMIT_SECONDS:`
			`# a dictionnary can't be modified during iteration so delegate this`
			`# operation`
			`to_delete.append(key)`

			`for key in to_delete:`
			`del RATE_LIMIT_DB[key]`

init 2019-01-19 07:25:23 +01:00
Factorize rate limit check 2019-07-25 11:07:39 +02:00			`def check_rate_limit(key, now):`

			`if key in RATE_LIMIT_DB:`
			`since_last_attempt = now - RATE_LIMIT_DB[key]`
			`if since_last_attempt < RATE_LIMIT_SECONDS:`
			`logger.info(f"Rate limit reached for {key}, can retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds")`
			`return json_response({`
			`"status": "error",`
			`"code": "error_rate_limit",`
			`"content": f"Rate limit reached for this domain or ip, retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds",`
			`}, status=400)`

			`RATE_LIMIT_DB[key] = time.time()`


Add API to check ports 2019-07-29 22:23:14 +02:00			`async def check_port_is_open(ip, port):`

			`sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)`
			`sock.settimeout(2)`
			`result = sock.connect_ex((ip, port))`
			`return result == 0`


Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`# FIXME : remove it ? not used anymore...`
[enh] handle ipv6 dns check 2019-01-19 09:19:06 +01:00			`async def query_dns(host, dns_entry_type):`
			`loop = asyncio.get_event_loop()`
			`dns_resolver = aiodns.DNSResolver(loop=loop)`

			`try:`
			`return await dns_resolver.query(host, dns_entry_type)`
			`except aiodns.error.DNSError:`
			`return []`
			`except Exception:`
			`import traceback`
			`traceback.print_exc()`
			`logger.error("Unhandled error while resolving DNS entry")`


Add API to check ports 2019-07-29 22:23:14 +02:00			`@app.route("/check-http/", methods=["POST"])`
[enh] progress 2019-01-19 07:38:13 +01:00			`async def check_http(request):`
[doc] add some doc about the general workflow 2019-01-19 13:10:39 +01:00			`"""`
			`This function received an HTTP request from a YunoHost instance while this`
Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`server is hosted on our infrastructure. The request is expected to be a`
			`POST request with a body like {"domain": "domain-to-check.tld",`
			`"nonce": "1234567890abcdef" }`

			`The nonce value is a single-use ID, and we will try to reach`
			`http://domain.tld/.well-known/ynh-{nonce} which should return 200 if we`
			`are indeed reaching the right server.`
[doc] add some doc about the general workflow 2019-01-19 13:10:39 +01:00
			`The general workflow is the following:`

			`- grab the ip from the request`
			`- check for ip based rate limit (see RATE_LIMIT_SECONDS value)`
			`- get json from body and domain from it`
			`- check for domain based rate limit (see RATE_LIMIT_SECONDS value)`
			`- check domain is in valid format`
Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`- try to do an http request on the ip (using the domain as target host) for the page /.well-known/ynh-diagnosis/{nonce}`
[doc] add some doc about the general workflow 2019-01-19 13:10:39 +01:00			`- answer saying if the domain can be reached`
			`"""`

Factorize rate limit check 2019-07-25 11:07:39 +02:00			`# this is supposed to be a fast operation if run often enough`
[enh] implement rate limit and finish project 2019-01-19 09:58:39 +01:00			`now = time.time()`
			`clear_rate_limit_db(now)`

Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`# ############################################# #`
			`# Validate request and extract the parameters #`
			`# ############################################# #`

[enh] progress 2019-01-19 07:38:13 +01:00			`ip = request.ip`

Factorize rate limit check 2019-07-25 11:07:39 +02:00			`check_rate_limit_ip = check_rate_limit(ip, now)`
			`if check_rate_limit_ip:`
			`return check_rate_limit_ip`
[enh] implement rate limit and finish project 2019-01-19 09:58:39 +01:00
[enh] progress 2019-01-19 07:38:13 +01:00			`try:`
			`data = request.json`
			`except InvalidUsage:`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`logger.info(f"Invalid json in request, body is: {request.body}")`
[enh] progress 2019-01-19 07:38:13 +01:00			`return json_response({`
			`"status": "error",`
[enh] add error codes 2019-01-19 08:08:36 +01:00			`"code": "error_bad_json",`
Factorize rate limit check 2019-07-25 11:07:39 +02:00			`"content": "Invalid usage, body isn't proper json",`
[enh] uses http code 400 in case of bad request 2019-01-19 10:05:06 +01:00			`}, status=400)`
[enh] progress 2019-01-19 07:38:13 +01:00
Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`if not data or "domain" not in data or "nonce" not in data:`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`logger.info(f"Invalid request: didn't specified a domain and a nonce id (body is: {request.body}")`
[enh] add error codes 2019-01-19 08:08:36 +01:00			`return json_response({`
			`"status": "error",`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`"code": "error_no_domain_",`
Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`"content": "Request must specify a domain and a nonce",`
[enh] uses http code 400 in case of bad request 2019-01-19 10:05:06 +01:00			`}, status=400)`
[enh] progress 2019-01-19 07:38:13 +01:00
			`domain = data["domain"]`

Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`# Since now we are only checking the IP itself, it seems`
			`# unecessary to also have a rate limit on domains since the`
			`# rate limit on IP will be hit first ...`
			`# That would simplify some code, for example we could add the`
			`# rate limit check in a decorator for each route/check`
Factorize rate limit check 2019-07-25 11:07:39 +02:00			`check_rate_limit_domain = check_rate_limit(domain, now)`
			`if check_rate_limit_domain:`
			`return check_rate_limit_domain`
[enh] implement rate limit and finish project 2019-01-19 09:58:39 +01:00
[enh] check domain validity 2019-01-19 07:58:37 +01:00			`if not validators.domain(domain):`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`logger.info(f"Invalid request, is not in the right format (domain is: {domain})")`
[enh] add error codes 2019-01-19 08:08:36 +01:00			`return json_response({`
			`"status": "error",`
			`"code": "error_domain_bad_format",`
			`"content": "domain is not in the right format (do not include http:// or https://)",`
[enh] uses http code 400 in case of bad request 2019-01-19 10:05:06 +01:00			`}, status=400)`
[enh] check domain validity 2019-01-19 07:58:37 +01:00
Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`nonce = data["nonce"]`

			`# nonce id is arbitrarily defined to be a`
			`# 16-digit hexadecimal string`
			`if not re.match(r"^[a-f0-9]{16}$", nonce):`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`logger.info(f"Invalid request, is not in the right format (nonce is: {nonce})")`
Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`return json_response({`
			`"status": "error",`
			`"code": "error_nonce_bad_format",`
			`"content": "nonce is not in the right format (it should be a 16-digit hexadecimal string)",`
			`}, status=400)`

			`# ############################################# #`
			`# Run the actual check #`
			`# ############################################# #`

[enh] first working version 2019-01-19 07:54:11 +01:00			`async with aiohttp.ClientSession() as session:`
			`try:`
Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`url = "http://" + ip + "/.well-known/ynh-diagnosis/" + nonce`
			`async with session.get(url,`
Imho we shouldn't enforce this check on the DNS ... we may want to check the http reachability independently of the DNS setup so that we're more able to pinpoint what's the actual issue (also it may lead to many false-negatives due to DNS caching) 2019-07-25 11:23:47 +02:00			`headers={"Host": domain},`
			`timeout=aiohttp.ClientTimeout(total=30)) as response:`
[enh] first working version 2019-01-19 07:54:11 +01:00			`# XXX in the futur try to do a double check with the server to`
			`# see if the correct content is get`
			`await response.text()`
Implement nonce mechanism 2019-07-29 21:17:34 +02:00			`assert response.status == 200`
[enh] first working version 2019-01-19 07:54:11 +01:00			`logger.info(f"Success when checking http access for {domain} asked by {ip}")`
			`# TODO various kind of errors`
			`except aiohttp.client_exceptions.ClientConnectorError:`
[enh] add error codes 2019-01-19 08:08:36 +01:00			`return json_response({`
			`"status": "error",`
			`"code": "error_http_check_connection_error",`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`"content": "Connection error: could not connect to the requested domain, it's very likely unreachable",`
[mod] custom code when test failure is due to a known error 2019-01-22 02:40:16 +01:00			`}, status=418)`
[enh] first working version 2019-01-19 07:54:11 +01:00			`except Exception:`
			`import traceback`
			`traceback.print_exc()`

[enh] add error codes 2019-01-19 08:08:36 +01:00			`return json_response({`
			`"status": "error",`
			`"code": "error_http_check_unknown_error",`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`"content": "An error happened while trying to reach your domain, it's very likely unreachable",`
[enh] uses http code 400 in case of bad request 2019-01-19 10:05:06 +01:00			`}, status=400)`
[enh] first working version 2019-01-19 07:54:11 +01:00
[enh] progress 2019-01-19 07:38:13 +01:00			`return json_response({"status": "ok"})`


Add API to check ports 2019-07-29 22:23:14 +02:00			`@app.route("/check-ports/", methods=["POST"])`
			`async def check_ports(request):`
			`"""`
			`This function received an HTTP request from a YunoHost instance while this`
			`server is hosted on our infrastructure. The request is expected to be a`
			`POST request with a body like {"ports": [80,443,22,25]}`

			`The general workflow is the following:`

			`- grab the ip from the request`
			`- check for ip based rate limit (see RATE_LIMIT_SECONDS value)`
			`- get json from body and ports list from it`
			`- check ports are opened or closed`
			`- answer the list of opened / closed ports`
			`"""`

			`# this is supposed to be a fast operation if run often enough`
			`now = time.time()`
			`clear_rate_limit_db(now)`

			`# ############################################# #`
			`# Validate request and extract the parameters #`
			`# ############################################# #`

			`ip = request.ip`

			`check_rate_limit_ip = check_rate_limit(ip, now)`
			`if check_rate_limit_ip:`
			`return check_rate_limit_ip`

			`try:`
			`data = request.json`
			`except InvalidUsage:`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`logger.info(f"Invalid json in request, body is: {request.body}")`
Add API to check ports 2019-07-29 22:23:14 +02:00			`return json_response({`
			`"status": "error",`
			`"code": "error_bad_json",`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`"content": "Invalid usage: body isn't proper json",`
Add API to check ports 2019-07-29 22:23:14 +02:00			`}, status=400)`

			`def is_port_number(p):`
			`return isinstance(p, int) and p > 0 and p < 65535`

			`# Check "ports" exist in request and is a list of port`
			`if not data or "ports" not in data:`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`logger.info(f"Invalid request didn't specified a ports list (body is: {request.body}")`
Add API to check ports 2019-07-29 22:23:14 +02:00			`return json_response({`
			`"status": "error",`
			`"code": "error_no_ports_list",`
			`"content": "Request must specify a list of ports to check",`
			`}, status=400)`
			`elif not isinstance(data["ports"], list) or any(not is_port_number(p) for p in data["ports"]) or len(data["ports"]) > 30 or data["ports"] == []:`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`logger.info(f"Invalid request, ports list is not an actual list of ports, or is too long: {request.body}")`
Add API to check ports 2019-07-29 22:23:14 +02:00			`return json_response({`
			`"status": "error",`
			`"code": "error_invalid_ports_list",`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`"content": "This is not an acceptable port list: ports must be between 0 and 65535 and at most 30 ports can be checked",`
Add API to check ports 2019-07-29 22:23:14 +02:00			`}, status=400)`

			`ports = set(data["ports"]) # Keep only a set so that we get unique ports`

			`# ############################################# #`
			`# Run the actual check #`
			`# ############################################# #`

			`result = {}`
			`for port in ports:`
			`result[port] = await check_port_is_open(ip, port)`

			`return json_response({"status": "ok", "ports": result})`


Add skeleton for check-smtp 2019-07-29 22:34:46 +02:00			`@app.route("/check-smtp/", methods=["POST"])`
			`async def check_smtp(request):`

			`# TODO`

			`return json_reponse({"status": "error",`
			`"code": "error_not_implemented_yet",`
			`"content": "This is not yet implemented"})`


init 2019-01-19 07:25:23 +01:00			`@app.route("/")`
[enh] progress 2019-01-19 07:38:13 +01:00			`async def main(request):`
Misc fixes / updates for messages 2019-07-29 22:54:58 +02:00			`return html("You aren't really supposed to use this website using your browser.<br><br>It's a small server with an API to check if a services running on YunoHost instance can be reached from 'the global internet'.")`
init 2019-01-19 07:25:23 +01:00

			`if __name__ == "__main__":`
			`app.run(host="0.0.0.0", port=7000)`