From dea05903188639fcc5d2899d386ba24357d19991 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Thu, 25 Jul 2019 11:07:39 +0200 Subject: [PATCH 01/14] Factorize rate limit check --- server.py | 51 +++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/server.py b/server.py index cc35674..734da35 100644 --- a/server.py +++ b/server.py @@ -32,6 +32,21 @@ def clear_rate_limit_db(now): del RATE_LIMIT_DB[key] +def check_rate_limit(key, now): + + if key in RATE_LIMIT_DB: + since_last_attempt = now - RATE_LIMIT_DB[key] + if since_last_attempt < RATE_LIMIT_SECONDS: + logger.info(f"Rate limit reached for {key}, can retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds") + return json_response({ + "status": "error", + "code": "error_rate_limit", + "content": f"Rate limit reached for this domain or ip, retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds", + }, status=400) + + RATE_LIMIT_DB[key] = time.time() + + async def query_dns(host, dns_entry_type): loop = asyncio.get_event_loop() dns_resolver = aiodns.DNSResolver(loop=loop) @@ -65,32 +80,24 @@ async def check_http(request): - answer saying if the domain can be reached """ - # this is supposed to be a fast operation if run enough + # this is supposed to be a fast operation if run often enough now = time.time() clear_rate_limit_db(now) ip = request.ip - if ip in RATE_LIMIT_DB: - since_last_attempt = now - RATE_LIMIT_DB[ip] - if since_last_attempt < RATE_LIMIT_SECONDS: - logger.info(f"Rate limite {ip}, can retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds") - return json_response({ - "status": "error", - "code": "error_rate_limit", - "content": f"Rate limit on ip, retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds", - }, status=400) - - RATE_LIMIT_DB[ip] = time.time() + check_rate_limit_ip = check_rate_limit(ip, now) + if check_rate_limit_ip: + return check_rate_limit_ip try: data = request.json except InvalidUsage: - logger.info(f"Unvalid json in request, body is : {request.body}") + logger.info(f"Invalid json in request, body is : {request.body}") return json_response({ "status": "error", "code": "error_bad_json", - "content": "InvalidUsage, body isn't proper json", + "content": "Invalid usage, body isn't proper json", }, status=400) if not data or "domain" not in data: @@ -98,22 +105,14 @@ async def check_http(request): return json_response({ "status": "error", "code": "error_no_domain", - "content": "request must specify a domain", + "content": "Request must specify a domain", }, status=400) domain = data["domain"] - if domain in RATE_LIMIT_DB: - since_last_attempt = now - RATE_LIMIT_DB[domain] - if since_last_attempt < RATE_LIMIT_SECONDS: - logger.info(f"Rate limite {domain}, can retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds") - return json_response({ - "status": "error", - "code": "error_rate_limit", - "content": f"Rate limit on domain, retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds", - }, status=400) - - RATE_LIMIT_DB[domain] = time.time() + check_rate_limit_domain = check_rate_limit(domain, now) + if check_rate_limit_domain: + return check_rate_limit_domain if not validators.domain(domain): logger.info(f"Invalid request, is not in the right format (domain is : {domain})") From b593e6ba77811a00d5c4cc287e0f931ac19536cf Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Thu, 25 Jul 2019 11:23:47 +0200 Subject: [PATCH 02/14] Imho we shouldn't enforce this check on the DNS ... we may want to check the http reachability independently of the DNS setup so that we're more able to pinpoint what's the actual issue (also it may lead to many false-negatives due to DNS caching) --- server.py | 54 ++++-------------------------------------------------- 1 file changed, 4 insertions(+), 50 deletions(-) diff --git a/server.py b/server.py index 734da35..ebd621f 100644 --- a/server.py +++ b/server.py @@ -75,8 +75,7 @@ async def check_http(request): - get json from body and domain from it - check for domain based rate limit (see RATE_LIMIT_SECONDS value) - check domain is in valid format - - check dns entry for domain match the ip of the request (advanced rule for ipv6) - - everything is checked, now try to do an http request on the domain + - now try to do an http request on the ip using the domain as target host - answer saying if the domain can be reached """ @@ -122,56 +121,11 @@ async def check_http(request): "content": "domain is not in the right format (do not include http:// or https://)", }, status=400) - # TODO handle ipv6 - # ipv6 situation - if ":" in ip: - dns_entry = await query_dns(domain, "AAAA") - - if not dns_entry: - # check if entry in ip4 for custom error - dns_entry = await query_dns(domain, "A") - - # there is an ipv4 entry but the request is made in ipv6, ask to uses ipv4 instead - if dns_entry: - logger.info(f"[ipv6] Invalid request, no AAAA DNS entry for domain {domain} BUT ipv4 entry, ask user to request in ipv4") - return json_response({ - "status": "error", - "code": "error_no_ipv6_dns_entry_but_ipv4_dns_entry", - "content": f"there is not AAAA (ipv6) DNS entry for domain {domain} BUT there is an entry in ipv4, please redo the request in ipv4", - }, status=400) - - else: - logger.info(f"[ipv6] Invalid request, no DNS entry for domain {domain} (both in ipv6 and ip4)") - return json_response({ - "status": "error", - "code": "error_no_ipv4_ipv6_dns_entry_for_domain", - "content": f"there is not A (ipv4) and AAAA (ipv6) DNS entry for domain {domain}", - }, status=400) - # ipv4 situation - else: - dns_entry = await query_dns(domain, "A") - - if not dns_entry: - logger.info(f"[ipv4] Invalid request, no DNS entry for domain {domain}") - return json_response({ - "status": "error", - "code": "error_no_ipv4_dns_entry_for_domain", - "content": f"there is not A (ipv4) and AAAA (ipv6) DNS entry for domain {domain}", - }, status=400) - - dns_entry = dns_entry[0] - - if dns_entry.host != ip: - logger.info(f"Invalid request, A DNS entry {dns_entry.host} for domain {domain} doesn't match request ip {ip}") - return json_response({ - "status": "error", - "code": "error_dns_entry_doesnt_match_request_ip", - "content": f"error, the request is made from the ip {ip} but the dns entry said {domain} has the ip {dns_entry.host}, you can only check a domain configured for your ip", - }, status=400) - async with aiohttp.ClientSession() as session: try: - async with session.get("http://" + domain, timeout=aiohttp.ClientTimeout(total=30)) as response: + async with session.get("http://" + ip, + headers={"Host": domain}, + timeout=aiohttp.ClientTimeout(total=30)) as response: # XXX in the futur try to do a double check with the server to # see if the correct content is get await response.text() From 9a68ffabc1073f799527536d0af41ab819aa92ff Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Mon, 29 Jul 2019 21:17:34 +0200 Subject: [PATCH 03/14] Implement nonce mechanism --- server.py | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/server.py b/server.py index ebd621f..e4aa235 100644 --- a/server.py +++ b/server.py @@ -1,3 +1,4 @@ +import re import time import asyncio import aiodns @@ -65,8 +66,13 @@ async def query_dns(host, dns_entry_type): async def check_http(request): """ This function received an HTTP request from a YunoHost instance while this - server is hosted on our infrastructure. The expected request body is: - {"domain": "domain-to-check.tld"} and the method POST + server is hosted on our infrastructure. The request is expected to be a + POST request with a body like {"domain": "domain-to-check.tld", + "nonce": "1234567890abcdef" } + + The nonce value is a single-use ID, and we will try to reach + http://domain.tld/.well-known/ynh-{nonce} which should return 200 if we + are indeed reaching the right server. The general workflow is the following: @@ -75,7 +81,7 @@ async def check_http(request): - get json from body and domain from it - check for domain based rate limit (see RATE_LIMIT_SECONDS value) - check domain is in valid format - - now try to do an http request on the ip using the domain as target host + - try to do an http request on the ip (using the domain as target host) for the page /.well-known/ynh-diagnosis/{nonce} - answer saying if the domain can be reached """ @@ -83,6 +89,10 @@ async def check_http(request): now = time.time() clear_rate_limit_db(now) + # ############################################# # + # Validate request and extract the parameters # + # ############################################# # + ip = request.ip check_rate_limit_ip = check_rate_limit(ip, now) @@ -99,12 +109,12 @@ async def check_http(request): "content": "Invalid usage, body isn't proper json", }, status=400) - if not data or "domain" not in data: - logger.info(f"Unvalid request didn't specified a domain (body is : {request.body}") + if not data or "domain" not in data or "nonce" not in data: + logger.info(f"Unvalid request didn't specified a domain and a nonce id (body is : {request.body}") return json_response({ "status": "error", "code": "error_no_domain", - "content": "Request must specify a domain", + "content": "Request must specify a domain and a nonce", }, status=400) domain = data["domain"] @@ -121,14 +131,32 @@ async def check_http(request): "content": "domain is not in the right format (do not include http:// or https://)", }, status=400) + nonce = data["nonce"] + + # nonce id is arbitrarily defined to be a + # 16-digit hexadecimal string + if not re.match(r"^[a-f0-9]{16}$", nonce): + logger.info(f"Invalid request, is not in the right format (nonce is : {nonce})") + return json_response({ + "status": "error", + "code": "error_nonce_bad_format", + "content": "nonce is not in the right format (it should be a 16-digit hexadecimal string)", + }, status=400) + + # ############################################# # + # Run the actual check # + # ############################################# # + async with aiohttp.ClientSession() as session: try: - async with session.get("http://" + ip, + url = "http://" + ip + "/.well-known/ynh-diagnosis/" + nonce + async with session.get(url, headers={"Host": domain}, timeout=aiohttp.ClientTimeout(total=30)) as response: # XXX in the futur try to do a double check with the server to # see if the correct content is get await response.text() + assert response.status == 200 logger.info(f"Success when checking http access for {domain} asked by {ip}") # TODO various kind of errors except aiohttp.client_exceptions.ClientConnectorError: From faa81c1ee364e9e41895fe166b5aa15a1b5dc2a3 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Mon, 29 Jul 2019 22:23:14 +0200 Subject: [PATCH 04/14] Add API to check ports --- server.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index e4aa235..13ad57d 100644 --- a/server.py +++ b/server.py @@ -4,6 +4,7 @@ import asyncio import aiodns import aiohttp import validators +import socket from sanic import Sanic from sanic.log import logger @@ -48,6 +49,14 @@ def check_rate_limit(key, now): RATE_LIMIT_DB[key] = time.time() +async def check_port_is_open(ip, port): + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(2) + result = sock.connect_ex((ip, port)) + return result == 0 + + async def query_dns(host, dns_entry_type): loop = asyncio.get_event_loop() dns_resolver = aiodns.DNSResolver(loop=loop) @@ -62,7 +71,7 @@ async def query_dns(host, dns_entry_type): logger.error("Unhandled error while resolving DNS entry") -@app.route("/check/", methods=["POST"]) +@app.route("/check-http/", methods=["POST"]) async def check_http(request): """ This function received an HTTP request from a YunoHost instance while this @@ -178,6 +187,78 @@ async def check_http(request): return json_response({"status": "ok"}) +@app.route("/check-ports/", methods=["POST"]) +async def check_ports(request): + """ + This function received an HTTP request from a YunoHost instance while this + server is hosted on our infrastructure. The request is expected to be a + POST request with a body like {"ports": [80,443,22,25]} + + The general workflow is the following: + + - grab the ip from the request + - check for ip based rate limit (see RATE_LIMIT_SECONDS value) + - get json from body and ports list from it + - check ports are opened or closed + - answer the list of opened / closed ports + """ + + # this is supposed to be a fast operation if run often enough + now = time.time() + clear_rate_limit_db(now) + + # ############################################# # + # Validate request and extract the parameters # + # ############################################# # + + ip = request.ip + + check_rate_limit_ip = check_rate_limit(ip, now) + if check_rate_limit_ip: + return check_rate_limit_ip + + try: + data = request.json + except InvalidUsage: + logger.info(f"Invalid json in request, body is : {request.body}") + return json_response({ + "status": "error", + "code": "error_bad_json", + "content": "Invalid usage, body isn't proper json", + }, status=400) + + def is_port_number(p): + return isinstance(p, int) and p > 0 and p < 65535 + + # Check "ports" exist in request and is a list of port + if not data or "ports" not in data: + logger.info(f"Unvalid request didn't specified a ports list (body is : {request.body}") + return json_response({ + "status": "error", + "code": "error_no_ports_list", + "content": "Request must specify a list of ports to check", + }, status=400) + elif not isinstance(data["ports"], list) or any(not is_port_number(p) for p in data["ports"]) or len(data["ports"]) > 30 or data["ports"] == []: + logger.info(f"Invalid request, ports list is not an actual list of ports, or is too long : {request.body}") + return json_response({ + "status": "error", + "code": "error_invalid_ports_list", + "content": "This is not an acceptable port list : ports must be between 0 and 65535 and at most 30 ports can be checked", + }, status=400) + + ports = set(data["ports"]) # Keep only a set so that we get unique ports + + # ############################################# # + # Run the actual check # + # ############################################# # + + result = {} + for port in ports: + result[port] = await check_port_is_open(ip, port) + + return json_response({"status": "ok", "ports": result}) + + @app.route("/") async def main(request): return html("You aren't really supposed to use this website using your browser.

It's a small server to check if a YunoHost instance can be reached by http before trying to instal a LE certificate.") From d23290299ec7d49c6658005c3cd98d35c5a58349 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Mon, 29 Jul 2019 22:34:46 +0200 Subject: [PATCH 05/14] Add skeleton for check-smtp --- server.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/server.py b/server.py index 13ad57d..ae864b5 100644 --- a/server.py +++ b/server.py @@ -259,6 +259,16 @@ async def check_ports(request): return json_response({"status": "ok", "ports": result}) +@app.route("/check-smtp/", methods=["POST"]) +async def check_smtp(request): + + # TODO + + return json_reponse({"status": "error", + "code": "error_not_implemented_yet", + "content": "This is not yet implemented"}) + + @app.route("/") async def main(request): return html("You aren't really supposed to use this website using your browser.

It's a small server to check if a YunoHost instance can be reached by http before trying to instal a LE certificate.") From 1445a12ab6fe8e3227a574ed3698745ee15aaf07 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Mon, 29 Jul 2019 22:54:58 +0200 Subject: [PATCH 06/14] Misc fixes / updates for messages --- server.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/server.py b/server.py index ae864b5..bc89a05 100644 --- a/server.py +++ b/server.py @@ -57,6 +57,7 @@ async def check_port_is_open(ip, port): return result == 0 +# FIXME : remove it ? not used anymore... async def query_dns(host, dns_entry_type): loop = asyncio.get_event_loop() dns_resolver = aiodns.DNSResolver(loop=loop) @@ -111,7 +112,7 @@ async def check_http(request): try: data = request.json except InvalidUsage: - logger.info(f"Invalid json in request, body is : {request.body}") + logger.info(f"Invalid json in request, body is: {request.body}") return json_response({ "status": "error", "code": "error_bad_json", @@ -119,21 +120,26 @@ async def check_http(request): }, status=400) if not data or "domain" not in data or "nonce" not in data: - logger.info(f"Unvalid request didn't specified a domain and a nonce id (body is : {request.body}") + logger.info(f"Invalid request: didn't specified a domain and a nonce id (body is: {request.body}") return json_response({ "status": "error", - "code": "error_no_domain", + "code": "error_no_domain_", "content": "Request must specify a domain and a nonce", }, status=400) domain = data["domain"] + # Since now we are only checking the IP itself, it seems + # unecessary to also have a rate limit on domains since the + # rate limit on IP will be hit first ... + # That would simplify some code, for example we could add the + # rate limit check in a decorator for each route/check check_rate_limit_domain = check_rate_limit(domain, now) if check_rate_limit_domain: return check_rate_limit_domain if not validators.domain(domain): - logger.info(f"Invalid request, is not in the right format (domain is : {domain})") + logger.info(f"Invalid request, is not in the right format (domain is: {domain})") return json_response({ "status": "error", "code": "error_domain_bad_format", @@ -145,7 +151,7 @@ async def check_http(request): # nonce id is arbitrarily defined to be a # 16-digit hexadecimal string if not re.match(r"^[a-f0-9]{16}$", nonce): - logger.info(f"Invalid request, is not in the right format (nonce is : {nonce})") + logger.info(f"Invalid request, is not in the right format (nonce is: {nonce})") return json_response({ "status": "error", "code": "error_nonce_bad_format", @@ -172,7 +178,7 @@ async def check_http(request): return json_response({ "status": "error", "code": "error_http_check_connection_error", - "content": "connection error, could not connect to the requested domain, it's very likely unreachable", + "content": "Connection error: could not connect to the requested domain, it's very likely unreachable", }, status=418) except Exception: import traceback @@ -181,7 +187,7 @@ async def check_http(request): return json_response({ "status": "error", "code": "error_http_check_unknown_error", - "content": "an error happen while trying to get your domain, it's very likely unreachable", + "content": "An error happened while trying to reach your domain, it's very likely unreachable", }, status=400) return json_response({"status": "ok"}) @@ -220,11 +226,11 @@ async def check_ports(request): try: data = request.json except InvalidUsage: - logger.info(f"Invalid json in request, body is : {request.body}") + logger.info(f"Invalid json in request, body is: {request.body}") return json_response({ "status": "error", "code": "error_bad_json", - "content": "Invalid usage, body isn't proper json", + "content": "Invalid usage: body isn't proper json", }, status=400) def is_port_number(p): @@ -232,18 +238,18 @@ async def check_ports(request): # Check "ports" exist in request and is a list of port if not data or "ports" not in data: - logger.info(f"Unvalid request didn't specified a ports list (body is : {request.body}") + logger.info(f"Invalid request didn't specified a ports list (body is: {request.body}") return json_response({ "status": "error", "code": "error_no_ports_list", "content": "Request must specify a list of ports to check", }, status=400) elif not isinstance(data["ports"], list) or any(not is_port_number(p) for p in data["ports"]) or len(data["ports"]) > 30 or data["ports"] == []: - logger.info(f"Invalid request, ports list is not an actual list of ports, or is too long : {request.body}") + logger.info(f"Invalid request, ports list is not an actual list of ports, or is too long: {request.body}") return json_response({ "status": "error", "code": "error_invalid_ports_list", - "content": "This is not an acceptable port list : ports must be between 0 and 65535 and at most 30 ports can be checked", + "content": "This is not an acceptable port list: ports must be between 0 and 65535 and at most 30 ports can be checked", }, status=400) ports = set(data["ports"]) # Keep only a set so that we get unique ports @@ -271,7 +277,7 @@ async def check_smtp(request): @app.route("/") async def main(request): - return html("You aren't really supposed to use this website using your browser.

It's a small server to check if a YunoHost instance can be reached by http before trying to instal a LE certificate.") + return html("You aren't really supposed to use this website using your browser.

It's a small server with an API to check if a services running on YunoHost instance can be reached from 'the global internet'.") if __name__ == "__main__": From 5bc02ab1c4e1e6f9b125cd76a2fcdabbf788d7f5 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Wed, 31 Jul 2019 14:41:21 +0000 Subject: [PATCH 07/14] Switch to a max request per time-window rate limit, 'cause having to wait 4 seconds between requests is too complicated for the client side --- server.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/server.py b/server.py index bc89a05..5e2d2ee 100644 --- a/server.py +++ b/server.py @@ -17,15 +17,19 @@ app = Sanic() RATE_LIMIT_DB = {} # to prevent DDoS or bounce attack attempt or something like that -RATE_LIMIT_SECONDS = 5 - +# Can't do more than 10 requests in a 300-seconds window +RATE_LIMIT_SECONDS = 300 +RATE_LIMIT_NB_REQUESTS = 10 def clear_rate_limit_db(now): to_delete = [] "Remove too old rate limit values" - for key, value in RATE_LIMIT_DB.items(): - if now - value > RATE_LIMIT_SECONDS: + for key, times in RATE_LIMIT_DB.items(): + # Remove values older RATE_LIMIT_SECONDS + RATE_LIMIT_DB[key] = [t for t in times if now - t < RATE_LIMIT_SECONDS] + # If list is empty, remove the key + if RATE_LIMIT_DB[key] == []: # a dictionnary can't be modified during iteration so delegate this # operation to_delete.append(key) @@ -36,17 +40,21 @@ def clear_rate_limit_db(now): def check_rate_limit(key, now): - if key in RATE_LIMIT_DB: - since_last_attempt = now - RATE_LIMIT_DB[key] - if since_last_attempt < RATE_LIMIT_SECONDS: - logger.info(f"Rate limit reached for {key}, can retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds") - return json_response({ - "status": "error", - "code": "error_rate_limit", - "content": f"Rate limit reached for this domain or ip, retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds", - }, status=400) + # If there are more recent attempts than allowed + if key in RATE_LIMIT_DB and len(RATE_LIMIT_DB[key]) > RATE_LIMIT_NB_REQUESTS: + oldest_attempt = RATE_LIMIT_DB[key][0] + logger.info(f"Rate limit reached for {key}, can retry in {int(RATE_LIMIT_SECONDS - now + oldest_attempt)} seconds") + return json_response({ + "status": "error", + "code": "error_rate_limit", + "content": f"Rate limit reached for this domain or ip, retry in {int(RATE_LIMIT_SECONDS - now + oldest_attempt)} seconds", + }, status=400) - RATE_LIMIT_DB[key] = time.time() + # In any case, add this attempt to the DB + if key not in RATE_LIMIT_DB: + RATE_LIMIT_DB[key] = [now] + else: + RATE_LIMIT_DB[key].append(now) async def check_port_is_open(ip, port): From 15345db26146f89d4fd7f3c717e5a25a6db4a490 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Wed, 20 Nov 2019 16:10:36 +0000 Subject: [PATCH 08/14] Moar things for deployment and for stuff to work and report meaningful errors --- server.py => yunodiagnoser.py | 36 ++++++++++++++++++++++++----------- yunodiagnoser.service | 11 +++++++++++ 2 files changed, 36 insertions(+), 11 deletions(-) rename server.py => yunodiagnoser.py (86%) create mode 100644 yunodiagnoser.service diff --git a/server.py b/yunodiagnoser.py similarity index 86% rename from server.py rename to yunodiagnoser.py index 5e2d2ee..9272ac9 100644 --- a/server.py +++ b/yunodiagnoser.py @@ -62,6 +62,7 @@ async def check_port_is_open(ip, port): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(2) result = sock.connect_ex((ip, port)) + sock.close() return result == 0 @@ -111,7 +112,7 @@ async def check_http(request): # Validate request and extract the parameters # # ############################################# # - ip = request.ip + ip = request.headers["x-forwarded-for"].split(",")[0] check_rate_limit_ip = check_rate_limit(ip, now) if check_rate_limit_ip: @@ -175,30 +176,43 @@ async def check_http(request): url = "http://" + ip + "/.well-known/ynh-diagnosis/" + nonce async with session.get(url, headers={"Host": domain}, - timeout=aiohttp.ClientTimeout(total=30)) as response: + allow_redirects=False, + timeout=aiohttp.ClientTimeout(total=10)) as response: # XXX in the futur try to do a double check with the server to # see if the correct content is get await response.text() - assert response.status == 200 - logger.info(f"Success when checking http access for {domain} asked by {ip}") # TODO various kind of errors - except aiohttp.client_exceptions.ClientConnectorError: + except (aiohttp.client_exceptions.ServerTimeoutError, asyncio.TimeoutError): + return json_response({ + "status": "error", + "code": "error_http_check_timeout", + "content": "Timed-out while trying to contact your server from outside. It appears to be unreachable. You should check that you're correctly forwarding port 80, that nginx is running, and that a firewall is not interfering.", + }, status=418) + except aiohttp.client_exceptions.ClientConnectorError as e: return json_response({ "status": "error", "code": "error_http_check_connection_error", - "content": "Connection error: could not connect to the requested domain, it's very likely unreachable", + "content": "Connection error: could not connect to the requested domain, it's very likely unreachable. Raw error: " + str(e), }, status=418) - except Exception: + except Exception as e: import traceback traceback.print_exc() return json_response({ "status": "error", "code": "error_http_check_unknown_error", - "content": "An error happened while trying to reach your domain, it's very likely unreachable", + "content": "An error happened while trying to reach your domain, it's very likely unreachable. Raw error: %s" % e, }, status=400) - return json_response({"status": "ok"}) + if response.status != 200: + return json_response({ + "status": "error", + "code": "error_http_check_bad_status_code", + "content": "Could not reach your server as expected, it returned code %s. It might be that another machine answered instead of your server. You should check that you're correctly forwarding port 80, that your nginx configuration is up to date, and that a reverse-proxy is not interfering." % response.status, + }, status=418) + else: + logger.info(f"Success when checking http access for {domain} asked by {ip}") + return json_response({"status": "ok"}) @app.route("/check-ports/", methods=["POST"]) @@ -225,7 +239,7 @@ async def check_ports(request): # Validate request and extract the parameters # # ############################################# # - ip = request.ip + ip = request.headers["x-forwarded-for"].split(",")[0] check_rate_limit_ip = check_rate_limit(ip, now) if check_rate_limit_ip: @@ -268,7 +282,7 @@ async def check_ports(request): result = {} for port in ports: - result[port] = await check_port_is_open(ip, port) + result[int(port)] = await check_port_is_open(ip, port) return json_response({"status": "ok", "ports": result}) diff --git a/yunodiagnoser.service b/yunodiagnoser.service new file mode 100644 index 0000000..7500849 --- /dev/null +++ b/yunodiagnoser.service @@ -0,0 +1,11 @@ +[Unit] +Description=A server providing features for remote-diagnosis for Yunohost servers + +[Service] +Type=simple +WorkingDirectory={{ WORKING_DIR }} +ExecStart={{ WORKING_DIR }}/venv/bin/python3.6 yunodiagnoser.py &> server.log +ExecStop=/bin/kill `/bin/ps aux | /bin/grep yunodiagnoser.py | /bin/grep -v grep | /usr/bin/awk '{ print $2 }'` + +[Install] +WantedBy=multi-user.target From 763e2359ae3e4e9e1177bc479c9171ddfc79a4b3 Mon Sep 17 00:00:00 2001 From: Kay0u Date: Sat, 11 Apr 2020 16:08:14 +0200 Subject: [PATCH 09/14] ipv6 support --- yunodiagnoser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yunodiagnoser.py b/yunodiagnoser.py index 9272ac9..381ce2f 100644 --- a/yunodiagnoser.py +++ b/yunodiagnoser.py @@ -59,7 +59,10 @@ def check_rate_limit(key, now): async def check_port_is_open(ip, port): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + if ":" in ip: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + else: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(2) result = sock.connect_ex((ip, port)) sock.close() From a0521ab4e4ab7f5a49fb53df3b57416e72150707 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Tue, 14 Apr 2020 13:44:26 +0000 Subject: [PATCH 10/14] Fix http diagnosis in IPv6 --- yunodiagnoser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yunodiagnoser.py b/yunodiagnoser.py index 381ce2f..90bf38d 100644 --- a/yunodiagnoser.py +++ b/yunodiagnoser.py @@ -174,6 +174,9 @@ async def check_http(request): # Run the actual check # # ############################################# # + if ":" in ip: + ip = "[%s]" % ip + async with aiohttp.ClientSession() as session: try: url = "http://" + ip + "/.well-known/ynh-diagnosis/" + nonce From f8efb079806c63c8619b70b279a41c540191f8f0 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Thu, 16 Apr 2020 00:43:38 +0200 Subject: [PATCH 11/14] Simplify args validation and API response format, implement multiple domain checks for http --- yunodiagnoser.py | 239 ++++++++++++++++++++++++----------------------- 1 file changed, 124 insertions(+), 115 deletions(-) diff --git a/yunodiagnoser.py b/yunodiagnoser.py index 90bf38d..10fb509 100644 --- a/yunodiagnoser.py +++ b/yunodiagnoser.py @@ -1,7 +1,6 @@ import re import time import asyncio -import aiodns import aiohttp import validators import socket @@ -13,6 +12,10 @@ from sanic.exceptions import InvalidUsage app = Sanic() +# ########################################################################### # +# Rate limit # +# ########################################################################### # + # keep that in memory RATE_LIMIT_DB = {} @@ -45,9 +48,10 @@ def check_rate_limit(key, now): oldest_attempt = RATE_LIMIT_DB[key][0] logger.info(f"Rate limit reached for {key}, can retry in {int(RATE_LIMIT_SECONDS - now + oldest_attempt)} seconds") return json_response({ - "status": "error", - "code": "error_rate_limit", - "content": f"Rate limit reached for this domain or ip, retry in {int(RATE_LIMIT_SECONDS - now + oldest_attempt)} seconds", + "error": { + "code": "error_rate_limit", + "content": f"Rate limit reached for this domain or ip, retry in {int(RATE_LIMIT_SECONDS - now + oldest_attempt)} seconds" + } }, status=400) # In any case, add this attempt to the DB @@ -57,39 +61,17 @@ def check_rate_limit(key, now): RATE_LIMIT_DB[key].append(now) -async def check_port_is_open(ip, port): - - if ":" in ip: - sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) - else: - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.settimeout(2) - result = sock.connect_ex((ip, port)) - sock.close() - return result == 0 +# ########################################################################### # +# HTTP check # +# ########################################################################### # -# FIXME : remove it ? not used anymore... -async def query_dns(host, dns_entry_type): - loop = asyncio.get_event_loop() - dns_resolver = aiodns.DNSResolver(loop=loop) - - try: - return await dns_resolver.query(host, dns_entry_type) - except aiodns.error.DNSError: - return [] - except Exception: - import traceback - traceback.print_exc() - logger.error("Unhandled error while resolving DNS entry") - - -@app.route("/check-http/", methods=["POST"]) +@app.route("/check-http", methods=["POST"]) async def check_http(request): """ This function received an HTTP request from a YunoHost instance while this server is hosted on our infrastructure. The request is expected to be a - POST request with a body like {"domain": "domain-to-check.tld", + POST request with a body like {"domains": ["domain1.tld", "domain2.tld"], "nonce": "1234567890abcdef" } The nonce value is a single-use ID, and we will try to reach @@ -101,10 +83,11 @@ async def check_http(request): - grab the ip from the request - check for ip based rate limit (see RATE_LIMIT_SECONDS value) - get json from body and domain from it - - check for domain based rate limit (see RATE_LIMIT_SECONDS value) - - check domain is in valid format - - try to do an http request on the ip (using the domain as target host) for the page /.well-known/ynh-diagnosis/{nonce} - - answer saying if the domain can be reached + - check for domain-based rate limit (see RATE_LIMIT_SECONDS value) + - check domains are in valid format + - for each domain: + - try to do an http request on the ip (using the domain as target host) for the page /.well-known/ynh-diagnosis/{nonce} + - answer saying if the domain can be reached """ # this is supposed to be a fast operation if run often enough @@ -126,53 +109,58 @@ async def check_http(request): except InvalidUsage: logger.info(f"Invalid json in request, body is: {request.body}") return json_response({ - "status": "error", - "code": "error_bad_json", - "content": "Invalid usage, body isn't proper json", + "error": { + "code": "error_bad_json", + "content": "Invalid usage, body isn't proper json" + } }, status=400) - if not data or "domain" not in data or "nonce" not in data: - logger.info(f"Invalid request: didn't specified a domain and a nonce id (body is: {request.body}") + try: + assert data, "Empty request body" + assert isinstance(data, dict), "Request body ain't a proper dict" + assert "domains" in data, "No 'domains' provided" + assert "nounce" in data, "No 'nounce' provided" + + # Check domain list format + assert isinstance(data["domains"], list), "'domains' ain't a list" + assert len(data["domains"]) > 0, "'domains' list is empty" + assert len(data["domains"]) < 30, "You cannot test that many domains" + for domain in data["domains"]: + assert isinstance(domain, str), "domain names must be strings" + assert len(domain) < 100, "Domain %s name seems pretty long, that's suspicious...?" % domain + assert len(data["domains"]) == len(set(data["domains"])), "'domains' list should contain unique elements" + + # Check domain rate limit + for domain in data["domains"]: + check_rate_limit_domain = check_rate_limit(domain, now) + if check_rate_limit_domain: + return check_rate_limit_domain + + # Check domains are valid domain names + for domain in data["domains"]: + assert validators.domain(domain), f"{domain} is not a valid domain" + + # Check nonce format + assert isinstance(data["nonce"], str), "'nonce' ain't a string" + assert re.match(r"^[a-f0-9]{16}$", data["nonce"]), "'nonce' is not in the right forwat (it should be a 16-digit hexadecimal string)" + except AssertionError as e: + logger.info(f"Invalid request: {e} ... Original request body was: {request.body}") return json_response({ - "status": "error", - "code": "error_no_domain_", - "content": "Request must specify a domain and a nonce", - }, status=400) - - domain = data["domain"] - - # Since now we are only checking the IP itself, it seems - # unecessary to also have a rate limit on domains since the - # rate limit on IP will be hit first ... - # That would simplify some code, for example we could add the - # rate limit check in a decorator for each route/check - check_rate_limit_domain = check_rate_limit(domain, now) - if check_rate_limit_domain: - return check_rate_limit_domain - - if not validators.domain(domain): - logger.info(f"Invalid request, is not in the right format (domain is: {domain})") - return json_response({ - "status": "error", - "code": "error_domain_bad_format", - "content": "domain is not in the right format (do not include http:// or https://)", + "error": { + "code": "error_bad_json_data", + "content": f"Invalid request: {e} ... Original request body was: {request.body}" + } }, status=400) + domain = data["domains"] nonce = data["nonce"] - # nonce id is arbitrarily defined to be a - # 16-digit hexadecimal string - if not re.match(r"^[a-f0-9]{16}$", nonce): - logger.info(f"Invalid request, is not in the right format (nonce is: {nonce})") - return json_response({ - "status": "error", - "code": "error_nonce_bad_format", - "content": "nonce is not in the right format (it should be a 16-digit hexadecimal string)", - }, status=400) + return json_response({ + "http": {domain: await check_http_domain(ip, domain, nonce)} for domain in domains} + }) - # ############################################# # - # Run the actual check # - # ############################################# # + +async def check_http_domain(ip, domain, nonce): if ":" in ip: ip = "[%s]" % ip @@ -183,42 +171,44 @@ async def check_http(request): async with session.get(url, headers={"Host": domain}, allow_redirects=False, - timeout=aiohttp.ClientTimeout(total=10)) as response: + timeout=aiohttp.ClientTimeout(total=5)) as response: # XXX in the futur try to do a double check with the server to # see if the correct content is get await response.text() # TODO various kind of errors except (aiohttp.client_exceptions.ServerTimeoutError, asyncio.TimeoutError): - return json_response({ - "status": "error", - "code": "error_http_check_timeout", + return { + "status": "error_http_check_timeout", "content": "Timed-out while trying to contact your server from outside. It appears to be unreachable. You should check that you're correctly forwarding port 80, that nginx is running, and that a firewall is not interfering.", - }, status=418) + } except aiohttp.client_exceptions.ClientConnectorError as e: - return json_response({ - "status": "error", - "code": "error_http_check_connection_error", + return { + "status": "error_http_check_connection_error", "content": "Connection error: could not connect to the requested domain, it's very likely unreachable. Raw error: " + str(e), - }, status=418) + } except Exception as e: import traceback traceback.print_exc() - return json_response({ - "status": "error", - "code": "error_http_check_unknown_error", + return { + "status": "error_http_check_unknown_error", "content": "An error happened while trying to reach your domain, it's very likely unreachable. Raw error: %s" % e, - }, status=400) + } if response.status != 200: - return json_response({ - "status": "error", - "code": "error_http_check_bad_status_code", + return { + "status": "error_http_check_bad_status_code", "content": "Could not reach your server as expected, it returned code %s. It might be that another machine answered instead of your server. You should check that you're correctly forwarding port 80, that your nginx configuration is up to date, and that a reverse-proxy is not interfering." % response.status, - }, status=418) + } else: - logger.info(f"Success when checking http access for {domain} asked by {ip}") - return json_response({"status": "ok"}) + return { + "status": "ok" + } + + +# ########################################################################### # +# Ports check # +# ########################################################################### # @app.route("/check-ports/", methods=["POST"]) @@ -256,41 +246,60 @@ async def check_ports(request): except InvalidUsage: logger.info(f"Invalid json in request, body is: {request.body}") return json_response({ - "status": "error", - "code": "error_bad_json", - "content": "Invalid usage: body isn't proper json", + "error": { + "code": "error_bad_json", + "content": "Invalid usage, body isn't proper json" + } }, status=400) - def is_port_number(p): - return isinstance(p, int) and p > 0 and p < 65535 + try: + assert data, "Empty request body" + assert isinstance(data, dict), "Request body ain't a proper dict" + assert "ports" in data, "No 'ports' provided" - # Check "ports" exist in request and is a list of port - if not data or "ports" not in data: - logger.info(f"Invalid request didn't specified a ports list (body is: {request.body}") + assert isinstance(data["ports"], list), "'ports' ain't a list" + assert len(data["ports"]) > 0, "'ports' list is empty" + assert len(data["ports"]) < 30, "That's too many ports to check" + assert len(data["ports"]) == len(set(data["ports"])), "'ports' list should contain unique elements" + + def is_port_number(p): + return isinstance(p, int) and p > 0 and p < 65535 + assert all(not is_port_number(p) for p in data["ports"]), "'ports' should a list of valid port numbers" + except AssertionError as e: + logger.info(f"Invalid request: {e} ... Original request body was: {request.body}") return json_response({ - "status": "error", - "code": "error_no_ports_list", - "content": "Request must specify a list of ports to check", + "error": { + "code": "error_bad_json_data", + "content": f"Invalid request: {e} ... Original request body was: {request.body}" + } }, status=400) - elif not isinstance(data["ports"], list) or any(not is_port_number(p) for p in data["ports"]) or len(data["ports"]) > 30 or data["ports"] == []: - logger.info(f"Invalid request, ports list is not an actual list of ports, or is too long: {request.body}") - return json_response({ - "status": "error", - "code": "error_invalid_ports_list", - "content": "This is not an acceptable port list: ports must be between 0 and 65535 and at most 30 ports can be checked", - }, status=400) - - ports = set(data["ports"]) # Keep only a set so that we get unique ports # ############################################# # # Run the actual check # # ############################################# # result = {} - for port in ports: + for port in data["ports"]: result[int(port)] = await check_port_is_open(ip, port) - return json_response({"status": "ok", "ports": result}) + return json_response({"ports": result}) + + +async def check_port_is_open(ip, port): + + if ":" in ip: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + else: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(2) + result = sock.connect_ex((ip, port)) + sock.close() + return result == 0 + + +# ########################################################################### # +# SMTP check # +# ########################################################################### # @app.route("/check-smtp/", methods=["POST"]) From e8bd827f96d8ed4198ca299fb746c88e2eac81db Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Thu, 16 Apr 2020 00:43:53 +0200 Subject: [PATCH 12/14] Implement smtp check --- yunodiagnoser.py | 56 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/yunodiagnoser.py b/yunodiagnoser.py index 10fb509..569d474 100644 --- a/yunodiagnoser.py +++ b/yunodiagnoser.py @@ -304,12 +304,60 @@ async def check_port_is_open(ip, port): @app.route("/check-smtp/", methods=["POST"]) async def check_smtp(request): + """ + This function received an HTTP request from a YunoHost instance while this + server is hosted on our infrastructure. The request is expected to be a + POST request with an empty body - # TODO + The general workflow is the following: - return json_reponse({"status": "error", - "code": "error_not_implemented_yet", - "content": "This is not yet implemented"}) + - grab the ip from the request + - check for ip based rate limit (see RATE_LIMIT_SECONDS value) + - open a socket on port 25 + - the server is supposed to say '200 domain.tld Service ready' + - we return the domain.tld found + """ + + # this is supposed to be a fast operation if run often enough + now = time.time() + clear_rate_limit_db(now) + + # ############################################# # + # Validate request and extract the parameters # + # ############################################# # + + ip = request.headers["x-forwarded-for"].split(",")[0] + + check_rate_limit_ip = check_rate_limit(ip, now) + if check_rate_limit_ip: + return check_rate_limit_ip + + if ":" in ip: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + else: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + + sock.settimeout(2) + result = sock.connect_ex((ip, 25)) + if result != 0: + return json_response({ + 'status': "error_smtp_unreachable", + 'content': "Could not open a connection on port 25, probably because of a firewall or port forwarding issue" + }) + + try: + recv = sock.recv(1024).decode('utf-8') + assert recv[:3] == "220" + helo_domain = recv.split()[1].strip() + except: + return json_response({ + 'status': "error_smtp_bad_answer", + 'content': "SMTP server did not reply with '220 domain.tld' after opening socket ... Maybe another machine answered." + }) + finally: + sock.close() + + return json_response({'status': 'ok', 'helo': helo_domain}) @app.route("/") From 91560500738613ee9408959d1dc1b575e638d8b4 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Thu, 16 Apr 2020 01:28:10 +0000 Subject: [PATCH 13/14] Fixes following tests on the battlefield --- yunodiagnoser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yunodiagnoser.py b/yunodiagnoser.py index 569d474..cbdf49d 100644 --- a/yunodiagnoser.py +++ b/yunodiagnoser.py @@ -119,7 +119,7 @@ async def check_http(request): assert data, "Empty request body" assert isinstance(data, dict), "Request body ain't a proper dict" assert "domains" in data, "No 'domains' provided" - assert "nounce" in data, "No 'nounce' provided" + assert "nonce" in data, "No 'nonce' provided" # Check domain list format assert isinstance(data["domains"], list), "'domains' ain't a list" @@ -152,11 +152,11 @@ async def check_http(request): } }, status=400) - domain = data["domains"] + domains = data["domains"] nonce = data["nonce"] return json_response({ - "http": {domain: await check_http_domain(ip, domain, nonce)} for domain in domains} + "http": {domain: await check_http_domain(ip, domain, nonce) for domain in domains} }) From 47c41cab06b48660b5ca16bbc73705055c40db37 Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Sun, 19 Apr 2020 00:35:54 +0000 Subject: [PATCH 14/14] Fix port format validation --- yunodiagnoser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yunodiagnoser.py b/yunodiagnoser.py index cbdf49d..d50d1eb 100644 --- a/yunodiagnoser.py +++ b/yunodiagnoser.py @@ -264,7 +264,7 @@ async def check_ports(request): def is_port_number(p): return isinstance(p, int) and p > 0 and p < 65535 - assert all(not is_port_number(p) for p in data["ports"]), "'ports' should a list of valid port numbers" + assert all(is_port_number(p) for p in data["ports"]), "'ports' should a list of valid port numbers" except AssertionError as e: logger.info(f"Invalid request: {e} ... Original request body was: {request.body}") return json_response({