Merge pull request #1 from YunoHost/check-http-with-nounce-and-check-ports

Check http with nonce + add check ports feature
2024-09-03 19:56:42 +02:00 · 2020-12-17 03:22:17 +01:00 · 2020-12-17 03:22:17 +01:00 · f5e9385955
commit f5e9385955
parent 7d868ec5dd 47c41cab06
3 changed files with 380 additions and 206 deletions
--- a/server.py
+++ b/server.py
@ -1,206 +0,0 @@
-import time
-import asyncio
-import aiodns
-import aiohttp
-import validators
-
-from sanic import Sanic
-from sanic.log import logger
-from sanic.response import html, json as json_response
-from sanic.exceptions import InvalidUsage
-
-app = Sanic()
-
-# keep that in memory
-RATE_LIMIT_DB = {}
-
-# to prevent DDoS or bounce attack attempt or something like that
-RATE_LIMIT_SECONDS = 5
-
-
-def clear_rate_limit_db(now):
-    to_delete = []
-
-    "Remove too old rate limit values"
-    for key, value in RATE_LIMIT_DB.items():
-        if now - value > RATE_LIMIT_SECONDS:
-            # a dictionnary can't be modified during iteration so delegate this
-            # operation
-            to_delete.append(key)
-
-    for key in to_delete:
-        del RATE_LIMIT_DB[key]
-
-
-async def query_dns(host, dns_entry_type):
-    loop = asyncio.get_event_loop()
-    dns_resolver = aiodns.DNSResolver(loop=loop)
-
-    try:
-        return await dns_resolver.query(host, dns_entry_type)
-    except aiodns.error.DNSError:
-        return []
-    except Exception:
-        import traceback
-        traceback.print_exc()
-        logger.error("Unhandled error while resolving DNS entry")
-
-
-@app.route("/check/", methods=["POST"])
-async def check_http(request):
-    """
-    This function received an HTTP request from a YunoHost instance while this
-    server is hosted on our infrastructure. The expected request body is:
-    {"domain": "domain-to-check.tld"} and the method POST
-
-    The general workflow is the following:
-
-    - grab the ip from the request
-    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
-    - get json from body and domain from it
-    - check for domain based rate limit (see RATE_LIMIT_SECONDS value)
-    - check domain is in valid format
-    - check dns entry for domain match the ip of the request (advanced rule for ipv6)
-    - everything is checked, now try to do an http request on the domain
-    - answer saying if the domain can be reached
-    """
-
-    # this is supposed to be a fast operation if run enough
-    now = time.time()
-    clear_rate_limit_db(now)
-
-    ip = request.ip
-
-    if ip in RATE_LIMIT_DB:
-        since_last_attempt = now - RATE_LIMIT_DB[ip]
-        if since_last_attempt < RATE_LIMIT_SECONDS:
-            logger.info(f"Rate limite {ip}, can retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds")
-            return json_response({
-                "status": "error",
-                "code": "error_rate_limit",
-                "content": f"Rate limit on ip, retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds",
-            }, status=400)
-
-    RATE_LIMIT_DB[ip] = time.time()
-
-    try:
-        data = request.json
-    except InvalidUsage:
-        logger.info(f"Unvalid json in request, body is : {request.body}")
-        return json_response({
-            "status": "error",
-            "code": "error_bad_json",
-            "content": "InvalidUsage, body isn't proper json",
-        }, status=400)
-
-    if not data or "domain" not in data:
-        logger.info(f"Unvalid request didn't specified a domain (body is : {request.body}")
-        return json_response({
-            "status": "error",
-            "code": "error_no_domain",
-            "content": "request must specify a domain",
-        }, status=400)
-
-    domain = data["domain"]
-
-    if domain in RATE_LIMIT_DB:
-        since_last_attempt = now - RATE_LIMIT_DB[domain]
-        if since_last_attempt < RATE_LIMIT_SECONDS:
-            logger.info(f"Rate limite {domain}, can retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds")
-            return json_response({
-                "status": "error",
-                "code": "error_rate_limit",
-                "content": f"Rate limit on domain, retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds",
-            }, status=400)
-
-    RATE_LIMIT_DB[domain] = time.time()
-
-    if not validators.domain(domain):
-        logger.info(f"Invalid request, is not in the right format (domain is : {domain})")
-        return json_response({
-            "status": "error",
-            "code": "error_domain_bad_format",
-            "content": "domain is not in the right format (do not include http:// or https://)",
-        }, status=400)
-
-    # TODO handle ipv6
-    # ipv6 situation
-    if ":" in ip:
-        dns_entry = await query_dns(domain, "AAAA")
-
-        if not dns_entry:
-            # check if entry in ip4 for custom error
-            dns_entry = await query_dns(domain, "A")
-
-            # there is an ipv4 entry but the request is made in ipv6, ask to uses ipv4 instead
-            if dns_entry:
-                logger.info(f"[ipv6] Invalid request, no AAAA DNS entry for domain {domain} BUT ipv4 entry, ask user to request in ipv4")
-                return json_response({
-                    "status": "error",
-                    "code": "error_no_ipv6_dns_entry_but_ipv4_dns_entry",
-                    "content": f"there is not AAAA (ipv6) DNS entry for domain {domain} BUT there is an entry in ipv4, please redo the request in ipv4",
-                }, status=400)
-
-            else:
-                logger.info(f"[ipv6] Invalid request, no DNS entry for domain {domain} (both in ipv6 and ip4)")
-                return json_response({
-                    "status": "error",
-                    "code": "error_no_ipv4_ipv6_dns_entry_for_domain",
-                    "content": f"there is not A (ipv4) and AAAA (ipv6) DNS entry for domain {domain}",
-                }, status=400)
-    # ipv4 situation
-    else:
-        dns_entry = await query_dns(domain, "A")
-
-        if not dns_entry:
-            logger.info(f"[ipv4] Invalid request, no DNS entry for domain {domain}")
-            return json_response({
-                "status": "error",
-                "code": "error_no_ipv4_dns_entry_for_domain",
-                "content": f"there is not A (ipv4) and AAAA (ipv6) DNS entry for domain {domain}",
-            }, status=400)
-
-    dns_entry = dns_entry[0]
-
-    if dns_entry.host != ip:
-        logger.info(f"Invalid request, A DNS entry {dns_entry.host} for domain {domain} doesn't match request ip {ip}")
-        return json_response({
-            "status": "error",
-            "code": "error_dns_entry_doesnt_match_request_ip",
-            "content": f"error, the request is made from the ip {ip} but the dns entry said {domain} has the ip {dns_entry.host}, you can only check a domain configured for your ip",
-        }, status=400)
-
-    async with aiohttp.ClientSession() as session:
-        try:
-            async with session.get("http://" + domain, timeout=aiohttp.ClientTimeout(total=30)) as response:
-                # XXX in the futur try to do a double check with the server to
-                # see if the correct content is get
-                await response.text()
-                logger.info(f"Success when checking http access for {domain} asked by {ip}")
-        # TODO various kind of errors
-        except aiohttp.client_exceptions.ClientConnectorError:
-            return json_response({
-                "status": "error",
-                "code": "error_http_check_connection_error",
-                "content": "connection error, could not connect to the requested domain, it's very likely unreachable",
-            }, status=418)
-        except Exception:
-            import traceback
-            traceback.print_exc()
-
-            return json_response({
-                "status": "error",
-                "code": "error_http_check_unknown_error",
-                "content": "an error happen while trying to get your domain, it's very likely unreachable",
-            }, status=400)
-
-    return json_response({"status": "ok"})
-
-
-@app.route("/")
-async def main(request):
-    return html("You aren't really supposed to use this website using your browser.<br><br>It's a small server to check if a YunoHost instance can be reached by http before trying to instal a LE certificate.")
-
-
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7000)
--- a/yunodiagnoser.py
+++ b/yunodiagnoser.py
@ -0,0 +1,369 @@
+import re
+import time
+import asyncio
+import aiohttp
+import validators
+import socket
+
+from sanic import Sanic
+from sanic.log import logger
+from sanic.response import html, json as json_response
+from sanic.exceptions import InvalidUsage
+
+# Sanic application instance; the route handlers below register on it.
+app = Sanic()
+
+# ########################################################################### #
+#   Rate limit                                                                #
+# ########################################################################### #
+
+# In-memory rate-limit store: maps a key (client ip or domain name) to the
+# list of timestamps (time.time() values) of its recent attempts. Being a
+# plain module-level dict, it is not persisted anywhere.
+RATE_LIMIT_DB = {}
+
+# Guard against DDoS / bounce-attack attempts: a key is meant to be limited to
+# RATE_LIMIT_NB_REQUESTS attempts per sliding window of RATE_LIMIT_SECONDS
+# seconds.
+RATE_LIMIT_SECONDS = 300
+RATE_LIMIT_NB_REQUESTS = 10
+
+def clear_rate_limit_db(now):
+    """
+    Purge expired entries from RATE_LIMIT_DB.
+
+    For every key, only the timestamps younger than RATE_LIMIT_SECONDS
+    (relative to `now`) are kept; keys left without any timestamp are removed
+    from the DB altogether.
+    """
+    # Iterate over a snapshot of the keys so that entries can be deleted on
+    # the fly (a dictionary can't change size while being iterated)
+    for key in list(RATE_LIMIT_DB):
+        recent = [t for t in RATE_LIMIT_DB[key] if now - t < RATE_LIMIT_SECONDS]
+        if recent:
+            RATE_LIMIT_DB[key] = recent
+        else:
+            del RATE_LIMIT_DB[key]
+
+
+def check_rate_limit(key, now):
+    """
+    Enforce the rate limit for `key` (a client ip or a domain name).
+
+    Returns a 400 json response describing the error if `key` already used up
+    its RATE_LIMIT_NB_REQUESTS attempts currently stored in RATE_LIMIT_DB.
+    Otherwise, records this attempt at time `now` and returns None.
+
+    Expired attempts are not dropped here: that is the job of
+    clear_rate_limit_db(), which the route handlers call beforehand.
+    """
+
+    attempts = RATE_LIMIT_DB.get(key, [])
+
+    # Use >= (not >): with > one request more than RATE_LIMIT_NB_REQUESTS
+    # would still get through before the limit kicks in
+    if len(attempts) >= RATE_LIMIT_NB_REQUESTS:
+        # The oldest attempt is the first one to leave the time window
+        retry_in = int(RATE_LIMIT_SECONDS - now + attempts[0])
+        logger.info(f"Rate limit reached for {key}, can retry in {retry_in} seconds")
+        return json_response({
+            "error": {
+                "code": "error_rate_limit",
+                "content": f"Rate limit reached for this domain or ip, retry in {retry_in} seconds"
+            }
+        }, status=400)
+
+    # Only accepted attempts are recorded (rejected ones returned above)
+    RATE_LIMIT_DB.setdefault(key, []).append(now)
+    return None
+
+
+# ########################################################################### #
+#   HTTP check                                                                #
+# ########################################################################### #
+
+
+@app.route("/check-http", methods=["POST"])
+async def check_http(request):
+    """
+    This function receives an HTTP request from a YunoHost instance while this
+    server is hosted on our infrastructure. The request is expected to be a
+    POST request with a body like {"domains": ["domain1.tld", "domain2.tld"],
+                                   "nonce": "1234567890abcdef" }
+
+    The nonce value is a single-use ID, and we will try to reach
+    http://{ip}/.well-known/ynh-diagnosis/{nonce} (using each domain as Host
+    header), which should return 200 if we are indeed reaching the right
+    server.
+
+    The general workflow is the following:
+
+    - grab the ip from the request
+    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
+    - get json from body and domains + nonce from it
+    - check for domain-based rate limit (see RATE_LIMIT_SECONDS value)
+    - check domains and nonce are in valid format
+    - for each domain:
+        - try to do an http request on the ip (using the domain as target host) for the page /.well-known/ynh-diagnosis/{nonce}
+        - answer saying if the domain can be reached
+
+    Returns a json response {"http": {domain: result, ...}} on success, a 400
+    json response {"error": {"code": ..., "content": ...}} if the request is
+    invalid, or the rate-limit error response when a limit is hit.
+    """
+
+    def require(condition, message):
+        # Explicit replacement for `assert`, which is stripped when Python
+        # runs with -O and would then silently disable the whole validation
+        if not condition:
+            raise ValueError(message)
+
+    # this is supposed to be a fast operation if run often enough
+    now = time.time()
+    clear_rate_limit_db(now)
+
+    # ############################################# #
+    #  Validate request and extract the parameters  #
+    # ############################################# #
+
+    # The client ip is read from X-Forwarded-For (presumably set by a reverse
+    # proxy in front of this server - to be confirmed). Fall back on the ip of
+    # the connection instead of crashing with a KeyError when it is missing.
+    forwarded_for = request.headers.get("x-forwarded-for")
+    ip = forwarded_for.split(",")[0].strip() if forwarded_for else request.ip
+
+    check_rate_limit_ip = check_rate_limit(ip, now)
+    if check_rate_limit_ip:
+        return check_rate_limit_ip
+
+    try:
+        data = request.json
+    except InvalidUsage:
+        logger.info(f"Invalid json in request, body is: {request.body}")
+        return json_response({
+            "error": {
+                "code": "error_bad_json",
+                "content": "Invalid usage, body isn't proper json"
+            }
+        }, status=400)
+
+    try:
+        require(data, "Empty request body")
+        require(isinstance(data, dict), "Request body ain't a proper dict")
+        require("domains" in data, "No 'domains' provided")
+        require("nonce" in data, "No 'nonce' provided")
+
+        # Check domain list format
+        require(isinstance(data["domains"], list), "'domains' ain't a list")
+        require(len(data["domains"]) > 0, "'domains' list is empty")
+        require(len(data["domains"]) < 30, "You cannot test that many domains")
+        for domain in data["domains"]:
+            require(isinstance(domain, str), "domain names must be strings")
+            require(len(domain) < 100, "Domain %s name seems pretty long, that's suspicious...?" % domain)
+        # Every item is known to be a string at this point, so set() is safe
+        require(len(data["domains"]) == len(set(data["domains"])), "'domains' list should contain unique elements")
+
+        # Check domain rate limit
+        for domain in data["domains"]:
+            check_rate_limit_domain = check_rate_limit(domain, now)
+            if check_rate_limit_domain:
+                return check_rate_limit_domain
+
+        # Check domains are valid domain names
+        for domain in data["domains"]:
+            require(validators.domain(domain), f"{domain} is not a valid domain")
+
+        # Check nonce format
+        require(isinstance(data["nonce"], str), "'nonce' ain't a string")
+        require(re.match(r"^[a-f0-9]{16}$", data["nonce"]), "'nonce' is not in the right forwat (it should be a 16-digit hexadecimal string)")
+    except ValueError as e:
+        logger.info(f"Invalid request: {e} ... Original request body was: {request.body}")
+        return json_response({
+            "error": {
+                "code": "error_bad_json_data",
+                "content": f"Invalid request: {e} ... Original request body was: {request.body}"
+            }
+        }, status=400)
+
+    domains = data["domains"]
+    nonce = data["nonce"]
+
+    # ############################################# #
+    #  Run the actual check                         #
+    # ############################################# #
+
+    # Domains are checked one after the other
+    return json_response({
+        "http": {domain: await check_http_domain(ip, domain, nonce) for domain in domains}
+    })
+
+
+async def check_http_domain(ip, domain, nonce):
+    """
+    Try to fetch /.well-known/ynh-diagnosis/{nonce} over http on `ip`, using
+    `domain` as the Host header.
+
+    Returns a dict whose "status" is "ok" when the server answered with a 200,
+    or an error code (along with a human-readable "content") otherwise.
+    Redirects are not followed and the whole request is given 5 seconds.
+    """
+
+    # An ipv6 literal has to be wrapped in brackets inside a url
+    host = f"[{ip}]" if ":" in ip else ip
+
+    async with aiohttp.ClientSession() as session:
+        try:
+            url = f"http://{host}/.well-known/ynh-diagnosis/{nonce}"
+            async with session.get(url,
+                                   headers={"Host": domain},
+                                   allow_redirects=False,
+                                   timeout=aiohttp.ClientTimeout(total=5)) as response:
+                # XXX in the future, try to do a double check with the server
+                # to see if the correct content is returned
+                await response.text()
+                status_code = response.status
+        # TODO various kind of errors
+        except (aiohttp.client_exceptions.ServerTimeoutError, asyncio.TimeoutError):
+            return {
+                "status": "error_http_check_timeout",
+                "content": "Timed-out while trying to contact your server from outside. It appears to be unreachable. You should check that you're correctly forwarding port 80, that nginx is running, and that a firewall is not interfering.",
+            }
+        except aiohttp.client_exceptions.ClientConnectorError as e:
+            return {
+                "status": "error_http_check_connection_error",
+                "content": f"Connection error: could not connect to the requested domain, it's very likely unreachable. Raw error: {e}",
+            }
+        except Exception as e:
+            import traceback
+            traceback.print_exc()
+
+            return {
+                "status": "error_http_check_unknown_error",
+                "content": f"An error happened while trying to reach your domain, it's very likely unreachable. Raw error: {e}",
+            }
+
+    if status_code == 200:
+        return {
+            "status": "ok"
+        }
+
+    return {
+        "status": "error_http_check_bad_status_code",
+        "content": f"Could not reach your server as expected, it returned code {status_code}. It might be that another machine answered instead of your server. You should check that you're correctly forwarding port 80, that your nginx configuration is up to date, and that a reverse-proxy is not interfering.",
+    }
+
+
+# ########################################################################### #
+#   Ports check                                                               #
+# ########################################################################### #
+
+
+@app.route("/check-ports/", methods=["POST"])
+async def check_ports(request):
+    """
+    This function receives an HTTP request from a YunoHost instance while this
+    server is hosted on our infrastructure. The request is expected to be a
+    POST request with a body like {"ports": [80,443,22,25]}
+
+    The general workflow is the following:
+
+    - grab the ip from the request
+    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
+    - get json from body and ports list from it
+    - check ports are opened or closed
+    - answer the list of opened / closed ports
+
+    Returns a json response {"ports": {port: True/False, ...}} on success, a
+    400 json response {"error": {"code": ..., "content": ...}} if the request
+    is invalid, or the rate-limit error response when the limit is hit.
+    """
+
+    def require(condition, message):
+        # Explicit replacement for `assert`, which is stripped when Python
+        # runs with -O and would then silently disable the whole validation
+        if not condition:
+            raise ValueError(message)
+
+    def is_port_number(p):
+        # bool is a subclass of int: reject it explicitly so that a json
+        # `true` is not taken for port 1. 65535 is a valid port number.
+        return isinstance(p, int) and not isinstance(p, bool) and 0 < p <= 65535
+
+    # this is supposed to be a fast operation if run often enough
+    now = time.time()
+    clear_rate_limit_db(now)
+
+    # ############################################# #
+    #  Validate request and extract the parameters  #
+    # ############################################# #
+
+    # The client ip is read from X-Forwarded-For (presumably set by a reverse
+    # proxy in front of this server - to be confirmed). Fall back on the ip of
+    # the connection instead of crashing with a KeyError when it is missing.
+    forwarded_for = request.headers.get("x-forwarded-for")
+    ip = forwarded_for.split(",")[0].strip() if forwarded_for else request.ip
+
+    check_rate_limit_ip = check_rate_limit(ip, now)
+    if check_rate_limit_ip:
+        return check_rate_limit_ip
+
+    try:
+        data = request.json
+    except InvalidUsage:
+        logger.info(f"Invalid json in request, body is: {request.body}")
+        return json_response({
+            "error": {
+                "code": "error_bad_json",
+                "content": "Invalid usage, body isn't proper json"
+            }
+        }, status=400)
+
+    try:
+        require(data, "Empty request body")
+        require(isinstance(data, dict), "Request body ain't a proper dict")
+        require("ports" in data, "No 'ports' provided")
+
+        require(isinstance(data["ports"], list), "'ports' ain't a list")
+        require(len(data["ports"]) > 0, "'ports' list is empty")
+        require(len(data["ports"]) < 30, "That's too many ports to check")
+        # Validate the items *before* building a set out of them: unhashable
+        # items (e.g. nested lists) would make set() raise a TypeError
+        require(all(is_port_number(p) for p in data["ports"]), "'ports' should a list of valid port numbers")
+        require(len(data["ports"]) == len(set(data["ports"])), "'ports' list should contain unique elements")
+    except ValueError as e:
+        logger.info(f"Invalid request: {e} ... Original request body was: {request.body}")
+        return json_response({
+            "error": {
+                "code": "error_bad_json_data",
+                "content": f"Invalid request: {e} ... Original request body was: {request.body}"
+            }
+        }, status=400)
+
+    # ############################################# #
+    #  Run the actual check                         #
+    # ############################################# #
+
+    # Ports are checked one after the other
+    result = {}
+    for port in data["ports"]:
+        result[port] = await check_port_is_open(ip, port)
+
+    return json_response({"ports": result})
+
+
+async def check_port_is_open(ip, port):
+    """
+    Tell whether a TCP connection can be opened on `ip`:`port`.
+
+    Returns True if the connection succeeds within 2 seconds, False if it is
+    refused, times out or fails for any other network-related reason.
+    """
+
+    # Use asyncio streams rather than a blocking socket: a blocking connect()
+    # inside a coroutine freezes the whole event loop (hence every other
+    # request) for up to the timeout duration.
+    try:
+        _, writer = await asyncio.wait_for(asyncio.open_connection(ip, port), timeout=2)
+    except (OSError, asyncio.TimeoutError):
+        # OSError also covers address resolution errors (socket.gaierror)
+        return False
+
+    # Nothing to exchange, we only wanted to know if the port is reachable
+    writer.close()
+    return True
+
+
+# ########################################################################### #
+#   SMTP check                                                                #
+# ########################################################################### #
+
+
+@app.route("/check-smtp/", methods=["POST"])
+async def check_smtp(request):
+    """
+    This function receives an HTTP request from a YunoHost instance while this
+    server is hosted on our infrastructure. The request is expected to be a
+    POST request with an empty body
+
+    The general workflow is the following:
+
+    - grab the ip from the request
+    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
+    - open a connection on port 25
+    - the server is supposed to say '220 domain.tld Service ready'
+    - we return the domain.tld found
+
+    Returns a json response {"status": "ok", "helo": domain} on success, or
+    {"status": error_code, "content": explanation} if port 25 is unreachable
+    or the greeting is not the expected one, or the rate-limit error response
+    when the limit is hit.
+    """
+
+    # this is supposed to be a fast operation if run often enough
+    now = time.time()
+    clear_rate_limit_db(now)
+
+    # ############################################# #
+    #  Validate request and extract the parameters  #
+    # ############################################# #
+
+    # The client ip is read from X-Forwarded-For (presumably set by a reverse
+    # proxy in front of this server - to be confirmed). Fall back on the ip of
+    # the connection instead of crashing with a KeyError when it is missing.
+    forwarded_for = request.headers.get("x-forwarded-for")
+    ip = forwarded_for.split(",")[0].strip() if forwarded_for else request.ip
+
+    check_rate_limit_ip = check_rate_limit(ip, now)
+    if check_rate_limit_ip:
+        return check_rate_limit_ip
+
+    # ############################################# #
+    #  Run the actual check                         #
+    # ############################################# #
+
+    # Use asyncio streams rather than a blocking socket, which would freeze
+    # the whole event loop while connecting / waiting for the greeting
+    try:
+        reader, writer = await asyncio.wait_for(asyncio.open_connection(ip, 25), timeout=2)
+    except (OSError, asyncio.TimeoutError):
+        return json_response({
+            'status': "error_smtp_unreachable",
+            'content': "Could not open a connection on port 25, probably because of a firewall or port forwarding issue"
+        })
+
+    try:
+        greeting = (await asyncio.wait_for(reader.read(1024), timeout=2)).decode('utf-8')
+        if greeting[:3] != "220":
+            raise ValueError("Unexpected SMTP greeting")
+        # The greeting looks like '220 domain.tld ...'
+        helo_domain = greeting.split()[1].strip()
+    except (OSError, asyncio.TimeoutError, ValueError, IndexError):
+        # ValueError also covers UnicodeDecodeError (undecodable greeting),
+        # IndexError a greeting with nothing after the '220' code
+        return json_response({
+            'status': "error_smtp_bad_answer",
+            'content': "SMTP server did not reply with '220 domain.tld' after opening socket ... Maybe another machine answered."
+        })
+    finally:
+        # Always release the connection, whatever the outcome
+        writer.close()
+
+    return json_response({'status': 'ok', 'helo': helo_domain})
+
+
+@app.route("/")
+async def main(request):
+    """
+    Landing page, for people opening this API in a browser.
+    """
+    message = "You aren't really supposed to use this website using your browser.<br><br>It's a small server with an API to check if a services running on YunoHost instance can be reached from 'the global internet'."
+    return html(message)
+
+
+# Only start the server when this file is run as a script (not on import)
+if __name__ == "__main__":
+    # Listen on every interface, on port 7000
+    app.run(host="0.0.0.0", port=7000)
--- a/yunodiagnoser.service
+++ b/yunodiagnoser.service
@ -0,0 +1,11 @@
+[Unit]
+Description=A server providing features for remote-diagnosis for Yunohost servers
+
+[Service]
+Type=simple
+WorkingDirectory={{ WORKING_DIR }}
+# systemd does not run ExecStart= through a shell: redirections are passed
+# as plain arguments unless the command is explicitly wrapped in /bin/sh -c
+# ("exec" makes python replace the shell as the main process of the service)
+ExecStart=/bin/sh -c 'exec {{ WORKING_DIR }}/venv/bin/python3.6 yunodiagnoser.py > server.log 2>&1'
+# No ExecStop= needed (backticks and pipes are not interpreted by systemd
+# anyway): by default systemd stops the service by sending it SIGTERM
+
+[Install]
+WantedBy=multi-user.target