Simplify args validation and API response format, implement multiple domain checks for http

This commit is contained in:
Alexandre Aubin 2020-04-16 00:43:38 +02:00
parent a0521ab4e4
commit f8efb07980

View file

@ -1,7 +1,6 @@
import re
import time
import asyncio
import aiodns
import aiohttp
import validators
import socket
@ -13,6 +12,10 @@ from sanic.exceptions import InvalidUsage
app = Sanic()
# ########################################################################### #
# Rate limit #
# ########################################################################### #
# keep that in memory
RATE_LIMIT_DB = {}
@ -45,9 +48,10 @@ def check_rate_limit(key, now):
oldest_attempt = RATE_LIMIT_DB[key][0]
logger.info(f"Rate limit reached for {key}, can retry in {int(RATE_LIMIT_SECONDS - now + oldest_attempt)} seconds")
return json_response({
"status": "error",
"error": {
"code": "error_rate_limit",
"content": f"Rate limit reached for this domain or ip, retry in {int(RATE_LIMIT_SECONDS - now + oldest_attempt)} seconds",
"content": f"Rate limit reached for this domain or ip, retry in {int(RATE_LIMIT_SECONDS - now + oldest_attempt)} seconds"
}
}, status=400)
# In any case, add this attempt to the DB
@ -57,39 +61,17 @@ def check_rate_limit(key, now):
RATE_LIMIT_DB[key].append(now)
async def check_port_is_open(ip, port):
if ":" in ip:
sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
else:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(2)
result = sock.connect_ex((ip, port))
sock.close()
return result == 0
# ########################################################################### #
# HTTP check #
# ########################################################################### #
# FIXME : remove it ? not used anymore...
async def query_dns(host, dns_entry_type):
loop = asyncio.get_event_loop()
dns_resolver = aiodns.DNSResolver(loop=loop)
try:
return await dns_resolver.query(host, dns_entry_type)
except aiodns.error.DNSError:
return []
except Exception:
import traceback
traceback.print_exc()
logger.error("Unhandled error while resolving DNS entry")
@app.route("/check-http/", methods=["POST"])
@app.route("/check-http", methods=["POST"])
async def check_http(request):
"""
This function received an HTTP request from a YunoHost instance while this
server is hosted on our infrastructure. The request is expected to be a
POST request with a body like {"domain": "domain-to-check.tld",
POST request with a body like {"domains": ["domain1.tld", "domain2.tld"],
"nonce": "1234567890abcdef" }
The nonce value is a single-use ID, and we will try to reach
@ -101,8 +83,9 @@ async def check_http(request):
- grab the ip from the request
- check for ip based rate limit (see RATE_LIMIT_SECONDS value)
- get json from body and domain from it
- check for domain based rate limit (see RATE_LIMIT_SECONDS value)
- check domain is in valid format
- check for domain-based rate limit (see RATE_LIMIT_SECONDS value)
- check domains are in valid format
- for each domain:
- try to do an http request on the ip (using the domain as target host) for the page /.well-known/ynh-diagnosis/{nonce}
- answer saying if the domain can be reached
"""
@ -126,53 +109,58 @@ async def check_http(request):
except InvalidUsage:
logger.info(f"Invalid json in request, body is: {request.body}")
return json_response({
"status": "error",
"error": {
"code": "error_bad_json",
"content": "Invalid usage, body isn't proper json",
"content": "Invalid usage, body isn't proper json"
}
}, status=400)
if not data or "domain" not in data or "nonce" not in data:
logger.info(f"Invalid request: didn't specified a domain and a nonce id (body is: {request.body}")
return json_response({
"status": "error",
"code": "error_no_domain_",
"content": "Request must specify a domain and a nonce",
}, status=400)
try:
assert data, "Empty request body"
assert isinstance(data, dict), "Request body ain't a proper dict"
assert "domains" in data, "No 'domains' provided"
assert "nounce" in data, "No 'nounce' provided"
domain = data["domain"]
# Check domain list format
assert isinstance(data["domains"], list), "'domains' ain't a list"
assert len(data["domains"]) > 0, "'domains' list is empty"
assert len(data["domains"]) < 30, "You cannot test that many domains"
for domain in data["domains"]:
assert isinstance(domain, str), "domain names must be strings"
assert len(domain) < 100, "Domain %s name seems pretty long, that's suspicious...?" % domain
assert len(data["domains"]) == len(set(data["domains"])), "'domains' list should contain unique elements"
# Since now we are only checking the IP itself, it seems
# unecessary to also have a rate limit on domains since the
# rate limit on IP will be hit first ...
# That would simplify some code, for example we could add the
# rate limit check in a decorator for each route/check
# Check domain rate limit
for domain in data["domains"]:
check_rate_limit_domain = check_rate_limit(domain, now)
if check_rate_limit_domain:
return check_rate_limit_domain
if not validators.domain(domain):
logger.info(f"Invalid request, is not in the right format (domain is: {domain})")
# Check domains are valid domain names
for domain in data["domains"]:
assert validators.domain(domain), f"{domain} is not a valid domain"
# Check nonce format
assert isinstance(data["nonce"], str), "'nonce' ain't a string"
assert re.match(r"^[a-f0-9]{16}$", data["nonce"]), "'nonce' is not in the right forwat (it should be a 16-digit hexadecimal string)"
except AssertionError as e:
logger.info(f"Invalid request: {e} ... Original request body was: {request.body}")
return json_response({
"status": "error",
"code": "error_domain_bad_format",
"content": "domain is not in the right format (do not include http:// or https://)",
"error": {
"code": "error_bad_json_data",
"content": f"Invalid request: {e} ... Original request body was: {request.body}"
}
}, status=400)
domain = data["domains"]
nonce = data["nonce"]
# nonce id is arbitrarily defined to be a
# 16-digit hexadecimal string
if not re.match(r"^[a-f0-9]{16}$", nonce):
logger.info(f"Invalid request, is not in the right format (nonce is: {nonce})")
return json_response({
"status": "error",
"code": "error_nonce_bad_format",
"content": "nonce is not in the right format (it should be a 16-digit hexadecimal string)",
}, status=400)
"http": {domain: await check_http_domain(ip, domain, nonce)} for domain in domains}
})
# ############################################# #
# Run the actual check #
# ############################################# #
async def check_http_domain(ip, domain, nonce):
if ":" in ip:
ip = "[%s]" % ip
@ -183,42 +171,44 @@ async def check_http(request):
async with session.get(url,
headers={"Host": domain},
allow_redirects=False,
timeout=aiohttp.ClientTimeout(total=10)) as response:
timeout=aiohttp.ClientTimeout(total=5)) as response:
# XXX in the futur try to do a double check with the server to
# see if the correct content is get
await response.text()
# TODO various kind of errors
except (aiohttp.client_exceptions.ServerTimeoutError, asyncio.TimeoutError):
return json_response({
"status": "error",
"code": "error_http_check_timeout",
return {
"status": "error_http_check_timeout",
"content": "Timed-out while trying to contact your server from outside. It appears to be unreachable. You should check that you're correctly forwarding port 80, that nginx is running, and that a firewall is not interfering.",
}, status=418)
}
except aiohttp.client_exceptions.ClientConnectorError as e:
return json_response({
"status": "error",
"code": "error_http_check_connection_error",
return {
"status": "error_http_check_connection_error",
"content": "Connection error: could not connect to the requested domain, it's very likely unreachable. Raw error: " + str(e),
}, status=418)
}
except Exception as e:
import traceback
traceback.print_exc()
return json_response({
"status": "error",
"code": "error_http_check_unknown_error",
return {
"status": "error_http_check_unknown_error",
"content": "An error happened while trying to reach your domain, it's very likely unreachable. Raw error: %s" % e,
}, status=400)
}
if response.status != 200:
return json_response({
"status": "error",
"code": "error_http_check_bad_status_code",
return {
"status": "error_http_check_bad_status_code",
"content": "Could not reach your server as expected, it returned code %s. It might be that another machine answered instead of your server. You should check that you're correctly forwarding port 80, that your nginx configuration is up to date, and that a reverse-proxy is not interfering." % response.status,
}, status=418)
}
else:
logger.info(f"Success when checking http access for {domain} asked by {ip}")
return json_response({"status": "ok"})
return {
"status": "ok"
}
# ########################################################################### #
# Ports check #
# ########################################################################### #
@app.route("/check-ports/", methods=["POST"])
@ -256,41 +246,60 @@ async def check_ports(request):
except InvalidUsage:
logger.info(f"Invalid json in request, body is: {request.body}")
return json_response({
"status": "error",
"error": {
"code": "error_bad_json",
"content": "Invalid usage: body isn't proper json",
"content": "Invalid usage, body isn't proper json"
}
}, status=400)
try:
assert data, "Empty request body"
assert isinstance(data, dict), "Request body ain't a proper dict"
assert "ports" in data, "No 'ports' provided"
assert isinstance(data["ports"], list), "'ports' ain't a list"
assert len(data["ports"]) > 0, "'ports' list is empty"
assert len(data["ports"]) < 30, "That's too many ports to check"
assert len(data["ports"]) == len(set(data["ports"])), "'ports' list should contain unique elements"
def is_port_number(p):
return isinstance(p, int) and p > 0 and p < 65535
# Check "ports" exist in request and is a list of port
if not data or "ports" not in data:
logger.info(f"Invalid request didn't specified a ports list (body is: {request.body}")
assert all(not is_port_number(p) for p in data["ports"]), "'ports' should a list of valid port numbers"
except AssertionError as e:
logger.info(f"Invalid request: {e} ... Original request body was: {request.body}")
return json_response({
"status": "error",
"code": "error_no_ports_list",
"content": "Request must specify a list of ports to check",
"error": {
"code": "error_bad_json_data",
"content": f"Invalid request: {e} ... Original request body was: {request.body}"
}
}, status=400)
elif not isinstance(data["ports"], list) or any(not is_port_number(p) for p in data["ports"]) or len(data["ports"]) > 30 or data["ports"] == []:
logger.info(f"Invalid request, ports list is not an actual list of ports, or is too long: {request.body}")
return json_response({
"status": "error",
"code": "error_invalid_ports_list",
"content": "This is not an acceptable port list: ports must be between 0 and 65535 and at most 30 ports can be checked",
}, status=400)
ports = set(data["ports"]) # Keep only a set so that we get unique ports
# ############################################# #
# Run the actual check #
# ############################################# #
result = {}
for port in ports:
for port in data["ports"]:
result[int(port)] = await check_port_is_open(ip, port)
return json_response({"status": "ok", "ports": result})
return json_response({"ports": result})
async def check_port_is_open(ip, port):
if ":" in ip:
sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
else:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(2)
result = sock.connect_ex((ip, port))
sock.close()
return result == 0
# ########################################################################### #
# SMTP check #
# ########################################################################### #
@app.route("/check-smtp/", methods=["POST"])