check-http/yunodiagnoser.py
2020-04-16 01:28:10 +00:00

369 lines
14 KiB
Python

import re
import time
import asyncio
import aiohttp
import validators
import socket
from sanic import Sanic
from sanic.log import logger
from sanic.response import html, json as json_response
from sanic.exceptions import InvalidUsage
# Sanic application object; the route handlers of this module register
# themselves on it through the @app.route decorator.
app = Sanic()
# ########################################################################### #
# Rate limit #
# ########################################################################### #
# In-memory store of recent attempts: maps a key (a client ip or a domain
# name) to the list of timestamps (as returned by time.time()) of its
# recorded attempts. It lives in this dict only.
RATE_LIMIT_DB = {}
# Rate limiting is meant to prevent DDoS or bounce attack attempts.
# Length, in seconds, of the window during which an attempt is remembered.
RATE_LIMIT_SECONDS = 300
# Threshold on the number of remembered attempts per key within that window
# (see check_rate_limit for the exact comparison).
RATE_LIMIT_NB_REQUESTS = 10
def clear_rate_limit_db(now):
    """Remove too old rate limit values.

    Every timestamp older than RATE_LIMIT_SECONDS (relative to ``now``) is
    dropped from RATE_LIMIT_DB, and keys left without any timestamp are
    removed entirely. The dict is modified in place.

    :param now: current time, in seconds, in the same unit as the stored
        timestamps.
    """
    # Iterate over a snapshot: a dictionary can't change size while it is
    # being iterated, and empty keys are deleted on the fly.
    for key, times in list(RATE_LIMIT_DB.items()):
        recent = [t for t in times if now - t < RATE_LIMIT_SECONDS]
        if recent:
            RATE_LIMIT_DB[key] = recent
        else:
            del RATE_LIMIT_DB[key]
def check_rate_limit(key, now):
    """Record an attempt for ``key`` unless its rate limit is reached.

    :param key: identifier being rate limited (callers pass a client ip or a
        domain name).
    :param now: current time in seconds, as returned by time.time().
    :return: ``None`` when the attempt is accepted (it is then appended to
        RATE_LIMIT_DB), or a ready-to-send JSON error response with status
        400 when the limit is reached (the attempt is then not recorded).
    """
    attempts = RATE_LIMIT_DB.get(key, [])

    # Refuse as soon as RATE_LIMIT_NB_REQUESTS attempts are already recorded
    # in the window. A strict `>` would let one extra request through.
    if attempts and len(attempts) >= RATE_LIMIT_NB_REQUESTS:
        oldest_attempt = attempts[0]
        retry_in = int(RATE_LIMIT_SECONDS - now + oldest_attempt)
        logger.info(f"Rate limit reached for {key}, can retry in {retry_in} seconds")
        return json_response({
            "error": {
                "code": "error_rate_limit",
                "content": f"Rate limit reached for this domain or ip, retry in {retry_in} seconds"
            }
        }, status=400)

    # The attempt is accepted: add it to the DB
    RATE_LIMIT_DB.setdefault(key, []).append(now)
# ########################################################################### #
# HTTP check #
# ########################################################################### #
@app.route("/check-http", methods=["POST"])
async def check_http(request):
    """
    Check whether the requester's domains can be reached over HTTP.

    This function receives an HTTP request from a YunoHost instance while this
    server is hosted on our infrastructure. The request is expected to be a
    POST request with a body like {"domains": ["domain1.tld", "domain2.tld"],
    "nonce": "1234567890abcdef"}

    The nonce value is a single-use ID (16 lowercase hexadecimal characters).
    For each domain we request /.well-known/ynh-diagnosis/{nonce} on the
    requester's ip, with the domain as Host header, which should return 200
    if we are indeed reaching the right server.

    The general workflow is the following:

    - grab the ip from the request ("x-forwarded-for" header)
    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
    - get json from body and domains from it
    - check the domain list format
    - check for domain-based rate limit (see RATE_LIMIT_SECONDS value)
    - check domains are valid domain names, and the nonce format
    - for each domain:
        - try to do an http request on the ip (using the domain as target
          host) for the page /.well-known/ynh-diagnosis/{nonce}
    - answer saying if each domain can be reached

    Returns a JSON response: {"http": {domain: result, ...}} on success, or
    {"error": {"code": ..., "content": ...}} with status 400 when the request
    is rate limited or malformed.
    """

    def require(condition, message):
        # Explicit check rather than `assert`: assert statements are removed
        # when Python runs with -O, which would disable the whole validation.
        if not condition:
            raise ValueError(message)

    # this is supposed to be a fast operation if run often enough
    now = time.time()
    clear_rate_limit_db(now)

    # ############################################# #
    #  Validate request and extract the parameters  #
    # ############################################# #

    # First entry of the x-forwarded-for list
    ip = request.headers["x-forwarded-for"].split(",")[0]

    check_rate_limit_ip = check_rate_limit(ip, now)
    if check_rate_limit_ip:
        return check_rate_limit_ip

    try:
        data = request.json
    except InvalidUsage:
        logger.info(f"Invalid json in request, body is: {request.body}")
        return json_response({
            "error": {
                "code": "error_bad_json",
                "content": "Invalid usage, body isn't proper json"
            }
        }, status=400)

    try:
        require(data, "Empty request body")
        require(isinstance(data, dict), "Request body ain't a proper dict")
        require("domains" in data, "No 'domains' provided")
        require("nonce" in data, "No 'nonce' provided")

        # Check domain list format
        require(isinstance(data["domains"], list), "'domains' ain't a list")
        require(len(data["domains"]) > 0, "'domains' list is empty")
        require(len(data["domains"]) < 30, "You cannot test that many domains")
        for domain in data["domains"]:
            require(isinstance(domain, str), "domain names must be strings")
            require(len(domain) < 100, "Domain %s name seems pretty long, that's suspicious...?" % domain)
        require(len(data["domains"]) == len(set(data["domains"])), "'domains' list should contain unique elements")

        # Check domain rate limit
        for domain in data["domains"]:
            check_rate_limit_domain = check_rate_limit(domain, now)
            if check_rate_limit_domain:
                return check_rate_limit_domain

        # Check domains are valid domain names
        for domain in data["domains"]:
            require(validators.domain(domain), f"{domain} is not a valid domain")

        # Check nonce format
        require(isinstance(data["nonce"], str), "'nonce' ain't a string")
        require(re.match(r"^[a-f0-9]{16}$", data["nonce"]), "'nonce' is not in the right forwat (it should be a 16-digit hexadecimal string)")
    except ValueError as e:
        logger.info(f"Invalid request: {e} ... Original request body was: {request.body}")
        return json_response({
            "error": {
                "code": "error_bad_json_data",
                "content": f"Invalid request: {e} ... Original request body was: {request.body}"
            }
        }, status=400)

    # ############################################# #
    #  Run the actual check                         #
    # ############################################# #

    domains = data["domains"]
    nonce = data["nonce"]

    return json_response({
        "http": {domain: await check_http_domain(ip, domain, nonce) for domain in domains}
    })
async def check_http_domain(ip, domain, nonce):
    """
    Probe http://{ip}/.well-known/ynh-diagnosis/{nonce} with `domain` as the
    Host header, and report the outcome.

    Returns a dict with a "status" key: "ok" when the answer has status code
    200, otherwise an "error_http_check_*" code along with a "content" key
    holding a human-readable explanation.
    """
    # An IPv6 address has to be wrapped in brackets to be used inside an URL
    host = "[%s]" % ip if ":" in ip else ip

    async with aiohttp.ClientSession() as session:
        try:
            url = "http://" + host + "/.well-known/ynh-diagnosis/" + nonce
            request_options = {
                "headers": {"Host": domain},
                "allow_redirects": False,
                "timeout": aiohttp.ClientTimeout(total=5),
            }
            async with session.get(url, **request_options) as response:
                # XXX in the future, try to do a double check with the server
                # to see if the correct content is returned
                await response.text()
        # TODO various kind of errors
        except (aiohttp.client_exceptions.ServerTimeoutError, asyncio.TimeoutError):
            return {
                "status": "error_http_check_timeout",
                "content": "Timed-out while trying to contact your server from outside. It appears to be unreachable. You should check that you're correctly forwarding port 80, that nginx is running, and that a firewall is not interfering.",
            }
        except aiohttp.client_exceptions.ClientConnectorError as e:
            return {
                "status": "error_http_check_connection_error",
                "content": "Connection error: could not connect to the requested domain, it's very likely unreachable. Raw error: " + str(e),
            }
        except Exception as e:
            import traceback
            traceback.print_exc()
            return {
                "status": "error_http_check_unknown_error",
                "content": "An error happened while trying to reach your domain, it's very likely unreachable. Raw error: %s" % e,
            }

    status_code = response.status
    if status_code == 200:
        return {
            "status": "ok"
        }

    return {
        "status": "error_http_check_bad_status_code",
        "content": "Could not reach your server as expected, it returned code %s. It might be that another machine answered instead of your server. You should check that you're correctly forwarding port 80, that your nginx configuration is up to date, and that a reverse-proxy is not interfering." % status_code,
    }
# ########################################################################### #
# Ports check #
# ########################################################################### #
@app.route("/check-ports/", methods=["POST"])
async def check_ports(request):
    """
    Check which of the requested ports are open on the requester's ip.

    This function receives an HTTP request from a YunoHost instance while this
    server is hosted on our infrastructure. The request is expected to be a
    POST request with a body like {"ports": [80,443,22,25]}

    The general workflow is the following:

    - grab the ip from the request ("x-forwarded-for" header)
    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
    - get json from body and ports list from it
    - check the ports list is a list of unique, valid port numbers
    - check ports are opened or closed
    - answer the list of opened / closed ports

    Returns a JSON response: {"ports": {port: is_open, ...}} on success, or
    {"error": {"code": ..., "content": ...}} with status 400 when the request
    is rate limited or malformed.
    """

    def require(condition, message):
        # Explicit check rather than `assert`: assert statements are removed
        # when Python runs with -O, which would disable the whole validation.
        if not condition:
            raise ValueError(message)

    def is_port_number(p):
        # bool is a subclass of int, so JSON true/false has to be excluded
        # explicitly. 65535 is the highest valid port number.
        return isinstance(p, int) and not isinstance(p, bool) and 0 < p <= 65535

    # this is supposed to be a fast operation if run often enough
    now = time.time()
    clear_rate_limit_db(now)

    # ############################################# #
    #  Validate request and extract the parameters  #
    # ############################################# #

    # First entry of the x-forwarded-for list
    ip = request.headers["x-forwarded-for"].split(",")[0]

    check_rate_limit_ip = check_rate_limit(ip, now)
    if check_rate_limit_ip:
        return check_rate_limit_ip

    try:
        data = request.json
    except InvalidUsage:
        logger.info(f"Invalid json in request, body is: {request.body}")
        return json_response({
            "error": {
                "code": "error_bad_json",
                "content": "Invalid usage, body isn't proper json"
            }
        }, status=400)

    try:
        require(data, "Empty request body")
        require(isinstance(data, dict), "Request body ain't a proper dict")
        require("ports" in data, "No 'ports' provided")
        require(isinstance(data["ports"], list), "'ports' ain't a list")
        require(len(data["ports"]) > 0, "'ports' list is empty")
        require(len(data["ports"]) < 30, "That's too many ports to check")
        # Validate the elements before building a set out of them: unhashable
        # entries (nested lists or dicts) would make set() raise a TypeError.
        require(all(is_port_number(p) for p in data["ports"]), "'ports' should a list of valid port numbers")
        require(len(data["ports"]) == len(set(data["ports"])), "'ports' list should contain unique elements")
    except ValueError as e:
        logger.info(f"Invalid request: {e} ... Original request body was: {request.body}")
        return json_response({
            "error": {
                "code": "error_bad_json_data",
                "content": f"Invalid request: {e} ... Original request body was: {request.body}"
            }
        }, status=400)

    # ############################################# #
    #  Run the actual check                         #
    # ############################################# #

    result = {}
    for port in data["ports"]:
        result[int(port)] = await check_port_is_open(ip, port)

    return json_response({"ports": result})
async def check_port_is_open(ip, port):
    """
    Tell whether a TCP connection can be opened on `port` of `ip`.

    :param ip: target address as a string; it is treated as IPv6 when it
        contains a ":" and as IPv4 otherwise.
    :param port: TCP port number to connect to.
    :return: True if the connection succeeded within 2 seconds, else False.

    NOTE(review): the connection attempt uses a blocking socket, so the
    coroutine does not yield while it waits (up to the 2 seconds timeout).
    """
    family = socket.AF_INET6 if ":" in ip else socket.AF_INET

    # The context manager closes the socket on every exit path, including
    # when connect_ex raises (it only returns an error code for errors
    # reported by the underlying connect() call).
    with socket.socket(family, socket.SOCK_STREAM) as sock:
        sock.settimeout(2)
        return sock.connect_ex((ip, port)) == 0
# ########################################################################### #
# SMTP check #
# ########################################################################### #
@app.route("/check-smtp/", methods=["POST"])
async def check_smtp(request):
    """
    Check that the requester's SMTP server can be reached on port 25.

    This function receives an HTTP request from a YunoHost instance while this
    server is hosted on our infrastructure. The request is expected to be a
    POST request with an empty body

    The general workflow is the following:

    - grab the ip from the request ("x-forwarded-for" header)
    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
    - open a socket on port 25
    - the server is supposed to say '220 domain.tld Service ready'
    - we return the domain.tld found

    Returns a JSON response: {"status": "ok", "helo": domain} on success,
    {"status": "error_smtp_unreachable" or "error_smtp_bad_answer",
    "content": ...} when the check fails, or a status 400 error when the
    request is rate limited.
    """

    # this is supposed to be a fast operation if run often enough
    now = time.time()
    clear_rate_limit_db(now)

    # ############################################# #
    #  Validate request and extract the parameters  #
    # ############################################# #

    # First entry of the x-forwarded-for list
    ip = request.headers["x-forwarded-for"].split(",")[0]

    check_rate_limit_ip = check_rate_limit(ip, now)
    if check_rate_limit_ip:
        return check_rate_limit_ip

    family = socket.AF_INET6 if ":" in ip else socket.AF_INET

    # The context manager closes the socket on every exit path, including
    # the early return below when the port can't be reached.
    with socket.socket(family, socket.SOCK_STREAM) as sock:
        sock.settimeout(2)

        if sock.connect_ex((ip, 25)) != 0:
            return json_response({
                'status': "error_smtp_unreachable",
                'content': "Could not open a connection on port 25, probably because of a firewall or port forwarding issue"
            })

        try:
            recv = sock.recv(1024).decode('utf-8')
        except (OSError, UnicodeDecodeError):
            # Timeout, connection error or undecodable greeting: handled as
            # a bad answer below
            recv = ""

    # Expected greeting looks like '220 domain.tld ...'
    greeting_fields = recv.split()
    if not recv.startswith("220") or len(greeting_fields) < 2:
        return json_response({
            'status': "error_smtp_bad_answer",
            'content': "SMTP server did not reply with '220 domain.tld' after opening socket ... Maybe another machine answered."
        })

    return json_response({'status': 'ok', 'helo': greeting_fields[1]})
@app.route("/")
async def main(request):
    """Serve a short notice to people opening the root url in a browser."""
    notice = "You aren't really supposed to use this website using your browser.<br><br>It's a small server with an API to check if a services running on YunoHost instance can be reached from 'the global internet'."
    return html(notice)
# Start the server only when this file is executed as a script
if __name__ == "__main__":
    app.run(
        host="0.0.0.0",
        port=7000,
    )