2019-07-29 21:17:34 +02:00
import re
2019-01-19 09:58:39 +01:00
import time
2019-01-19 08:47:56 +01:00
import asyncio
2019-01-19 07:54:11 +01:00
import aiohttp
2019-01-19 07:58:37 +01:00
import validators
2019-07-29 22:23:14 +02:00
import socket
2019-01-19 07:38:13 +01:00
2019-01-19 07:25:23 +01:00
from sanic import Sanic
2019-01-19 07:54:11 +01:00
from sanic . log import logger
2019-01-19 07:38:13 +01:00
from sanic . response import html , json as json_response
from sanic . exceptions import InvalidUsage
2019-01-19 07:25:23 +01:00
# Sanic application object: the @app.route handlers of this module register
# on it, and it is started by app.run() when the file is run as a script.
app = Sanic()
2020-04-16 00:43:38 +02:00
# ########################################################################### #
# Rate limit #
# ########################################################################### #
2019-01-19 09:58:39 +01:00
# Rate limit store, kept in memory: maps a key (the handlers use client IPs
# and domain names) to the list of time.time() timestamps of the attempts
# that were accepted for this key.
# NOTE(review): the server is started with several workers; presumably each
# worker process keeps its own copy of this dict - to be confirmed.
RATE_LIMIT_DB = {}

# To prevent DDoS or bounce attack attempts or something like that:
# a key can't do more than RATE_LIMIT_NB_REQUESTS requests in a
# RATE_LIMIT_SECONDS-seconds window (10 requests per 300 seconds)
RATE_LIMIT_SECONDS = 300
RATE_LIMIT_NB_REQUESTS = 10
2019-01-19 09:58:39 +01:00
2021-01-08 02:33:56 +01:00
2019-01-19 09:58:39 +01:00
def clear_rate_limit_db(now):
    """
    Remove too old rate limit values

    For every key of RATE_LIMIT_DB, forget the attempts that are
    RATE_LIMIT_SECONDS old or older, then drop the keys that are left without
    any attempt so that the DB doesn't grow forever.

    :param now: the current time, as returned by time.time()
    """
    # A dictionary can't change size during iteration, so iterate over a
    # snapshot of its items and update the real dictionary as we go
    for key, times in list(RATE_LIMIT_DB.items()):
        # Keep only the values newer than RATE_LIMIT_SECONDS
        recent = [t for t in times if now - t < RATE_LIMIT_SECONDS]
        if recent:
            RATE_LIMIT_DB[key] = recent
        else:
            # Nothing recent left for this key: remove it entirely
            del RATE_LIMIT_DB[key]
2019-01-19 07:25:23 +01:00
2019-07-25 11:07:39 +02:00
def check_rate_limit(key, now):
    """
    Record an attempt for `key`, unless its rate limit is already reached.

    :param key: what is being throttled (the handlers pass a client IP or a
                domain name)
    :param now: the current time, as returned by time.time()
    :return: None when the attempt is allowed (it is then recorded in
             RATE_LIMIT_DB), otherwise a JSON error response with status 400
             that the caller should send back as-is. Refused attempts are not
             recorded.
    """
    attempts = RATE_LIMIT_DB.get(key, [])

    # The limit is reached as soon as RATE_LIMIT_NB_REQUESTS attempts are
    # already recorded: hence ">=" and not ">", which would let one request
    # more than the announced maximum go through.
    if len(attempts) >= RATE_LIMIT_NB_REQUESTS:
        # Attempts are appended in chronological order: the first is the oldest
        oldest_attempt = attempts[0] if attempts else now
        retry_in = int(RATE_LIMIT_SECONDS - now + oldest_attempt)

        logger.info(f"Rate limit reached for {key}, can retry in {retry_in} seconds")
        return json_response({
            "error": {
                "code": "error_rate_limit",
                "content": f"Rate limit reached for this domain or ip, retry in {retry_in} seconds"
            }
        }, status=400)

    # The attempt is allowed: add it to the DB
    RATE_LIMIT_DB.setdefault(key, []).append(now)
    return None
2019-07-25 11:07:39 +02:00
2020-04-16 00:43:38 +02:00
# ########################################################################### #
# HTTP check #
# ########################################################################### #
2019-07-29 22:23:14 +02:00
2020-04-16 00:43:38 +02:00
def _check_http_payload_shape(data):
    """
    Check the overall shape of the JSON payload sent to /check-http.

    Only cheap structural checks are done here (types, sizes, uniqueness).

    :param data: the decoded JSON body of the request
    :return: a message describing the first problem found, or None
    """
    if not data:
        return "Empty request body"
    if not isinstance(data, dict):
        return "Request body ain't a proper dict"
    if "domains" not in data:
        return "No 'domains' provided"
    if "nonce" not in data:
        return "No 'nonce' provided"

    # Check domain list format
    domains = data["domains"]
    if not isinstance(domains, list):
        return "'domains' ain't a list"
    if not domains:
        return "'domains' list is empty"
    if len(domains) >= 60:
        return "You cannot test that many domains"
    for domain in domains:
        if not isinstance(domain, str):
            return "domain names must be strings"
        if len(domain) >= 100:
            return "Domain %s name seems pretty long, that's suspicious...?" % domain
    # Safe to build a set: every item is known to be a string at this point
    if len(domains) != len(set(domains)):
        return "'domains' list should contain unique elements"

    return None


def _check_http_payload_values(data):
    """
    Check the domain names and the nonce of an already well-shaped payload.

    :param data: a payload accepted by _check_http_payload_shape
    :return: a message describing the first problem found, or None
    """
    # Check domains are valid domain names
    for domain in data["domains"]:
        if not validators.domain(domain):
            return f"{domain} is not a valid domain"

    # Check nonce format
    nonce = data["nonce"]
    if not isinstance(nonce, str):
        return "'nonce' ain't a string"
    # fullmatch, because "$" also matches right before a trailing newline and
    # the nonce ends up in the URL we request
    if not re.fullmatch(r"[a-f0-9]{16}", nonce):
        return "'nonce' is not in the right forwat (it should be a 16-digit hexadecimal string)"

    return None


@app.route("/check-http", methods=["POST"])
async def check_http(request):
    """
    This function receives an HTTP request from a YunoHost instance while this
    server is hosted on our infrastructure. The request is expected to be a
    POST request with a body like {"domains": ["domain1.tld", "domain2.tld"],
                                   "nonce": "1234567890abcdef"}

    The nonce value is a single-use ID, and we will try to reach
    http://domain.tld/.well-known/ynh-diagnosis/{nonce} which should return
    200 if we are indeed reaching the right server.

    The general workflow is the following:

    - grab the ip from the request
    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
    - get json from body and domains from it
    - check the payload is well-shaped
    - check for domain-based rate limit (see RATE_LIMIT_SECONDS value)
    - check domains and nonce are in valid format
    - for each domain:
        - try to do an http request on the ip (using the domain as target
          host) for the page /.well-known/ynh-diagnosis/{nonce}
    - answer saying if the domains can be reached

    The payload is validated with explicit checks rather than `assert`
    statements, because those are skipped when Python runs with -O.
    """

    # this is supposed to be a fast operation if run often enough
    now = time.time()
    clear_rate_limit_db(now)

    # ############################################# #
    #  Validate request and extract the parameters  #
    # ############################################# #

    ip = request.headers["x-forwarded-for"].split(",")[0]

    check_rate_limit_ip = check_rate_limit(ip, now)
    if check_rate_limit_ip:
        return check_rate_limit_ip

    try:
        data = request.json
    except InvalidUsage:
        logger.info(f"Invalid json in request, body is: {request.body}")
        return json_response({
            "error": {
                "code": "error_bad_json",
                "content": "Invalid usage, body isn't proper json"
            }
        }, status=400)

    def bad_request(reason):
        # Log the problem and build the corresponding 400 answer
        message = f"Invalid request: {reason} ... Original request body was: {request.body}"
        logger.info(message)
        return json_response({
            "error": {
                "code": "error_bad_json_data",
                "content": message
            }
        }, status=400)

    problem = _check_http_payload_shape(data)
    if problem:
        return bad_request(problem)

    # Check domain rate limit
    for domain in data["domains"]:
        check_rate_limit_domain = check_rate_limit(domain, now)
        if check_rate_limit_domain:
            return check_rate_limit_domain

    problem = _check_http_payload_values(data)
    if problem:
        return bad_request(problem)

    # ############################################# #
    #  Run the actual check                         #
    # ############################################# #

    nonce = data["nonce"]

    # Domains are checked one after the other, not concurrently
    results = {}
    for domain in data["domains"]:
        results[domain] = await check_http_domain(ip, domain, nonce)

    return json_response({"http": results})
2019-07-29 21:17:34 +02:00
2020-04-16 00:43:38 +02:00
async def check_http_domain(ip, domain, nonce):
    """
    Try to fetch http://{ip}/.well-known/ynh-diagnosis/{nonce}, using `domain`
    as Host header, and report how it went.

    Redirections are not followed and the whole request may take 5 seconds at
    most.

    :param ip: the address to connect to (IPv4 or IPv6)
    :param domain: the domain name sent as Host header
    :param nonce: the last part of the requested path
    :return: {"status": "ok"} if the answer has status code 200, otherwise a
             dict with an "error_http_check_*" status and a "content" message
    """

    # An IPv6 address has to be wrapped in brackets inside an URL
    host = "[%s]" % ip if ":" in ip else ip

    async with aiohttp.ClientSession() as session:
        try:
            url = "".join(("http://", host, "/.well-known/ynh-diagnosis/", nonce))
            async with session.get(
                url,
                headers={"Host": domain},
                allow_redirects=False,
                timeout=aiohttp.ClientTimeout(total=5),
            ) as response:
                # XXX in the future try to do a double check with the server
                # to see if the correct content is returned
                await response.text()
                status_code = response.status
        # TODO various kind of errors
        except (aiohttp.client_exceptions.ServerTimeoutError, asyncio.TimeoutError):
            return {
                "status": "error_http_check_timeout",
                "content": "Timed-out while trying to contact your server from outside. It appears to be unreachable. You should check that you're correctly forwarding port 80, that nginx is running, and that a firewall is not interfering.",
            }
        except aiohttp.client_exceptions.ClientConnectorError as error:
            return {
                "status": "error_http_check_connection_error",
                "content": f"Connection error: could not connect to the requested domain, it's very likely unreachable. Raw error: {error}",
            }
        except Exception as error:
            import traceback
            traceback.print_exc()
            return {
                "status": "error_http_check_unknown_error",
                "content": f"An error happened while trying to reach your domain, it's very likely unreachable. Raw error: {error}",
            }

        if status_code == 200:
            return {"status": "ok"}

        return {
            "status": "error_http_check_bad_status_code",
            "content": f"Could not reach your server as expected, it returned code {status_code}. It might be that another machine answered instead of your server. You should check that you're correctly forwarding port 80, that your nginx configuration is up to date, and that a reverse-proxy is not interfering.",
        }
# ########################################################################### #
# Ports check #
# ########################################################################### #
2019-01-19 07:38:13 +01:00
2019-07-29 22:23:14 +02:00
def _is_port_number(p):
    """
    Tell whether `p` is a usable TCP port number, i.e. an integer from 1 to
    65535 (both included). Booleans are refused even though bool is a
    subclass of int in Python.
    """
    return isinstance(p, int) and not isinstance(p, bool) and 0 < p <= 65535


def _check_ports_payload(data):
    """
    Validate the JSON payload sent to /check-ports/.

    :param data: the decoded JSON body of the request
    :return: a message describing the first problem found, or None
    """
    if not data:
        return "Empty request body"
    if not isinstance(data, dict):
        return "Request body ain't a proper dict"
    if "ports" not in data:
        return "No 'ports' provided"

    ports = data["ports"]
    if not isinstance(ports, list):
        return "'ports' ain't a list"
    if not ports:
        return "'ports' list is empty"
    if len(ports) >= 30:
        return "That's too many ports to check"
    # Validate the items before looking for duplicates: building a set out of
    # unhashable items (nested lists, dicts...) would raise a TypeError
    if not all(_is_port_number(p) for p in ports):
        return "'ports' should a list of valid port numbers"
    if len(ports) != len(set(ports)):
        return "'ports' list should contain unique elements"

    return None


@app.route("/check-ports/", methods=["POST"])
async def check_ports(request):
    """
    This function receives an HTTP request from a YunoHost instance while this
    server is hosted on our infrastructure. The request is expected to be a
    POST request with a body like {"ports": [80, 443, 22, 25]}

    The general workflow is the following:

    - grab the ip from the request
    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
    - get json from body and ports list from it
    - check ports are opened or closed
    - answer the list of opened / closed ports

    The payload is validated with explicit checks rather than `assert`
    statements, because those are skipped when Python runs with -O.
    """

    # this is supposed to be a fast operation if run often enough
    now = time.time()
    clear_rate_limit_db(now)

    # ############################################# #
    #  Validate request and extract the parameters  #
    # ############################################# #

    ip = request.headers["x-forwarded-for"].split(",")[0]

    check_rate_limit_ip = check_rate_limit(ip, now)
    if check_rate_limit_ip:
        return check_rate_limit_ip

    try:
        data = request.json
    except InvalidUsage:
        logger.info(f"Invalid json in request, body is: {request.body}")
        return json_response({
            "error": {
                "code": "error_bad_json",
                "content": "Invalid usage, body isn't proper json"
            }
        }, status=400)

    problem = _check_ports_payload(data)
    if problem:
        message = f"Invalid request: {problem} ... Original request body was: {request.body}"
        logger.info(message)
        return json_response({
            "error": {
                "code": "error_bad_json_data",
                "content": message
            }
        }, status=400)

    # ############################################# #
    #  Run the actual check                         #
    # ############################################# #

    # Ports are probed one after the other, not concurrently
    result = {}
    for port in data["ports"]:
        result[int(port)] = await check_port_is_open(ip, port)

    return json_response({"ports": result})
async def check_port_is_open(ip, port):
    """
    Tell whether a TCP connection can be opened on `port` of `ip`.

    The connection attempt is given 2 seconds, and the connection is closed
    right after being established.

    :param ip: the address to connect to (IPv4 or IPv6)
    :param port: the TCP port to probe
    :return: True if the connection could be opened, False for any failure
             (timeout, refused connection, no route to host, ...)
    """
    # Only an IPv6 address contains colons
    address_family = socket.AF_INET6 if ":" in ip else socket.AF_INET
    connection = asyncio.open_connection(ip, port, family=address_family)

    try:
        _, stream_writer = await asyncio.wait_for(connection, timeout=2)
    except (asyncio.TimeoutError, ConnectionRefusedError, OSError):
        # e.g. OSError: [Errno 113] No route to host
        return False
    except Exception:
        # Not an expected failure: leave a trace, but still answer "closed"
        import traceback
        traceback.print_exc()
        return False

    stream_writer.close()
    # XXX we are still in python 3.6 in prod :(
    # await writer.wait_closed()
    return True
2020-04-16 00:43:38 +02:00
# ########################################################################### #
# SMTP check #
# ########################################################################### #
2019-07-29 22:23:14 +02:00
2019-07-29 22:34:46 +02:00
@app.route("/check-smtp/", methods=["POST"])
async def check_smtp(request):
    """
    This function receives an HTTP request from a YunoHost instance while this
    server is hosted on our infrastructure. The request is expected to be a
    POST request with an empty body

    The general workflow is the following:

    - grab the ip from the request
    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
    - open a socket on port 25
    - the server is supposed to say '220 domain.tld Service ready'
    - we return the domain.tld found

    Every outcome is answered as a JSON object with a 'status' key: 'ok'
    (along with the 'helo' domain found) or an 'error_smtp_*' code along with
    a 'content' message.
    """

    # this is supposed to be a fast operation if run often enough
    now = time.time()
    clear_rate_limit_db(now)

    # ############################################# #
    #  Validate request and extract the parameters  #
    # ############################################# #

    ip = request.headers["x-forwarded-for"].split(",")[0]

    check_rate_limit_ip = check_rate_limit(ip, now)
    if check_rate_limit_ip:
        return check_rate_limit_ip

    # ############################################# #
    #  Run the actual check                         #
    # ############################################# #

    # Only an IPv6 address contains colons
    family = socket.AF_INET6 if ":" in ip else socket.AF_INET
    futur = asyncio.open_connection(ip, 25, family=family)

    unreachable = {
        'status': "error_smtp_unreachable",
        'content': "Could not open a connection on port 25, probably because of a firewall or port forwarding issue"
    }

    try:
        reader, writer = await asyncio.wait_for(futur, timeout=2)
    except (asyncio.TimeoutError, ConnectionRefusedError, OSError):
        # e.g. OSError: [Errno 113] No route to host
        return json_response(unreachable)
    except Exception:
        # Not an expected failure: leave a trace, but the answer is the same
        import traceback
        traceback.print_exc()
        return json_response(unreachable)

    bad_answer = {
        'status': "error_smtp_bad_answer",
        'content': "SMTP server did not reply with '220 domain.tld' after opening socket ... Maybe another machine answered."
    }

    # NOTE(review): the timeout message used to say "2 seconds" while the code
    # waits 200 seconds. The waiting time is left untouched and the message now
    # reports it; lower this value if 2 seconds was the intent.
    answer_timeout = 200

    try:
        recv = await asyncio.wait_for(reader.read(1024), timeout=answer_timeout)
        recv = recv.decode("Utf-8")
        words = recv.split()
        # Explicit check rather than an assert, which is skipped with -O
        if recv[:3] != "220" or len(words) < 2:
            logger.info(f"Unexpected smtp answer: {recv!r}")
            return json_response(bad_answer)
        helo_domain = words[1].strip()
    except asyncio.TimeoutError:
        return json_response({
            'status': "error_smtp_timeout_answer",
            'content': f"SMTP server took more than {answer_timeout} seconds to answer."
        })
    except Exception as e:
        import traceback
        traceback.print_exc()
        logger.info(f"Error when trying to get smtp answer: {e}")
        return json_response(bad_answer)
    finally:
        # Whatever happened, don't leave the connection open
        writer.close()
        # XXX we are still in python 3.6 in prod :(
        # await writer.wait_closed()

    return json_response({'status': 'ok', 'helo': helo_domain})
2019-07-29 22:34:46 +02:00
2019-01-19 07:25:23 +01:00
@app.route("/")
async def main(request):
    """
    Landing page: a short HTML notice for people opening the server in a
    browser, explaining that it is meant to be used through its API.
    """
    notice = "You aren't really supposed to use this website using your browser.<br><br>It's a small server with an API to check if a services running on YunoHost instance can be reached from 'the global internet'."
    return html(notice)
2019-01-19 07:25:23 +01:00
# Only start the server when this file is run as a script (not on import)
if __name__ == "__main__":
    # Listen on every IPv4 interface, on port 7000, with 16 workers
    app.run(host="0.0.0.0", port=7000, workers=16)