Merge pull request #838 from YunoHost/improve-service-status-checks

[enh] Improve service status checks (and service add CLI)
This commit is contained in:
Alexandre Aubin 2019-11-18 14:11:37 +01:00 committed by GitHub
commit d50a69fe3e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 146 additions and 122 deletions

View file

@ -960,24 +960,13 @@ service:
arguments:
name:
help: Service name to add
-s:
full: --status
help: Custom status command
-d:
full: --description
help: Description of the service
-l:
full: --log
help: Absolute path to log file to display
nargs: "+"
-r:
full: --runlevel
help: Runlevel priority of the service
type: int
-n:
full: --need_lock
help: Use this option to prevent deadlocks if the service does invoke yunohost commands.
action: store_true
-d:
full: --description
help: Description of the service
-t:
full: --log_type
help: Type of the log (file or systemd)
@ -986,6 +975,22 @@ service:
- file
- systemd
default: file
--test_status:
help: Specify a custom bash command to check the status of the service. Note that it only makes sense to specify this if the corresponding systemd service does not return the proper information already.
--test_conf:
help: Specify a custom bash command to check if the configuration of the service is valid or broken, similar to nginx -t.
--needs_exposed_ports:
help: A list of ports that needs to be publicly exposed for the service to work as intended.
nargs: "+"
type: int
metavar: PORT
-n:
full: --need_lock
help: Use this option to prevent deadlocks if the service does invoke yunohost commands.
action: store_true
-s:
full: --status
help: Deprecated, old option. Does nothing anymore. Possibly check the --test_status option.
### service_remove()
remove:

View file

@ -101,6 +101,12 @@ for service, conf in new_services.items():
if conffiles:
services[service]['conffiles'] = conffiles
# Remove legacy /var/log/daemon.log and /var/log/syslog from log entries
# because they are too general. Instead, now the journalctl log is
# returned by default which is more relevant.
if "log" in services[service]:
if services[service]["log"] in ["/var/log/syslog", "/var/log/daemon.log"]:
del services[service]["log"]
if updated:
with open('/etc/yunohost/services.yml-new', 'w') as f:

View file

@ -5,7 +5,7 @@ import requests
from yunohost.diagnosis import Diagnoser
from yunohost.utils.error import YunohostError
from yunohost.service import _get_services
class PortsDiagnoser(Diagnoser):
@ -15,16 +15,18 @@ class PortsDiagnoser(Diagnoser):
def run(self):
# FIXME / TODO : in the future, maybe we want to report different
# things per port depending on how important they are
# (e.g. XMPP sounds to me much less important than other ports)
# Ideally, a port could be related to a service...
# FIXME / TODO : for now this list of port is hardcoded, might want
# to fetch this from the firewall.yml in /etc/yunohost/
ports = [22, 25, 53, 80, 443, 587, 993, 5222, 5269]
# This dict is something like :
# { 80: "nginx",
# 25: "postfix",
# 443: "nginx"
# ... }
ports = {}
for service, infos in _get_services().items():
for port in infos.get("needs_exposed_ports", []):
ports[port] = service
try:
r = requests.post('https://diagnosis.yunohost.org/check-ports', json={'ports': ports}, timeout=30).json()
r = requests.post('https://diagnosis.yunohost.org/check-ports', json={'ports': ports.keys()}, timeout=30).json()
if "status" not in r.keys():
raise Exception("Bad syntax for response ? Raw json: %s" % str(r))
elif r["status"] == "error":
@ -37,15 +39,17 @@ class PortsDiagnoser(Diagnoser):
except Exception as e:
raise YunohostError("diagnosis_ports_could_not_diagnose", error=e)
for port in ports:
for port, service in ports.items():
if r["ports"].get(str(port), None) is not True:
yield dict(meta={"port": port},
yield dict(meta={"port": port, "needed_by": service},
status="ERROR",
summary=("diagnosis_ports_unreachable", {"port": port}))
summary=("diagnosis_ports_unreachable", {"port": port}),
details=[("diagnosis_ports_needed_by", (service,)), ("diagnosis_ports_forwarding_tip", ())])
else:
yield dict(meta={},
yield dict(meta={"port": port, "needed_by": service},
status="SUCCESS",
summary=("diagnosis_ports_ok", {"port": port}))
summary=("diagnosis_ports_ok", {"port": port}),
details=[("diagnosis_ports_needed_by", (service))])
def main(args, env, loggers):

View file

@ -5,17 +5,6 @@ import os
from yunohost.diagnosis import Diagnoser
from yunohost.service import service_status
# TODO : all these are arbitrary, should be collectively validated
services_ignored = {"glances"}
services_critical = {"dnsmasq", "fail2ban", "yunohost-firewall", "nginx", "slapd", "ssh"}
# TODO / FIXME : we should do something about this postfix thing
# The nominal value is to be "exited" ... some daemon is actually running
# in a different thread that the thing started by systemd, which is fine
# but somehow sometimes it gets killed and there's no easy way to detect it
# Just randomly restarting it will fix ths issue. We should find some trick
# to identify the PID of the process and check it's still up or idk
services_expected_to_be_exited = {"postfix", "yunohost-firewall"}
class ServicesDiagnoser(Diagnoser):
id_ = os.path.splitext(os.path.basename(__file__))[0].split("-")[1]
@ -28,23 +17,22 @@ class ServicesDiagnoser(Diagnoser):
for service, result in sorted(all_result.items()):
if service in services_ignored:
continue
item = dict(meta={"service": service})
expected_status = "running" if service not in services_expected_to_be_exited else "exited"
# TODO / FIXME : might also want to check that services are enabled
if result["status"] != "running":
item["status"] = "ERROR"
item["summary"] = ("diagnosis_services_bad_status", {"service": service, "status": result["status"]})
if result["active"] != "active" or result["status"] != expected_status:
item["status"] = "WARNING" if service not in services_critical else "ERROR"
item["summary"] = ("diagnosis_services_bad_status", {"service": service, "status": result["active"] + "/" + result["status"]})
elif result["configuration"] == "broken":
item["status"] = "WARNING"
item["summary"] = ("diagnosis_services_conf_broken", {"service": service})
# TODO : could try to append the tail of the service log to the "details" key ...
else:
item["status"] = "SUCCESS"
item["summary"] = ("diagnosis_services_good_status", {"service": service, "status": result["active"] + "/" + result["status"]})
item["summary"] = ("diagnosis_services_running", {"service": service, "status": result["status"]})
if result["configuration"] == "broken":
item["details"] = [(d, tuple()) for d in result["configuration-details"]]
yield item
def main(args, env, loggers):

View file

@ -15,19 +15,6 @@ class RegenconfDiagnoser(Diagnoser):
def run(self):
# nginx -t
p = subprocess.Popen("nginx -t".split(),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
out, _ = p.communicate()
if p.returncode != 0:
yield dict(meta={"test": "nginx-t"},
status="ERROR",
summary=("diagnosis_regenconf_nginx_conf_broken", {}),
details=[(out, ())]
)
regenconf_modified_files = manually_modified_files()
debian_modified_files = manually_modified_files_compared_to_debian_default(ignore_handled_by_regenconf=True)

View file

@ -1,36 +1,42 @@
nginx:
log: /var/log/nginx
avahi-daemon:
log: /var/log/daemon.log
dnsmasq:
log: /var/log/daemon.log
fail2ban:
log: /var/log/fail2ban.log
avahi-daemon: {}
dnsmasq: {}
dovecot:
log: [/var/log/mail.log,/var/log/mail.err]
postfix:
log: [/var/log/mail.log,/var/log/mail.err]
rspamd:
log: /var/log/rspamd/rspamd.log
redis-server:
log: /var/log/redis/redis-server.log
mysql:
log: [/var/log/mysql.log,/var/log/mysql.err]
alternates: ['mariadb']
ssh:
log: /var/log/auth.log
needs_exposed_ports: [993]
fail2ban:
log: /var/log/fail2ban.log
metronome:
log: [/var/log/metronome/metronome.log,/var/log/metronome/metronome.err]
slapd:
log: /var/log/syslog
needs_exposed_ports: [5222, 5269]
mysql:
log: [/var/log/mysql.log,/var/log/mysql.err,/var/log/mysql/error.log]
alternates: ['mariadb']
nginx:
log: /var/log/nginx
test_conf: nginx -t
needs_exposed_ports: [80, 443]
nslcd: {}
php7.0-fpm:
log: /var/log/php7.0-fpm.log
test_conf: php-fpm7.0 --test
postfix:
log: [/var/log/mail.log,/var/log/mail.err]
test_status: systemctl show postfix@- | grep -q "^SubState=running"
needs_exposed_ports: [25, 587]
redis-server:
log: /var/log/redis/redis-server.log
rspamd:
log: /var/log/rspamd/rspamd.log
slapd: {}
ssh:
log: /var/log/auth.log
test_conf: sshd -t
needs_exposed_ports: [22]
yunohost-api:
log: /var/log/yunohost/yunohost-api.log
yunohost-firewall:
need_lock: true
nslcd:
log: /var/log/syslog
test_status: iptables -S | grep "^-A INPUT" | grep " --dport" | grep -q ACCEPT
glances: null
nsswitch: null
ssl: null

View file

@ -172,8 +172,9 @@
"diagnosis_dns_bad_conf": "Bad / missing DNS configuration for domain {domain} (category {category})",
"diagnosis_dns_missing_record": "According to the recommended DNS configuration, you should add a DNS record with type {0}, name {1} and value {2}",
"diagnosis_dns_discrepancy": "According to the recommended DNS configuration, the value for the DNS record with type {0} and name {1} should be {2}, not {3}.",
"diagnosis_services_good_status": "Service {service} is {status} as expected!",
"diagnosis_services_bad_status": "Service {service} is {status} :/",
"diagnosis_services_running": "Service {service} is running!",
"diagnosis_services_conf_broken": "Configuration is broken for service {service}!",
"diagnosis_services_bad_status": "Service {service} is {status} :(",
"diagnosis_diskusage_verylow": "Storage {mountpoint} (on device {device}) has only {free_abs_GB} GB ({free_percent}%) space remaining. You should really consider cleaning up some space.",
"diagnosis_diskusage_low": "Storage {mountpoint} (on device {device}) has only {free_abs_GB} GB ({free_percent}%) space remaining. Be careful.",
"diagnosis_diskusage_ok": "Storage {mountpoint} (on device {device}) still has {free_abs_GB} GB ({free_percent}%) space left!",
@ -207,6 +208,8 @@
"diagnosis_ports_could_not_diagnose": "Could not diagnose if ports are reachable from outside. Error: {error}",
"diagnosis_ports_unreachable": "Port {port} is not reachable from outside.",
"diagnosis_ports_ok": "Port {port} is reachable from outside.",
"diagnosis_ports_needed_by": "Exposing this port is needed for service {0}",
"diagnosis_ports_forwarding_tip": "To fix this issue, most probably you need to configure port forwarding on your internet router as described in https://yunohost.org/port_forwarding",
"diagnosis_http_could_not_diagnose": "Could not diagnose if domain is reachable from outside. Error: {error}",
"diagnosis_http_ok": "Domain {domain} is reachable from outside.",
"diagnosis_http_unreachable": "Domain {domain} is unreachable through HTTP from outside.",
@ -535,7 +538,6 @@
"service_disabled": "The '{service:s}' service was turned off",
"service_enable_failed": "Could not turn on the service '{service:s}'\n\nRecent service logs:{logs:s}",
"service_enabled": "The '{service:s}' service was turned off",
"service_no_log": "No logs to display for the service '{service:s}'",
"service_regen_conf_is_deprecated": "'yunohost service regen-conf' is deprecated! Please use 'yunohost tools regen-conf' instead.",
"service_remove_failed": "Could not remove the service '{service:s}'",
"service_removed": "'{service:s}' service removed",

View file

@ -2829,7 +2829,7 @@ def _assert_system_is_sane_for_app(manifest, when):
services.append("fail2ban")
# List services currently down and raise an exception if any are found
faulty_services = [s for s in services if service_status(s)["active"] != "active"]
faulty_services = [s for s in services if service_status(s)["status"] != "running"]
if faulty_services:
if when == "pre":
raise YunohostError('app_action_cannot_be_ran_because_required_services_down',

View file

@ -40,25 +40,24 @@ MOULINETTE_LOCK = "/var/run/moulinette_yunohost.lock"
logger = log.getActionLogger('yunohost.service')
def service_add(name, status=None, log=None, runlevel=None, need_lock=False, description=None, log_type="file"):
def service_add(name, description=None, log=None, log_type="file", test_status=None, test_conf=None, needs_exposed_ports=None, need_lock=False, status=None):
"""
Add a custom service
Keyword argument:
name -- Service name to add
status -- Custom status command
log -- Absolute path to log file to display
runlevel -- Runlevel priority of the service
need_lock -- Use this option to prevent deadlocks if the service does invoke yunohost commands.
description -- description of the service
log_type -- Precise if the corresponding log is a file or a systemd log
log -- Absolute path to log file to display
log_type -- Specify if the corresponding log is a file or a systemd log
test_status -- Specify a custom bash command to check the status of the service. N.B. : it only makes sense to specify this if the corresponding systemd service does not return the proper information.
test_conf -- Specify a custom bash command to check if the configuration of the service is valid or broken, similar to nginx -t.
needs_exposed_ports -- A list of ports that needs to be publicly exposed for the service to work as intended.
need_lock -- Use this option to prevent deadlocks if the service does invoke yunohost commands.
status -- Deprecated, doesn't do anything anymore. Use test_status instead.
"""
services = _get_services()
if not status:
services[name] = {'status': 'service'}
else:
services[name] = {'status': status}
services[name] = {}
if log is not None:
if not isinstance(log, list):
@ -77,15 +76,22 @@ def service_add(name, status=None, log=None, runlevel=None, need_lock=False, des
else:
raise YunohostError('service_add_failed', service=name)
if runlevel is not None:
services[name]['runlevel'] = runlevel
if description:
services[name]['description'] = description
else:
logger.warning("/!\\ Packager ! You added a custom service without specifying a description. Please add --description to explain what the service does in a similar fashion to existing services.")
if need_lock:
services[name]['need_lock'] = True
if description is not None:
services[name]['description'] = description
if test_status:
services[name]["test_status"] = test_status
if test_conf:
services[name]["test_conf"] = test_conf
if needs_exposed_ports:
services[name]["needs_exposed_ports"] = needs_exposed_ports
try:
_save_services(services)
@ -277,7 +283,7 @@ def service_status(names=[]):
# the hack was to add fake services...
# we need to extract regenconf from service at some point, also because
# some app would really like to use it
if "status" in services[name] and services[name]["status"] is None:
if services[name].get("status", "") is None:
continue
status = _get_service_information_from_systemd(name)
@ -292,11 +298,10 @@ def service_status(names=[]):
logger.error("Failed to get status information via dbus for service %s, systemctl didn't recognize this service ('NoSuchUnit')." % name)
result[name] = {
'status': "unknown",
'loaded': "unknown",
'active': "unknown",
'active_at': "unknown",
'start_on_boot': "unknown",
'last_state_change': "unknown",
'description': "Error: failed to get information for this service, it doesn't exists for systemd",
'service_file_path': "unknown",
'configuration': "unknown",
}
else:
@ -314,21 +319,38 @@ def service_status(names=[]):
result[name] = {
'status': str(status.get("SubState", "unknown")),
'loaded': str(status.get("UnitFileState", "unknown")),
'active': str(status.get("ActiveState", "unknown")),
'start_on_boot': str(status.get("UnitFileState", "unknown")),
'last_state_change': "unknown",
'description': description,
'service_file_path': str(status.get("FragmentPath", "unknown")),
'configuration': "unknown",
}
# Fun stuff™ : to obtain the enabled/disabled status for sysv services,
# gotta do this ... cf code of /lib/systemd/systemd-sysv-install
if result[name]["loaded"] == "generated":
result[name]["loaded"] = "enabled" if glob("/etc/rc[S5].d/S??"+name) else "disabled"
if result[name]["start_on_boot"] == "generated":
result[name]["start_on_boot"] = "enabled" if glob("/etc/rc[S5].d/S??"+name) else "disabled"
if "ActiveEnterTimestamp" in status:
result[name]['active_at'] = datetime.utcfromtimestamp(status["ActiveEnterTimestamp"] / 1000000)
if "StateChangeTimestamp" in status:
result[name]['last_state_change'] = datetime.utcfromtimestamp(status["StateChangeTimestamp"] / 1000000)
# 'test_status' is an optional field to test the status of the service using a custom command
if "test_status" in services[name]:
status = os.system(services[name]["test_status"])
result[name]["status"] = "running" if status == 0 else "failed"
# 'test_status' is an optional field to test the status of the service using a custom command
if "test_conf" in services[name]:
p = subprocess.Popen(services[name]["test_conf"],
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
out, _ = p.communicate()
if p.returncode == 0:
result[name]["configuration"] = "valid"
else:
result[name]['active_at'] = "unknown"
result[name]["configuration"] = "broken"
result[name]["configuration-details"] = out.strip().split("\n")
if len(names) == 1:
return result[names[0]]
@ -373,10 +395,7 @@ def service_log(name, number=50):
if name not in services.keys():
raise YunohostError('service_unknown', service=name)
if 'log' not in services[name]:
raise YunohostError('service_no_log', service=name)
log_list = services[name]['log']
log_list = services[name].get('log', [])
log_type_list = services[name].get('log_type', [])
if not isinstance(log_list, list):
@ -386,6 +405,13 @@ def service_log(name, number=50):
result = {}
# First we always add the logs from journalctl / systemd
result["journalctl"] = _get_journalctl_logs(name, int(number)).splitlines()
# Mysql and journalctl are fucking annoying, we gotta explictly fetch mariadb ...
if name == "mysql":
result["journalctl"] = _get_journalctl_logs("mariadb", int(number)).splitlines()
for index, log_path in enumerate(log_list):
log_type = log_type_list[index]