Add a diagnosis of processes rencently killed by oom_reaper

This commit is contained in:
Alexandre Aubin 2020-08-28 17:12:07 +02:00
parent 2ee08189ba
commit f5acbffb5c
2 changed files with 47 additions and 0 deletions

View file

@ -1,6 +1,9 @@
#!/usr/bin/env python
import os
import psutil
import subprocess
import datetime
import re
from yunohost.diagnosis import Diagnoser
@ -96,6 +99,49 @@ class SystemResourcesDiagnoser(Diagnoser):
yield item
#
# Recent kills by oom_reaper
#
kills_count = self.recent_kills_by_oom_reaper()
if kills_count:
kills_summary = "\n".join(["%s (x%s)" % (proc, count) for proc, count in kills_count])
yield dict(meta={"test": "oom_reaper"},
status="WARNING",
summary="diagnosis_processes_killed_by_oom_reaper",
data={"kills_summary": kills_summary})
def recent_kills_by_oom_reaper(self):
if not os.path.exists("/var/log/kern.log"):
return []
def analyzed_kern_log():
cmd = 'tail -n 10000 /var/log/kern.log | grep "oom_reaper: reaped process"'
out = subprocess.check_output(cmd, shell=True)
lines = out.strip().split("\n")
now = datetime.datetime.now()
for line in reversed(lines):
# Lines look like :
# Aug 25 18:48:21 yolo kernel: [ 9623.613667] oom_reaper: reaped process 11509 (uwsgi), now anon-rss:0kB, file-rss:0kB, shmem-rss:328kB
date_str = str(now.year) + " " + " ".join(line.split()[:3])
date = datetime.datetime.strptime(date_str, '%Y %b %d %H:%M:%S')
diff = now - date
if diff.days >= 1:
break
process_killed = re.search(r"\(.*\)", line).group().strip("()")
yield process_killed
processes = list(analyzed_kern_log())
kills_count = [(p, len([p_ for p_ in processes if p_ == p])) for p in set(processes)]
kills_count = sorted(kills_count, key=lambda p: p[1], reverse=True)
return kills_count
def human_size(bytes_):
# Adapted from https://stackoverflow.com/a/1094933

View file

@ -262,6 +262,7 @@
"diagnosis_http_nginx_conf_not_up_to_date_details": "To fix the situation, inspect the difference with the command line using <cmd>yunohost tools regen-conf nginx --dry-run --with-diff</cmd> and if you're ok, apply the changes with <cmd>yunohost tools regen-conf nginx --force</cmd>.",
"diagnosis_unknown_categories": "The following categories are unknown: {categories}",
"diagnosis_never_ran_yet": "It looks like this server was setup recently and there's no diagnosis report to show yet. You should start by running a full diagnosis, either from the webadmin or using 'yunohost diagnosis run' from the command line.",
"diagnosis_processes_killed_by_oom_reaper": "Some processes were recently killed by the system because it ran out of memory. This is typically symptomatic of a lack of memory on the system or of a process that ate up to much memory. Summary of the processes killed:\n{kills_summary}",
"domain_cannot_remove_main": "You cannot remove '{domain:s}' since it's the main domain, you first need to set another domain as the main domain using 'yunohost domain main-domain -n <another-domain>'; here is the list of candidate domains: {other_domains:s}",
"domain_cannot_add_xmpp_upload": "You cannot add domains starting with 'xmpp-upload.'. This kind of name is reserved for the XMPP upload feature integrated in YunoHost.",
"domain_cannot_remove_main_add_new_one": "You cannot remove '{domain:s}' since it's the main domain and your only domain, you need to first add another domain using 'yunohost domain add <another-domain.com>', then set is as the main domain using 'yunohost domain main-domain -n <another-domain.com>' and then you can remove the domain '{domain:s}' using 'yunohost domain remove {domain:s}'.'",