yunohost/src/log.py
2021-11-14 21:48:54 +01:00

805 lines
28 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
""" License
Copyright (C) 2018 YunoHost
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program; if not, see http://www.gnu.org/licenses
"""
""" yunohost_log.py
Manage debug logs
"""
import os
import re
import yaml
import glob
import psutil
from typing import List
from datetime import datetime, timedelta
from logging import FileHandler, getLogger, Formatter
from io import IOBase
from moulinette import m18n, Moulinette
from moulinette.core import MoulinetteError
from yunohost.utils.error import YunohostError, YunohostValidationError
from yunohost.utils.packages import get_ynh_package_version
from moulinette.utils.log import getActionLogger
from moulinette.utils.filesystem import read_file, read_yaml
logger = getActionLogger("yunohost.log")
CATEGORIES_PATH = "/var/log/yunohost/categories/"
OPERATIONS_PATH = "/var/log/yunohost/categories/operation/"
METADATA_FILE_EXT = ".yml"
LOG_FILE_EXT = ".log"
BORING_LOG_LINES = [
r"set [+-]x$",
r"set [+-]o xtrace$",
r"set [+-]o errexit$",
r"set [+-]o nounset$",
r"trap '' EXIT",
r"local \w+$",
r"local exit_code=(1|0)$",
r"local legacy_args=.*$",
r"local -A args_array$",
r"args_array=.*$",
r"ret_code=1",
r".*Helper used in legacy mode.*",
r"ynh_handle_getopts_args",
r"ynh_script_progression",
r"sleep 0.5",
r"'\[' (1|0) -eq (1|0) '\]'$",
r"\[?\['? -n '' '?\]\]?$",
r"rm -rf /var/cache/yunohost/download/$",
r"type -t ynh_clean_setup$",
r"DEBUG - \+ echo '",
r"DEBUG - \+ exit (1|0)$",
]
def log_list(limit=None, with_details=False, with_suboperations=False):
"""
List available logs
Keyword argument:
limit -- Maximum number of logs
with_details -- Include details (e.g. if the operation was a success).
Likely to increase the command time as it needs to open and parse the
metadata file for each log...
with_suboperations -- Include operations that are not the "main"
operation but are sub-operations triggered by another ongoing operation
... (e.g. initializing groups/permissions when installing an app)
"""
operations = {}
logs = [x for x in os.listdir(OPERATIONS_PATH) if x.endswith(METADATA_FILE_EXT)]
logs = list(reversed(sorted(logs)))
if limit is not None:
if with_suboperations:
logs = logs[:limit]
else:
# If we displaying only parent, we are still gonna load up to limit * 5 logs
# because many of them are suboperations which are not gonna be kept
# Yet we still want to obtain ~limit number of logs
logs = logs[: limit * 5]
for log in logs:
base_filename = log[: -len(METADATA_FILE_EXT)]
md_path = os.path.join(OPERATIONS_PATH, log)
entry = {
"name": base_filename,
"path": md_path,
"description": _get_description_from_name(base_filename),
}
try:
entry["started_at"] = _get_datetime_from_name(base_filename)
except ValueError:
pass
try:
metadata = (
read_yaml(md_path) or {}
) # Making sure this is a dict and not None..?
except Exception as e:
# If we can't read the yaml for some reason, report an error and ignore this entry...
logger.error(m18n.n("log_corrupted_md_file", md_file=md_path, error=e))
continue
if with_details:
entry["success"] = metadata.get("success", "?")
entry["parent"] = metadata.get("parent")
if with_suboperations:
entry["parent"] = metadata.get("parent")
entry["suboperations"] = []
elif metadata.get("parent") is not None:
continue
operations[base_filename] = entry
# When displaying suboperations, we build a tree-like structure where
# "suboperations" is a list of suboperations (each of them may also have a list of
# "suboperations" suboperations etc...
if with_suboperations:
suboperations = [o for o in operations.values() if o["parent"] is not None]
for suboperation in suboperations:
parent = operations.get(suboperation["parent"])
if not parent:
continue
parent["suboperations"].append(suboperation)
operations = [o for o in operations.values() if o["parent"] is None]
else:
operations = [o for o in operations.values()]
if limit:
operations = operations[:limit]
operations = list(reversed(sorted(operations, key=lambda o: o["name"])))
# Reverse the order of log when in cli, more comfortable to read (avoid
# unecessary scrolling)
is_api = Moulinette.interface.type == "api"
if not is_api:
operations = list(reversed(operations))
return {"operation": operations}
def log_show(
path, number=None, share=False, filter_irrelevant=False, with_suboperations=False
):
"""
Display a log file enriched with metadata if any.
If the file_name is not an absolute path, it will try to search the file in
the unit operations log path (see OPERATIONS_PATH).
Argument:
file_name
number
share
"""
if share:
filter_irrelevant = True
if filter_irrelevant:
def _filter(lines):
filters = [re.compile(f) for f in BORING_LOG_LINES]
return [
line
for line in lines
if not any(f.search(line.strip()) for f in filters)
]
else:
def _filter(lines):
return lines
# Normalize log/metadata paths and filenames
abs_path = path
log_path = None
if not path.startswith("/"):
abs_path = os.path.join(OPERATIONS_PATH, path)
if os.path.exists(abs_path) and not path.endswith(METADATA_FILE_EXT):
log_path = abs_path
if abs_path.endswith(METADATA_FILE_EXT) or abs_path.endswith(LOG_FILE_EXT):
base_path = "".join(os.path.splitext(abs_path)[:-1])
else:
base_path = abs_path
base_filename = os.path.basename(base_path)
md_path = base_path + METADATA_FILE_EXT
if log_path is None:
log_path = base_path + LOG_FILE_EXT
if not os.path.exists(md_path) and not os.path.exists(log_path):
raise YunohostValidationError("log_does_exists", log=path)
infos = {}
# If it's a unit operation, display the name and the description
if base_path.startswith(CATEGORIES_PATH):
infos["description"] = _get_description_from_name(base_filename)
infos["name"] = base_filename
if share:
from yunohost.utils.yunopaste import yunopaste
content = ""
if os.path.exists(md_path):
content += read_file(md_path)
content += "\n============\n\n"
if os.path.exists(log_path):
actual_log = read_file(log_path)
content += "\n".join(_filter(actual_log.split("\n")))
url = yunopaste(content)
logger.info(m18n.n("log_available_on_yunopaste", url=url))
if Moulinette.interface.type == "api":
return {"url": url}
else:
return
# Display metadata if exist
if os.path.exists(md_path):
try:
metadata = read_yaml(md_path) or {}
except MoulinetteError as e:
error = m18n.n("log_corrupted_md_file", md_file=md_path, error=e)
if os.path.exists(log_path):
logger.warning(error)
else:
raise YunohostError(error)
else:
infos["metadata_path"] = md_path
infos["metadata"] = metadata
if "log_path" in metadata:
log_path = metadata["log_path"]
if with_suboperations:
def suboperations():
try:
log_start = _get_datetime_from_name(base_filename)
except ValueError:
return
for filename in os.listdir(OPERATIONS_PATH):
if not filename.endswith(METADATA_FILE_EXT):
continue
# We first retrict search to a ~48h time window to limit the number
# of .yml we look into
try:
date = _get_datetime_from_name(base_filename)
except ValueError:
continue
if (date < log_start) or (
date > log_start + timedelta(hours=48)
):
continue
try:
submetadata = read_yaml(
os.path.join(OPERATIONS_PATH, filename)
)
except Exception:
continue
if submetadata and submetadata.get("parent") == base_filename:
yield {
"name": filename[: -len(METADATA_FILE_EXT)],
"description": _get_description_from_name(
filename[: -len(METADATA_FILE_EXT)]
),
"success": submetadata.get("success", "?"),
}
metadata["suboperations"] = list(suboperations())
# Display logs if exist
if os.path.exists(log_path):
from yunohost.service import _tail
if number and filter_irrelevant:
logs = _tail(log_path, int(number * 4))
elif number:
logs = _tail(log_path, int(number))
else:
logs = read_file(log_path)
logs = list(_filter(logs))
if number:
logs = logs[-number:]
infos["log_path"] = log_path
infos["logs"] = logs
return infos
def log_share(path):
return log_show(path, share=True)
def is_unit_operation(
entities=["app", "domain", "group", "service", "user"],
exclude=["password"],
operation_key=None,
):
"""
Configure quickly a unit operation
This decorator help you to configure the record of a unit operations.
Argument:
entities A list of entity types related to the unit operation. The entity
type is searched inside argument's names of the decorated function. If
something match, the argument value is added as related entity. If the
argument name is different you can specify it with a tuple
(argname, entity_type) instead of just put the entity type.
exclude Remove some arguments from the context. By default, arguments
called 'password' are removed. If an argument is an object, you need to
exclude it or create manually the unit operation without this decorator.
operation_key A key to describe the unit operation log used to create the
filename and search a translation. Please ensure that this key prefixed by
'log_' is present in locales/en.json otherwise it won't be translatable.
"""
def decorate(func):
def func_wrapper(*args, **kwargs):
op_key = operation_key
if op_key is None:
op_key = func.__name__
# If the function is called directly from an other part of the code
# and not by the moulinette framework, we need to complete kwargs
# dictionnary with the args list.
# Indeed, we use convention naming in this decorator and we need to
# know name of each args (so we need to use kwargs instead of args)
if len(args) > 0:
from inspect import signature
keys = list(signature(func).parameters.keys())
if "operation_logger" in keys:
keys.remove("operation_logger")
for k, arg in enumerate(args):
kwargs[keys[k]] = arg
args = ()
# Search related entity in arguments of the decorated function
related_to = []
for entity in entities:
if isinstance(entity, tuple):
entity_type = entity[1]
entity = entity[0]
else:
entity_type = entity
if entity in kwargs and kwargs[entity] is not None:
if isinstance(kwargs[entity], str):
related_to.append((entity_type, kwargs[entity]))
else:
for x in kwargs[entity]:
related_to.append((entity_type, x))
context = kwargs.copy()
# Exclude unappropriate data from the context
for field in exclude:
if field in context:
context.pop(field, None)
# Context is made from args given to main function by argparse
# This context will be added in extra parameters in yml file, so this context should
# be serializable and short enough (it will be displayed in webadmin)
# Argparse can provide some File or Stream, so here we display the filename or
# the IOBase, if we have no name.
for field, value in context.items():
if isinstance(value, IOBase):
try:
context[field] = value.name
except Exception:
context[field] = "IOBase"
operation_logger = OperationLogger(op_key, related_to, args=context)
try:
# Start the actual function, and give the unit operation
# in argument to let the developper start the record itself
args = (operation_logger,) + args
result = func(*args, **kwargs)
except Exception as e:
operation_logger.error(e)
raise
else:
operation_logger.success()
return result
return func_wrapper
return decorate
class RedactingFormatter(Formatter):
def __init__(self, format_string, data_to_redact):
super(RedactingFormatter, self).__init__(format_string)
self.data_to_redact = data_to_redact
def format(self, record):
msg = super(RedactingFormatter, self).format(record)
self.identify_data_to_redact(msg)
for data in self.data_to_redact:
# we check that data is not empty string,
# otherwise this may lead to super epic stuff
# (try to run "foo".replace("", "bar"))
if data:
msg = msg.replace(data, "**********")
return msg
def identify_data_to_redact(self, record):
# Wrapping this in a try/except because we don't want this to
# break everything in case it fails miserably for some reason :s
try:
# This matches stuff like db_pwd=the_secret or admin_password=other_secret
# (the secret part being at least 3 chars to avoid catching some lines like just "db_pwd=")
# Some names like "key" or "manifest_key" are ignored, used in helpers like ynh_app_setting_set or ynh_read_manifest
match = re.search(
r"(pwd|pass|passwd|password|passphrase|secret\w*|\w+key|token|PASSPHRASE)=(\S{3,})$",
record.strip(),
)
if (
match
and match.group(2) not in self.data_to_redact
and match.group(1) not in ["key", "manifest_key"]
):
self.data_to_redact.append(match.group(2))
except Exception as e:
logger.warning(
"Failed to parse line to try to identify data to redact ... : %s" % e
)
class OperationLogger(object):
"""
Instances of this class represents unit operation done on the ynh instance.
Each time an action of the yunohost cli/api change the system, one or
several unit operations should be registered.
This class record logs and metadata like context or start time/end time.
"""
_instances: List[object] = []
def __init__(self, operation, related_to=None, **kwargs):
# TODO add a way to not save password on app installation
self.operation = operation
self.related_to = related_to
self.extra = kwargs
self.started_at = None
self.ended_at = None
self.logger = None
self._name = None
self.data_to_redact = []
self.parent = self.parent_logger()
self._instances.append(self)
for filename in ["/etc/yunohost/mysql", "/etc/yunohost/psql"]:
if os.path.exists(filename):
self.data_to_redact.append(read_file(filename).strip())
self.path = OPERATIONS_PATH
if not os.path.exists(self.path):
os.makedirs(self.path)
def parent_logger(self):
# If there are other operation logger instances
for instance in reversed(self._instances):
# Is one of these operation logger started but not yet done ?
if instance.started_at is not None and instance.ended_at is None:
# We are a child of the first one we found
return instance.name
# If no lock exists, we are probably in tests or yunohost is used as a
# lib ... let's not really care about that case and assume we're the
# root logger then.
if not os.path.exists("/var/run/moulinette_yunohost.lock"):
return None
locks = read_file("/var/run/moulinette_yunohost.lock").strip().split("\n")
# If we're the process with the lock, we're the root logger
if locks == [] or str(os.getpid()) in locks:
return None
# If we get here, we are in a yunohost command called by a yunohost
# (maybe indirectly from an app script for example...)
#
# The strategy is :
# 1. list 20 most recent log files
# 2. iterate over the PID of parent processes
# 3. see if parent process has some log file open (being actively
# written in)
# 4. if among those file, there's an operation log file, we use the id
# of the most recent file
recent_operation_logs = sorted(
glob.iglob(OPERATIONS_PATH + "*.log"), key=os.path.getctime, reverse=True
)[:20]
proc = psutil.Process().parent()
while proc is not None:
# We use proc.open_files() to list files opened / actively used by this proc
# We only keep files matching a recent yunohost operation log
active_logs = sorted(
[f.path for f in proc.open_files() if f.path in recent_operation_logs],
key=os.path.getctime,
reverse=True,
)
if active_logs != []:
# extra the log if from the full path
return os.path.basename(active_logs[0])[:-4]
else:
proc = proc.parent()
continue
# If nothing found, assume we're the root operation logger
return None
def start(self):
"""
Start to record logs that change the system
Until this start method is run, no unit operation will be registered.
"""
if self.started_at is None:
self.started_at = datetime.utcnow()
self.flush()
self._register_log()
@property
def md_path(self):
"""
Metadata path file
"""
return os.path.join(self.path, self.name + METADATA_FILE_EXT)
@property
def log_path(self):
"""
Log path file
"""
return os.path.join(self.path, self.name + LOG_FILE_EXT)
def _register_log(self):
"""
Register log with a handler connected on log system
"""
self.file_handler = FileHandler(self.log_path)
# We use a custom formatter that's able to redact all stuff in self.data_to_redact
# N.B. : the subtle thing here is that the class will remember a pointer to the list,
# so we can directly append stuff to self.data_to_redact and that'll be automatically
# propagated to the RedactingFormatter
self.file_handler.formatter = RedactingFormatter(
"%(asctime)s: %(levelname)s - %(message)s", self.data_to_redact
)
# Listen to the root logger
self.logger = getLogger("yunohost")
self.logger.addHandler(self.file_handler)
def flush(self):
"""
Write or rewrite the metadata file with all metadata known
"""
dump = yaml.safe_dump(self.metadata, default_flow_style=False)
for data in self.data_to_redact:
# N.B. : we need quotes here, otherwise yaml isn't happy about loading the yml later
dump = dump.replace(data, "'**********'")
with open(self.md_path, "w") as outfile:
outfile.write(dump)
@property
def name(self):
"""
Name of the operation
This name is used as filename, so don't use space
"""
if self._name is not None:
return self._name
name = [self.started_at.strftime("%Y%m%d-%H%M%S")]
name += [self.operation]
if hasattr(self, "name_parameter_override"):
# This is for special cases where the operation is not really
# unitary. For instance, the regen conf cannot be logged "per
# service" because of the way it's built
name.append(self.name_parameter_override)
elif self.related_to:
# We use the name of the first related thing
name.append(self.related_to[0][1])
self._name = "-".join(name)
return self._name
@property
def metadata(self):
"""
Dictionnary of all metadata collected
"""
data = {
"started_at": self.started_at,
"operation": self.operation,
"parent": self.parent,
"yunohost_version": get_ynh_package_version("yunohost")["version"],
"interface": Moulinette.interface.type,
}
if self.related_to is not None:
data["related_to"] = self.related_to
if self.ended_at is not None:
data["ended_at"] = self.ended_at
data["success"] = self._success
if self.error is not None:
data["error"] = self._error
# TODO: detect if 'extra' erase some key of 'data'
data.update(self.extra)
return data
def success(self):
"""
Declare the success end of the unit operation
"""
self.close()
def error(self, error):
"""
Declare the failure of the unit operation
"""
return self.close(error)
def close(self, error=None):
"""
Close properly the unit operation
"""
# When the error happen's in the is_unit_operation try/except,
# we want to inject the log ref in the exception, such that it may be
# transmitted to the webadmin which can then redirect to the appropriate
# log page
if (
self.started_at
and isinstance(error, Exception)
and not isinstance(error, YunohostValidationError)
):
error.log_ref = self.name
if self.ended_at is not None or self.started_at is None:
return
if error is not None and not isinstance(error, str):
error = str(error)
self.ended_at = datetime.utcnow()
self._error = error
self._success = error is None
if self.logger is not None:
self.logger.removeHandler(self.file_handler)
self.file_handler.close()
is_api = Moulinette.interface.type == "api"
desc = _get_description_from_name(self.name)
if error is None:
if is_api:
msg = m18n.n("log_link_to_log", name=self.name, desc=desc)
else:
msg = m18n.n("log_help_to_get_log", name=self.name, desc=desc)
logger.debug(msg)
else:
if is_api:
msg = (
"<strong>"
+ m18n.n("log_link_to_failed_log", name=self.name, desc=desc)
+ "</strong>"
)
else:
msg = m18n.n("log_help_to_get_failed_log", name=self.name, desc=desc)
logger.info(msg)
self.flush()
return msg
def __del__(self):
"""
Try to close the unit operation, if it's missing.
The missing of the message below could help to see an electrical
shortage.
"""
if self.ended_at is not None or self.started_at is None:
return
else:
self.error(m18n.n("log_operation_unit_unclosed_properly"))
def dump_script_log_extract_for_debugging(self):
with open(self.log_path, "r") as f:
lines = f.readlines()
# A line typically looks like
# 2019-10-19 16:10:27,611: DEBUG - + mysql -u piwigo --password=********** -B piwigo
# And we just want the part starting by "DEBUG - "
lines = [line for line in lines if ":" in line.strip()]
lines = [line.strip().split(": ", 1)[1] for line in lines]
# And we ignore boring/irrelevant lines
# Annnnnnd we also ignore lines matching [number] + such as
# 72971 DEBUG 29739 + ynh_exit_properly
# which are lines from backup-before-upgrade or restore-after-failed-upgrade ...
filters = [re.compile(f_) for f_ in BORING_LOG_LINES]
filters.append(re.compile(r"\d+ \+ "))
lines = [
line
for line in lines
if not any(filter_.search(line) for filter_ in filters)
]
lines_to_display = []
# Get the 20 lines before the last 'ynh_exit_properly'
rev_lines = list(reversed(lines))
for i, line in enumerate(rev_lines):
if line.endswith("+ ynh_exit_properly"):
lines_to_display = reversed(rev_lines[i : i + 20])
break
# If didnt find anything, just get the last 20 lines
if not lines_to_display:
lines_to_display = lines[-20:]
logger.warning(
"Here's an extract of the logs before the crash. It might help debugging the error:"
)
for line in lines_to_display:
logger.info(line)
def _get_datetime_from_name(name):
# Filenames are expected to follow the format:
# 20200831-170740-short_description-and-stuff
raw_datetime = " ".join(name.split("-")[:2])
return datetime.strptime(raw_datetime, "%Y%m%d %H%M%S")
def _get_description_from_name(name):
"""
Return the translated description from the filename
"""
parts = name.split("-", 3)
try:
try:
datetime.strptime(" ".join(parts[:2]), "%Y%m%d %H%M%S")
except ValueError:
key = "log_" + parts[0]
args = parts[1:]
else:
key = "log_" + parts[2]
args = parts[3:]
return m18n.n(key, *args)
except IndexError:
return name