Merge pull request #886 from YunoHost/cleaning

Cleaning / reorganizing the repo ?
This commit is contained in:
Alexandre Aubin 2020-04-12 15:30:47 +02:00 committed by GitHub
commit 6f1df16d76
2 changed files with 209 additions and 369 deletions

View file

@ -1,32 +1,209 @@
#!/usr/bin/env python2 #!/usr/bin/python3
import re
import os import os
import sys import re
import time
import json import json
import zlib
import argparse
import subprocess import subprocess
import yaml import yaml
import time
import requests now = time.time()
from dateutil.parser import parse
catalog = json.load(open("apps.json"))
my_env = os.environ.copy()
my_env["GIT_TERMINAL_PROMPT"] = "0"
os.makedirs(".apps_cache", exist_ok=True)
os.makedirs("builds/", exist_ok=True)
###################################
# App git clones cache management #
###################################
def app_cache_folder(app):
return os.path.join(".apps_cache", app)
# Regular expression patterns def git(cmd, in_folder=None):
re_commit_author = re.compile( if in_folder:
r'^author (?P<name>.+) <(?P<email>.+)> (?P<time>\d+) (?P<tz>[+-]\d+)$', cmd = "-C " + in_folder + " " + cmd
re.MULTILINE cmd = "git " + cmd
) return subprocess.check_output(cmd.split(), env=my_env).strip().decode("utf-8")
# Helpers def refresh_all_caches():
for app, infos in catalog.items():
app = app.lower()
print(app)
if not os.path.exists(app_cache_folder(app)):
try:
init_cache(app, infos)
except Exception as e:
error("Could not init cache for %s: %s" % (app, e))
else:
try:
refresh_cache(app, infos)
except Exception as e:
error("Could not refresh cache for %s: %s" % (app, e))
def init_cache(app, infos):
if infos["state"] == "notworking":
depth = 5
if infos["state"] == "inprogress":
depth = 20
else:
depth = 40
git("clone --depth {depth} --single-branch --branch master {url} {folder}".format(depth=depth, url=infos["url"], folder=app_cache_folder(app)))
def refresh_cache(app, infos):
# Don't refresh if already refreshed during last hour
fetch_head = app_cache_folder(app) + "/.git/FETCH_HEAD"
if os.path.exists(fetch_head) and now - os.path.getmtime(fetch_head) < 3600:
return
git("remote set-url origin " + infos["url"], in_folder=app_cache_folder(app))
git("fetch origin master --force", in_folder=app_cache_folder(app))
git("reset origin/master --hard", in_folder=app_cache_folder(app))
################################
# Actual list build management #
################################
def build_catalog():
result_dict = {}
for app, infos in catalog.items():
print("Processing '%s'..." % app)
app = app.lower()
try:
app_dict = build_app_dict(app, infos)
except Exception as e:
error("Processing %s failed: %s" % (app, str(e)))
continue
result_dict[app_dict["id"]] = app_dict
#####################
# Current version 2 #
#####################
categories = yaml.load(open("categories.yml").read())
with open("builds/v2.json", 'w') as f:
f.write(json.dumps({"apps": result_dict, "categories": categories}, sort_keys=True))
####################
# Legacy version 1 #
####################
with open("builds/v1.json", 'w') as f:
f.write(json.dumps(result_dict, sort_keys=True))
####################
# Legacy version 0 #
####################
official_apps = set(["agendav", "ampache", "baikal", "dokuwiki", "etherpad_mypads", "hextris", "jirafeau", "kanboard", "my_webapp", "nextcloud", "opensondage", "phpmyadmin", "piwigo", "rainloop", "roundcube", "searx", "shellinabox", "strut", "synapse", "transmission", "ttrss", "wallabag2", "wordpress", "zerobin"])
official_apps_dict = {k: v for k, v in result_dict.items() if k in official_apps}
community_apps_dict = {k: v for k, v in result_dict.items() if k not in official_apps}
# We need the official apps to have "validated" as state to be recognized as official
for app, infos in official_apps_dict.items():
infos["state"] = "validated"
with open("builds/v0-official.json", 'w') as f:
f.write(json.dumps(official_apps_dict, sort_keys=True))
with open("builds/v0-community.json", 'w') as f:
f.write(json.dumps(community_apps_dict, sort_keys=True))
def build_app_dict(app, infos):
assert infos["branch"] == "master"
# Make sure we have some cache
this_app_cache = app_cache_folder(app)
assert os.path.exists(this_app_cache), "No cache yet for %s" % app
# If using head, find the most recent meaningful commit in logs
if infos["revision"] == "HEAD":
relevant_files = ["manifest.json", "actions.json", "hooks/", "scripts/", "conf/", "sources/"]
most_recent_relevant_commit = "rev-list --full-history --all -n 1 -- " + " ".join(relevant_files)
infos["revision"] = git(most_recent_relevant_commit, in_folder=this_app_cache)
assert re.match(r"^[0-9a-f]+$", infos["revision"]), "Output was not a commit? '%s'" % infos["revision"]
# Otherwise, validate commit exists
else:
assert infos["revision"] in git("rev-list --all", in_folder=this_app_cache).split("\n"), "Revision ain't in history ? %s" % infos["revision"]
# Find timestamp corresponding to that commit
timestamp = git("show -s --format=%ct " + infos["revision"], in_folder=this_app_cache)
assert re.match(r"^[0-9]+$", timestamp), "Failed to get timestamp for revision ? '%s'" % timestamp
timestamp = int(timestamp)
# Build the dict with all the infos
manifest = json.load(open(this_app_cache + "/manifest.json"))
return {'id':manifest["id"],
'git': {
'branch': infos['branch'],
'revision': infos["revision"],
'url': infos["url"]
},
'lastUpdate': timestamp,
'manifest': include_translations_in_manifest(manifest),
'state': infos['state'],
'level': infos.get('level', '?'),
'maintained': infos.get("maintained", True),
'high_quality': infos.get("high_quality", False),
'featured': infos.get("featured", False),
'category': infos.get('category', None),
'subtags': infos.get('subtags', []),
}
def include_translations_in_manifest(manifest):
app_name = manifest["id"]
for locale in os.listdir("locales"):
if not locale.endswith("json"):
continue
if locale == "en.json":
continue
current_lang = locale.split(".")[0]
translations = json.load(open(os.path.join("locales", locale), "r"))
key = "%s_manifest_description" % app_name
if translations.get(key, None):
manifest["description"][current_lang] = translations[key]
for category, questions in manifest["arguments"].items():
for question in questions:
key = "%s_manifest_arguments_%s_%s" % (app_name, category, question["name"])
# don't overwrite already existing translation in manifests for now
if translations.get(key) and not current_lang not in question["ask"]:
#print("[ask]", current_lang, key)
question["ask"][current_lang] = translations[key]
key = "%s_manifest_arguments_%s_help_%s" % (app_name, category, question["name"])
# don't overwrite already existing translation in manifests for now
if translations.get(key) and not current_lang not in question.get("help", []):
#print("[help]", current_lang, key)
question["help"][current_lang] = translations[key]
return manifest
def fail(msg, retcode=1):
"""Show failure message and exit."""
print("Error: {0:s}".format(msg))
sys.exit(retcode)
def error(msg): def error(msg):
msg = "[Applist builder error] " + msg msg = "[Applist builder error] " + msg
@ -35,355 +212,8 @@ def error(msg):
print(msg) print(msg)
def include_translations_in_manifest(app_name, manifest): ######################
for i in os.listdir("locales"):
if not i.endswith("json"):
continue
if i == "en.json": if __name__ == "__main__":
continue refresh_all_caches()
build_catalog()
current_lang = i.split(".")[0]
translations = json.load(open(os.path.join("locales", i), "r"))
key = "%s_manifest_description" % app_name
if key in translations and translations[key]:
manifest["description"][current_lang] = translations[key]
for category, questions in manifest["arguments"].items():
for question in questions:
key = "%s_manifest_arguments_%s_%s" % (app_name, category, question["name"])
# don't overwrite already existing translation in manifests for now
if key in translations and translations[key] and not current_lang not in question["ask"]:
print("[ask]", current_lang, key)
question["ask"][current_lang] = translations[key]
key = "%s_manifest_arguments_%s_help_%s" % (app_name, category, question["name"])
# don't overwrite already existing translation in manifests for now
if key in translations and translations[key] and not current_lang not in question.get("help", []):
print("[help]", current_lang, key)
question["help"][current_lang] = translations[key]
return manifest
def get_json(url, verify=True):
try:
# Retrieve and load manifest
if ".github" in url:
r = requests.get(url, verify=verify, auth=token)
else:
r = requests.get(url, verify=verify)
r.raise_for_status()
return r.json()
except requests.exceptions.RequestException as e:
print("-> Error: unable to request %s, %s" % (url, e))
return None
except ValueError as e:
print("-> Error: unable to decode json from %s : %s" % (url, e))
return None
def get_zlib(url, verify=True):
try:
# Retrieve last commit information
r = requests.get(obj_url, verify=verify)
r.raise_for_status()
return zlib.decompress(r.content).decode('utf-8').split('\x00')
except requests.exceptions.RequestException as e:
print("-> Error: unable to request %s, %s" % (obj_url, e))
return None
except zlib.error as e:
print("-> Error: unable to decompress object from %s : %s" % (url, e))
return None
# Main
# Create argument parser
parser = argparse.ArgumentParser(description='Process YunoHost application list.')
# Add arguments and options
parser.add_argument("input", help="Path to json input file")
parser.add_argument("-o", "--output", help="Path to result file. If not specified, '-build' suffix will be added to input filename.")
parser.add_argument("-g", "--github", help="Github token <username>:<password>")
# Parse args
args = parser.parse_args()
try:
# Retrieve apps list from json file
with open(args.input) as f:
apps_list = json.load(f)
except IOError as e:
fail("%s file not found" % args.input)
# Get list name from filename
list_name = os.path.splitext(os.path.basename(args.input))[0]
print(":: Building %s list..." % list_name)
# Args default
if not args.output:
args.output = '%s-build.json' % list_name
already_built_file = {}
if os.path.exists(args.output):
try:
already_built_file = json.load(open(args.output))
except Exception as e:
print("Error while trying to load already built file: %s" % e)
# GitHub credentials
if args.github:
token = (args.github.split(':')[0], args.github.split(':')[1])
else:
token = None
# Loop through every apps
result_dict = {}
for app, info in apps_list.items():
print("---")
print("Processing '%s'..." % app)
app = app.lower()
# Store usefull values
app_branch = info['branch']
app_url = info['url']
app_rev = info['revision']
app_state = info["state"]
app_level = info.get("level")
app_maintained = info.get("maintained", True)
app_featured = info.get("featured", False)
app_high_quality = info.get("high_quality", False)
forge_site = app_url.split('/')[2]
owner = app_url.split('/')[3]
repo = app_url.split('/')[4]
if forge_site == "github.com":
forge_type = "github"
elif forge_site == "framagit.org":
forge_type = "gitlab"
elif forge_site == "code.ffdn.org":
forge_type = "gitlab"
elif forge_site == "code.antopie.org":
forge_type = "gitea"
else:
forge_type = "unknown"
previous_state = already_built_file.get(app, {}).get("state", {})
manifest = {}
timestamp = None
previous_rev = already_built_file.get(app, {}).get("git", {}).get("revision", None)
previous_url = already_built_file.get(app, {}).get("git", {}).get("url")
previous_level = already_built_file.get(app, {}).get("level")
previous_maintained = already_built_file.get(app, {}).get("maintained")
previous_featured = already_built_file.get(app, {}).get("featured")
previous_high_quality = already_built_file.get(app, {}).get("high_quality")
if app_rev == "HEAD":
app_rev = subprocess.check_output(["git", "ls-remote", app_url, "refs/heads/"+app_branch]).split()[0].decode('utf-8')
if not re.match(r"^[0-9a-f]+$", app_rev):
error("Revision for %s did not match expected regex" % app)
continue
if previous_rev is None:
previous_rev = app_rev
# If this is a github repo, we are able to optimize things a bit by looking at the diff
# and not actually updating the app if only README or other not-so-important files were edited
if previous_rev != app_rev and forge_type == "github":
url = "https://api.github.com/repos/{}/{}/compare/{}...{}".format(owner, repo, previous_rev, app_rev)
diff = get_json(url)
if diff and diff["commits"]:
# Only if those files got updated, do we want to update the
# commit (otherwise that would trigger an unecessary upgrade)
ignore_files = [ "README.md", "LICENSE", ".gitignore", "check_process", ".travis.yml" ]
diff_files = [ f for f in diff["files"] if f["filename"] not in ignore_files ]
if diff_files:
print("This app points to HEAD and significant changes where found between HEAD and previous commit")
app_rev = diff["commits"][-1]["sha"]
else:
print("This app points to HEAD but no significant changes where found compared to HEAD, so keeping the previous commit")
app_rev = previous_rev
print("Previous commit : %s" % previous_rev)
print("Current commit : %s" % app_rev)
if previous_rev == app_rev and previous_url == app_url and app in already_built_file:
print("Already up to date, ignoring")
result_dict[app] = already_built_file[app]
if previous_state != app_state:
result_dict[app]["state"] = app_state
print("... but has changed of state, updating it from '%s' to '%s'" % (previous_state, app_state))
if previous_level != app_level or app_level is None:
result_dict[app]["level"] = app_level
print("... but has changed of level, updating it from '%s' to '%s'" % (previous_level, app_level))
if previous_maintained != app_maintained:
result_dict[app]["maintained"] = app_maintained
print("... but maintained status changed, updating it from '%s' to '%s'" % (previous_maintained, app_maintained))
if previous_featured != app_featured:
result_dict[app]["featured"] = app_featured
print("... but featured status changed, updating it from '%s' to '%s'" % (previous_featured, app_featured))
if previous_high_quality != app_high_quality:
result_dict[app]["high_quality"] = app_high_quality
print("... but high_quality status changed, updating it from '%s' to '%s'" % (previous_high_quality, app_high_quality))
print("update translations but don't download anything")
result_dict[app]['manifest'] = include_translations_in_manifest(app, result_dict[app]['manifest'])
continue
print("Revision changed ! Updating...")
raw_url = 'https://%(forge_site)s/%(owner)s/%(repo)s/raw/%(app_rev)s/manifest.json' % {
"forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev
}
manifest = get_json(raw_url, verify=True)
if manifest is None:
error("Manifest is empty for app %s ?" % app)
continue
# Hosted on GitHub
if forge_type == "github":
api_url = 'https://api.github.com/repos/%(owner)s/%(repo)s/commits/%(app_rev)s' % {
"owner": owner, "repo": repo, "app_rev": app_rev
}
info2 = get_json(api_url)
if info2 is None:
error("Commit info is empty for app %s ?" % app)
continue
commit_date = parse(info2['commit']['author']['date'])
timestamp = int(time.mktime(commit_date.timetuple()))
# Gitlab-type forge
elif forge_type == "gitlab":
api_url = 'https://%(forge_site)s/api/v4/projects/%(owner)s%%2F%(repo)s/repository/commits/%(app_rev)s' % {
"forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev
}
commit = get_json(api_url)
if commit is None:
error("Commit info is empty for app %s ?" % app)
continue
commit_date = parse(commit["authored_date"])
timestamp = int(time.mktime(commit_date.timetuple()))
elif forge_type == "gitea":
api_url = 'https://%(forge_site)s/api/v1/repos/%(owner)s/%(repo)s/git/commits/%(app_rev)s' % {
"forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev
}
info2 = get_json(api_url)
if info2 is None:
error("Commit info is empty for app %s ?" % app)
continue
commit_date = parse(info2['commit']['author']['date'])
timestamp = int(time.mktime(commit_date.timetuple()))
# Gogs-type forge
elif forge_type == "gogs":
if not app_url.endswith('.git'):
app_url += ".git"
obj_url = '%s/objects/%s/%s' % (
app_url, app_rev[0:2], app_rev[2:]
)
commit = get_zlib(obj_url, verify=False)
if commit is None or len(commit) < 2:
error("Commit info is empty for app %s ?" % app)
continue
else:
commit = commit[1]
# Extract author line and commit date
commit_author = re_commit_author.search(commit)
if not commit_author:
error("Author line in commit not found for app %s" % app)
continue
# Construct UTC timestamp
timestamp = int(commit_author.group('time'))
tz = commit_author.group('tz')
if len(tz) != 5:
error("Unexpected timezone length in commit for app %s" % app)
continue
elif tz != '+0000':
tdelta = (int(tz[1:3]) * 3600) + (int(tz[3:5]) * 60)
if tz[0] == '+':
timestamp -= tdelta
elif tz[0] == '-':
timestamp += tdelta
else:
error("Unexpected timezone format in commit for app %s" % app)
continue
else:
error("Unsupported VCS and/or protocol for app %s" % app)
continue
if manifest["id"] != app or manifest["id"] != repo.replace("_ynh", ""):
print("Warning: IDs different between list.json, manifest and repo name")
print(" Manifest id : %s" % manifest["id"])
print(" Name in json list : %s" % app)
print(" Repo name : %s" % repo.replace("_ynh", ""))
try:
result_dict[manifest['id']] = {
'git': {
'branch': info['branch'],
'revision': app_rev,
'url': app_url
},
'lastUpdate': timestamp,
'manifest': include_translations_in_manifest(manifest['id'], manifest),
'state': info['state'],
'level': info.get('level', '?'),
'maintained': app_maintained,
'high_quality': app_high_quality,
'featured': app_featured,
'category': info.get('category', None),
'subtags': info.get('subtags', []),
}
except KeyError as e:
error("Invalid app info or manifest for app %s, %s" % (app, e))
continue
## output version 2, including the categories
categories = yaml.load(open("categories.yml").read())
with open(args.output.replace(".json", "-v2.json"), 'w') as f:
f.write(json.dumps({"apps": result_dict, "categories": categories}, sort_keys=True))
## output version 1
with open(args.output, 'w') as f:
f.write(json.dumps(result_dict, sort_keys=True))
print("\nDone! Written in %s" % args.output)
## output version 0
print("\nAlso splitting the file into official and community-build.json for backward compatibility")
official_apps = set(["agendav", "ampache", "baikal", "dokuwiki", "etherpad_mypads", "hextris", "jirafeau", "kanboard", "my_webapp", "nextcloud", "opensondage", "phpmyadmin", "piwigo", "rainloop", "roundcube", "searx", "shellinabox", "strut", "synapse", "transmission", "ttrss", "wallabag2", "wordpress", "zerobin"])
official_apps_dict = {k: v for k, v in result_dict.items() if k in official_apps}
community_apps_dict = {k: v for k, v in result_dict.items() if k not in official_apps}
# We need the official apps to have "validated" as state to be recognized as official
for app, infos in official_apps_dict.items():
infos["state"] = "validated"
with open("official-build.json", 'w') as f:
f.write(json.dumps(official_apps_dict, sort_keys=True))
with open("community-build.json", 'w') as f:
f.write(json.dumps(community_apps_dict, sort_keys=True))
print("\nDone!")

10
rebuild.sh Normal file
View file

@ -0,0 +1,10 @@
#!/bin/bash
workdir=$(dirname "$0")
log=$workdir/app_list_auto_update.log
cd $workdir
date >> $log
git pull >/dev/null
./list_builder.py &>> $log || sendxmpppy "[listbuilder] Rebuilding the application list failed miserably"