#!/usr/bin/python3
#
# Recovered from the "+" side of patch 90cfa0fe594a08fddf471910764ae5f101dff3e0
#   From: Alexandre Aubin
#   Date: Sun, 5 Apr 2020 02:54:24 +0200
#   Subject: [PATCH] Rewrite list_builder to use a local git clone cache and
#            simpler logic
# The same patch also changes .gitignore so that the `.apps_cache` and
# `builds` directories created below are ignored.

import os
import re
import json
import subprocess
import yaml
import time

# Single reference time for the whole run; compared against file mtimes to
# decide whether a cache is recent enough to be left alone.
now = time.time()

# apps.json maps each app name to its catalog metadata.  The rest of this file
# reads at least the "url", "state", "branch" and "revision" keys of every
# entry.
with open("apps.json") as catalog_file:
    catalog = json.load(catalog_file)

# Copy of the environment in which git is told never to prompt on the
# terminal (e.g. for credentials).
my_env = os.environ.copy()
my_env["GIT_TERMINAL_PROMPT"] = "0"

# Working directories: one git clone per app, and the generated catalogs.
os.makedirs(".apps_cache", exist_ok=True)
os.makedirs("builds/", exist_ok=True)


def app_cache_folder(app):
    """Return the path of the local git clone used as cache for *app*."""
    return os.path.join(".apps_cache", app)


def refresh_all_caches():
    """Create or update the local git clone of every app in the catalog.

    Apps without a cache folder are cloned with ``init_cache``; the others
    are updated with ``refresh_cache``.  A failure on one app is reported
    through ``error`` and does not prevent the remaining apps from being
    processed.
    """
    for app, infos in catalog.items():
        app = app.lower()
        print(app)

        if os.path.exists(app_cache_folder(app)):
            action, verb = refresh_cache, "refresh"
        else:
            action, verb = init_cache, "init"

        try:
            action(app, infos)
        except Exception as e:
            # Deliberately broad: this is a best-effort pass over the whole
            # catalog, one broken repository must not abort the run.
            error("Could not %s cache for %s: %s" % (verb, app, e))
def init_cache(app, infos):
    """Shallow-clone the master branch of *app* into its cache folder.

    The clone depth depends on ``infos["state"]``: 5 commits for
    "notworking", 20 for "inprogress" and 40 for any other state.

    Raises ``subprocess.CalledProcessError`` if ``git clone`` fails.
    """
    # This must be a single if/elif/else chain: with two independent ``if``
    # statements the "notworking" depth is overwritten by the final ``else``.
    if infos["state"] == "notworking":
        depth = 5
    elif infos["state"] == "inprogress":
        depth = 20
    else:
        depth = 40

    git("clone --depth {depth} --single-branch --branch master {url} {folder}".format(
        depth=depth, url=infos["url"], folder=app_cache_folder(app)))


def refresh_cache(app, infos):
    """Bring the cached clone of *app* up to date with its remote master.

    Nothing is done when the clone was already fetched less than an hour
    before the start of this run.  Otherwise the remote URL is re-synced with
    the catalog, master is fetched and the work tree is hard-reset to it.
    """
    folder = app_cache_folder(app)

    # Don't refresh if already refreshed during last hour
    fetch_head = os.path.join(folder, ".git", "FETCH_HEAD")
    if os.path.exists(fetch_head) and now - os.path.getmtime(fetch_head) < 3600:
        return

    git("remote set-url origin " + infos["url"], in_folder=folder)
    git("fetch origin master --force", in_folder=folder)
    git("reset origin/master --hard", in_folder=folder)


def git(cmd, in_folder=None):
    """Run ``git <cmd>`` and return its stripped, UTF-8 decoded stdout.

    *cmd* is split on whitespace to build the argument list.  When
    *in_folder* is given, git is run with ``-C <in_folder>``.

    Raises ``subprocess.CalledProcessError`` on a non-zero exit status.
    """
    args = ["git"]
    if in_folder:
        # Passed as its own argument so a folder containing whitespace is not
        # split into several arguments.
        args += ["-C", in_folder]
    args += cmd.split()

    # my_env is the module-level environment copy with GIT_TERMINAL_PROMPT=0,
    # so git fails instead of waiting for input on the terminal.
    return subprocess.check_output(args, env=my_env).strip().decode("utf-8")


def _write_json(path, data):
    """Serialize *data* as JSON with sorted keys into the file at *path*."""
    with open(path, "w") as f:
        f.write(json.dumps(data, sort_keys=True))


def build_catalog():
    """Build every catalog flavour under ``builds/`` from the app caches.

    Apps for which ``build_app_dict`` fails are reported through ``error``
    and left out.  Written files: ``v2.json`` (apps + categories),
    ``v1.json`` (apps only), ``v0-official.json`` and ``v0-community.json``.
    """
    result_dict = {}

    for app, infos in catalog.items():
        print("Processing '%s'..." % app)

        app = app.lower()

        try:
            app_dict = build_app_dict(app, infos)
        except Exception as e:
            # Best effort: a broken app must not prevent building the rest.
            error("Adding %s failed: %s" % (app, str(e)))
            continue

        result_dict[app_dict["id"]] = app_dict

    #####################
    # Current version 2 #
    #####################
    # safe_load: categories.yml is plain data, and yaml.load() without an
    # explicit Loader is deprecated / rejected by recent PyYAML releases.
    with open("categories.yml") as f:
        categories = yaml.safe_load(f)
    _write_json("builds/v2.json", {"apps": result_dict, "categories": categories})

    ####################
    # Legacy version 1 #
    ####################
    _write_json("builds/v1.json", result_dict)

    ####################
    # Legacy version 0 #
    ####################
    official_apps = {
        "agendav", "ampache", "baikal", "dokuwiki", "etherpad_mypads",
        "hextris", "jirafeau", "kanboard", "my_webapp", "nextcloud",
        "opensondage", "phpmyadmin", "piwigo", "rainloop", "roundcube",
        "searx", "shellinabox", "strut", "synapse", "transmission", "ttrss",
        "wallabag2", "wordpress", "zerobin",
    }

    official_apps_dict = {k: v for k, v in result_dict.items() if k in official_apps}
    community_apps_dict = {k: v for k, v in result_dict.items() if k not in official_apps}

    # We need the official apps to have "validated" as state to be recognized as official.
    # These are the same dict objects as in result_dict, which is why this is
    # only done once v1/v2 have been written.
    for infos in official_apps_dict.values():
        infos["state"] = "validated"

    _write_json("builds/v0-official.json", official_apps_dict)
    _write_json("builds/v0-community.json", community_apps_dict)


def build_app_dict(app, infos):
    """Return the catalog entry for *app*, read from its cached clone.

    *infos* is the app's entry from the catalog.  When its "revision" is
    "HEAD", the revision is resolved to the most recent commit that touched
    the manifest, actions, hooks, scripts, conf or sources; otherwise the
    pinned revision must exist in the cached history.  *infos* itself is left
    untouched.

    Raises ``AssertionError`` when a sanity check fails, and whatever
    ``git`` / ``json`` raise on a broken cache.
    """
    assert infos["branch"] == "master"

    this_app_cache = app_cache_folder(app)

    assert os.path.exists(this_app_cache), "No cache yet for %s" % app

    with open(os.path.join(this_app_cache, "manifest.json")) as f:
        manifest = json.load(f)

    # Kept in a local so the caller's catalog entry is not modified.
    revision = infos["revision"]

    if revision == "HEAD":
        # Commits touching only other files (README, ...) are not considered
        # a new revision of the app.
        relevant_files = ["manifest.json", "actions.json", "hooks/", "scripts/", "conf/", "sources/"]
        most_recent_relevant_commit = "rev-list --full-history --all -n 1 -- " + " ".join(relevant_files)
        revision = git(most_recent_relevant_commit, in_folder=this_app_cache)
        assert re.match(r"^[0-9a-f]+$", revision), "Output was not a commit? '%s'" % revision
    else:
        known_revisions = git("rev-list --all", in_folder=this_app_cache).split("\n")
        assert revision in known_revisions, "Revision ain't in history ? %s" % revision

    # %ct is the committer date as a UNIX timestamp.
    timestamp = git("show -s --format=%ct " + revision, in_folder=this_app_cache)
    assert re.match(r"^[0-9]+$", timestamp), "Failed to get timestamp for revision ? '%s'" % timestamp
    timestamp = int(timestamp)

    return {
        'id': manifest["id"],
        'git': {
            'branch': infos['branch'],
            'revision': revision,
            'url': infos["url"],
        },
        'lastUpdate': timestamp,
        'manifest': include_translations_in_manifest(manifest),
        'state': infos['state'],
        'level': infos.get('level', '?'),
        'maintained': infos.get("maintained", True),
        'high_quality': infos.get("high_quality", False),
        'featured': infos.get("featured", False),
        'category': infos.get('category', None),
        'subtags': infos.get('subtags', []),
    }


def include_translations_in_manifest(manifest):
    """Merge translations from ``locales/*.json`` into *manifest*.

    For every locale file except ``en.json``, the app description and the
    "ask" / "help" texts of each argument are filled in from the keys
    ``<id>_manifest_description``, ``<id>_manifest_arguments_<category>_<name>``
    and ``<id>_manifest_arguments_<category>_help_<name>``.  The description
    is always replaced when a translation exists; "ask" and "help" entries
    already present in the manifest are kept.

    *manifest* is modified in place and also returned.
    """
    app_name = manifest["id"]

    for locale in os.listdir("locales"):
        if not locale.endswith("json"):
            continue

        if locale == "en.json":
            continue

        current_lang = locale.split(".")[0]
        with open(os.path.join("locales", locale), "r") as f:
            translations = json.load(f)

        key = "%s_manifest_description" % app_name
        if translations.get(key, None):
            manifest["description"][current_lang] = translations[key]

        for category, questions in manifest["arguments"].items():
            for question in questions:
                key = "%s_manifest_arguments_%s_%s" % (app_name, category, question["name"])
                # don't overwrite already existing translation in manifests for now
                if translations.get(key) and current_lang not in question["ask"]:
                    question["ask"][current_lang] = translations[key]

                key = "%s_manifest_arguments_%s_help_%s" % (app_name, category, question["name"])
                # don't overwrite already existing translation in manifests for now
                if translations.get(key) and current_lang not in question.get("help", {}):
                    # The question may have no "help" dict at all yet.
                    question.setdefault("help", {})[current_lang] = translations[key]

    return manifest


def error(msg):
    """Report *msg* as an applist builder error."""
    msg = "[Applist builder error] " + msg
    # NOTE(review): the patch this file was recovered from elides two lines of
    # this function here (they sit between its two hunks); restore them from
    # the original list_builder.py.
    print(msg)
verify=verify) - r.raise_for_status() - return r.json() - except requests.exceptions.RequestException as e: - print("-> Error: unable to request %s, %s" % (url, e)) - return None - except ValueError as e: - print("-> Error: unable to decode json from %s : %s" % (url, e)) - return None - -def get_zlib(url, verify=True): - - try: - # Retrieve last commit information - r = requests.get(obj_url, verify=verify) - r.raise_for_status() - return zlib.decompress(r.content).decode('utf-8').split('\x00') - except requests.exceptions.RequestException as e: - print("-> Error: unable to request %s, %s" % (obj_url, e)) - return None - except zlib.error as e: - print("-> Error: unable to decompress object from %s : %s" % (url, e)) - return None - -# Main - -# Get list name from filename -print(":: Building %s list..." % list_name) - -output = '%s-build.json' % list_name - -already_built_file = {} -if os.path.exists(output): - try: - already_built_file = json.load(open(output)) - except Exception as e: - print("Error while trying to load already built file: %s" % e) - -# Loop through every apps -result_dict = {} -for app, info in apps_list.items(): - print("---") - print("Processing '%s'..." 
% app) - app = app.lower() - - # Store usefull values - app_branch = info['branch'] - app_url = info['url'] - app_rev = info['revision'] - app_state = info["state"] - app_level = info.get("level") - app_maintained = info.get("maintained", True) - app_featured = info.get("featured", False) - app_high_quality = info.get("high_quality", False) - - forge_site = app_url.split('/')[2] - owner = app_url.split('/')[3] - repo = app_url.split('/')[4] - if forge_site == "github.com": - forge_type = "github" - elif forge_site == "framagit.org": - forge_type = "gitlab" - elif forge_site == "code.ffdn.org": - forge_type = "gitlab" - elif forge_site == "code.antopie.org": - forge_type = "gitea" - else: - forge_type = "unknown" - - previous_state = already_built_file.get(app, {}).get("state", {}) - - manifest = {} - timestamp = None - - previous_rev = already_built_file.get(app, {}).get("git", {}).get("revision", None) - previous_url = already_built_file.get(app, {}).get("git", {}).get("url") - previous_level = already_built_file.get(app, {}).get("level") - previous_maintained = already_built_file.get(app, {}).get("maintained") - previous_featured = already_built_file.get(app, {}).get("featured") - previous_high_quality = already_built_file.get(app, {}).get("high_quality") - - if app_rev == "HEAD": - app_rev = subprocess.check_output(["git", "ls-remote", app_url, "refs/heads/"+app_branch]).split()[0] - if not re.match(r"^[0-9a-f]+$", app_rev): - error("Revision for %s did not match expected regex" % app) - continue - - if previous_rev is None: - previous_rev = 'HEAD' - - # If this is a github repo, we are able to optimize things a bit by looking at the diff - # and not actually updating the app if only README or other not-so-important files were edited - if previous_rev != app_rev and forge_type == "github": - - url = "https://api.github.com/repos/{}/{}/compare/{}...{}".format(owner, repo, previous_rev, app_branch) - diff = get_json(url) - - if not diff or not diff["commits"]: - 
app_rev = previous_rev if previous_rev != 'HEAD' else app_rev - else: - # Only if those files got updated, do we want to update the - # commit (otherwise that would trigger an unecessary upgrade) - ignore_files = [ "README.md", "LICENSE", ".gitignore", "check_process", ".travis.yml" ] - diff_files = [ f for f in diff["files"] if f["filename"] not in ignore_files ] - - if diff_files: - print("This app points to HEAD and significant changes where found between HEAD and previous commit") - app_rev = diff["commits"][-1]["sha"] - else: - print("This app points to HEAD but no significant changes where found compared to HEAD, so keeping the previous commit") - app_rev = previous_rev if previous_rev != 'HEAD' else app_rev - - print("Previous commit : %s" % previous_rev) - print("Current commit : %s" % app_rev) - - if previous_rev == app_rev and previous_url == app_url: - print("Already up to date, ignoring") - result_dict[app] = already_built_file[app] - if previous_state != app_state: - result_dict[app]["state"] = app_state - print("... but has changed of state, updating it from '%s' to '%s'" % (previous_state, app_state)) - if previous_level != app_level or app_level is None: - result_dict[app]["level"] = app_level - print("... but has changed of level, updating it from '%s' to '%s'" % (previous_level, app_level)) - if previous_maintained != app_maintained: - result_dict[app]["maintained"] = app_maintained - print("... but maintained status changed, updating it from '%s' to '%s'" % (previous_maintained, app_maintained)) - if previous_featured != app_featured: - result_dict[app]["featured"] = app_featured - print("... but featured status changed, updating it from '%s' to '%s'" % (previous_featured, app_featured)) - if previous_high_quality != app_high_quality: - result_dict[app]["high_quality"] = app_high_quality - print("... 
but high_quality status changed, updating it from '%s' to '%s'" % (previous_high_quality, app_high_quality)) - - print "update translations but don't download anything" - result_dict[app]['manifest'] = include_translations_in_manifest(app, result_dict[app]['manifest']) - - continue - - print("Revision changed ! Updating...") - - raw_url = 'https://%(forge_site)s/%(owner)s/%(repo)s/raw/%(app_rev)s/manifest.json' % { - "forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev - } - - manifest = get_json(raw_url, verify=True) - if manifest is None: - error("Manifest is empty for app %s ?" % app) - continue - - # Hosted on GitHub - if forge_type == "github": - api_url = 'https://api.github.com/repos/%(owner)s/%(repo)s/commits/%(app_rev)s' % { - "owner": owner, "repo": repo, "app_rev": app_rev - } - - info2 = get_json(api_url) - if info2 is None: - error("Commit info is empty for app %s ?" % app) - continue - - commit_date = parse(info2['commit']['author']['date']) - timestamp = int(time.mktime(commit_date.timetuple())) - - # Gitlab-type forge - elif forge_type == "gitlab": - api_url = 'https://%(forge_site)s/api/v4/projects/%(owner)s%%2F%(repo)s/repository/commits/%(app_rev)s' % { - "forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev - } - commit = get_json(api_url) - if commit is None: - error("Commit info is empty for app %s ?" % app) - continue - - commit_date = parse(commit["authored_date"]) - timestamp = int(time.mktime(commit_date.timetuple())) - - elif forge_type == "gitea": - api_url = 'https://%(forge_site)s/api/v1/repos/%(owner)s/%(repo)s/git/commits/%(app_rev)s' % { - "forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev - } - info2 = get_json(api_url) - if info2 is None: - error("Commit info is empty for app %s ?" 
% app) - continue - - commit_date = parse(info2['commit']['author']['date']) - timestamp = int(time.mktime(commit_date.timetuple())) - - # Gogs-type forge - elif forge_type == "gogs": - if not app_url.endswith('.git'): - app_url += ".git" - - obj_url = '%s/objects/%s/%s' % ( - app_url, app_rev[0:2], app_rev[2:] - ) - commit = get_zlib(obj_url, verify=False) - - if commit is None or len(commit) < 2: - error("Commit info is empty for app %s ?" % app) - continue - else: - commit = commit[1] - - re_commit_author = re.compile( - r'^author (?P.+) <(?P.+)> (?P