
Rewrite list_builder to use a local git clone cache and simpler logic

Alexandre Aubin 2020-04-05 02:54:24 +02:00
parent 300e6abca2
commit 90cfa0fe59
3 changed files with 189 additions and 362 deletions
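In short, the builder now keeps a shallow git clone of every app under .apps_cache/ and reads manifests and commit info from those local checkouts, instead of querying the GitHub/GitLab/Gitea APIs for each app. A rough sketch of the working directory the new script produces (paths taken from the diff below; the app name is hypothetical):

.apps_cache/example/      shallow, single-branch clone of the app repository (refreshed at most once per hour)
builds/v2.json            current catalog format (apps + categories)
builds/v1.json            legacy catalog format
builds/v0-official.json   legacy list of official apps
builds/v0-community.json  legacy list of community apps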

.gitignore (7 changed lines)

@@ -1,8 +1,5 @@
# temp files
*.swp
*~
# builded
*-build.json
.github_credentials
.apps_cache
builds

list_builder.py

@@ -1,34 +1,198 @@
#!/usr/bin/env python2
import re
#!/usr/bin/python3
import os
import sys
import time
import re
import json
import zlib
import subprocess
import yaml
import time
import requests
from dateutil.parser import parse
now = time.time()
catalog = json.load(open("apps.json"))
my_env = os.environ.copy()
my_env["GIT_TERMINAL_PROMPT"] = "0"
os.makedirs(".apps_cache", exist_ok=True)
os.makedirs("builds/", exist_ok=True)
def app_cache_folder(app):
return os.path.join(".apps_cache", app)
# Regular expression patterns
def refresh_all_caches():
list_name = "apps.json"
for app, infos in catalog.items():
app = app.lower()
print(app)
if not os.path.exists(app_cache_folder(app)):
try:
init_cache(app, infos)
except Exception as e:
error("Could not init cache for %s: %s" % (app, e))
else:
try:
refresh_cache(app, infos)
except Exception as e:
error("Could not refresh cache for %s: %s" % (app, e))
assert os.path.exists(".github_credentials"), "You should add github credentials in .github_credentials first. (Format <user>:<token>)"
assert os.path.exists(list_name), "File %s doesn't exist ?" % list_name
# GitHub credentials from ./.github_credentials that should contain <user>:<token>
# For example: foobar:abcdef1234567890
github_credentials = open(".github_credentials").read().strip().split(":")
# Helpers
def init_cache(app, infos):
    # Shallow clone: keep less history for apps that are not working or still in progress
    if infos["state"] == "notworking":
        depth = 5
    elif infos["state"] == "inprogress":
        depth = 20
    else:
        depth = 40
    git("clone --depth {depth} --single-branch --branch master {url} {folder}".format(depth=depth, url=infos["url"], folder=app_cache_folder(app)))
def refresh_cache(app, infos):
# Don't refresh if already refreshed during last hour
fetch_head = app_cache_folder(app) + "/.git/FETCH_HEAD"
if os.path.exists(fetch_head) and now - os.path.getmtime(fetch_head) < 3600:
return
git("remote set-url origin " + infos["url"], in_folder=app_cache_folder(app))
git("fetch origin master --force", in_folder=app_cache_folder(app))
git("reset origin/master --hard", in_folder=app_cache_folder(app))
def git(cmd, in_folder=None):
    if in_folder:
        cmd = "-C " + in_folder + " " + cmd
    cmd = "git " + cmd
    # env=my_env so that GIT_TERMINAL_PROMPT=0 actually applies to these calls
    return subprocess.check_output(cmd.split(), env=my_env).strip().decode("utf-8")
def build_catalog():
result_dict = {}
for app, infos in catalog.items():
print("Processing '%s'..." % app)
app = app.lower()
try:
app_dict = build_app_dict(app, infos)
except Exception as e:
error("Adding %s failed: %s" % (app, str(e)))
continue
result_dict[app_dict["id"]] = app_dict
#####################
# Current version 2 #
#####################
categories = yaml.load(open("categories.yml").read())
with open("builds/v2.json", 'w') as f:
f.write(json.dumps({"apps": result_dict, "categories": categories}, sort_keys=True))
####################
# Legacy version 1 #
####################
with open("builds/v1.json", 'w') as f:
f.write(json.dumps(result_dict, sort_keys=True))
####################
# Legacy version 0 #
####################
official_apps = set(["agendav", "ampache", "baikal", "dokuwiki", "etherpad_mypads", "hextris", "jirafeau", "kanboard", "my_webapp", "nextcloud", "opensondage", "phpmyadmin", "piwigo", "rainloop", "roundcube", "searx", "shellinabox", "strut", "synapse", "transmission", "ttrss", "wallabag2", "wordpress", "zerobin"])
official_apps_dict = {k: v for k, v in result_dict.items() if k in official_apps}
community_apps_dict = {k: v for k, v in result_dict.items() if k not in official_apps}
# We need the official apps to have "validated" as state to be recognized as official
for app, infos in official_apps_dict.items():
infos["state"] = "validated"
with open("builds/v0-official.json", 'w') as f:
f.write(json.dumps(official_apps_dict, sort_keys=True))
with open("builds/v0-community.json", 'w') as f:
f.write(json.dumps(community_apps_dict, sort_keys=True))
def build_app_dict(app, infos):
assert infos["branch"] == "master"
this_app_cache = app_cache_folder(app)
assert os.path.exists(this_app_cache), "No cache yet for %s" % app
manifest = json.load(open(this_app_cache + "/manifest.json"))
if infos["revision"] == "HEAD":
relevant_files = ["manifest.json", "actions.json", "hooks/", "scripts/", "conf/", "sources/"]
most_recent_relevant_commit = "rev-list --full-history --all -n 1 -- " + " ".join(relevant_files)
infos["revision"] = git(most_recent_relevant_commit, in_folder=this_app_cache)
assert re.match(r"^[0-9a-f]+$", infos["revision"]), "Output was not a commit? '%s'" % infos["revision"]
else:
assert infos["revision"] in git("rev-list --all", in_folder=this_app_cache).split("\n"), "Revision ain't in history ? %s" % infos["revision"]
timestamp = git("show -s --format=%ct " + infos["revision"], in_folder=this_app_cache)
assert re.match(r"^[0-9]+$", timestamp), "Failed to get timestamp for revision ? '%s'" % timestamp
timestamp = int(timestamp)
return {'id':manifest["id"],
'git': {
'branch': infos['branch'],
'revision': infos["revision"],
'url': infos["url"]
},
'lastUpdate': timestamp,
'manifest': include_translations_in_manifest(manifest),
'state': infos['state'],
'level': infos.get('level', '?'),
'maintained': infos.get("maintained", True),
'high_quality': infos.get("high_quality", False),
'featured': infos.get("featured", False),
'category': infos.get('category', None),
'subtags': infos.get('subtags', []),
}
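For illustration, one entry of the resulting catalog has roughly this shape once serialized (all values hypothetical):

{
    "id": "example",
    "git": {"branch": "master", "revision": "d34db33f...", "url": "https://github.com/YunoHost-Apps/example_ynh"},
    "lastUpdate": 1586048064,
    "manifest": {"id": "example", "description": {"en": "..."}, "arguments": {"install": []}},
    "state": "working",
    "level": 7,
    "maintained": true,
    "high_quality": false,
    "featured": false,
    "category": "publishing",
    "subtags": []
}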
def include_translations_in_manifest(manifest):
    app_name = manifest["id"]
    for locale in os.listdir("locales"):
        if not locale.endswith("json"):
            continue
        if locale == "en.json":
            continue
        current_lang = locale.split(".")[0]
        translations = json.load(open(os.path.join("locales", locale), "r"))
        key = "%s_manifest_description" % app_name
        if translations.get(key, None):
            manifest["description"][current_lang] = translations[key]
        for category, questions in manifest["arguments"].items():
            for question in questions:
                key = "%s_manifest_arguments_%s_%s" % (app_name, category, question["name"])
                # don't overwrite an already existing translation in the manifest for now
                if translations.get(key) and current_lang not in question["ask"]:
                    #print("[ask]", current_lang, key)
                    question["ask"][current_lang] = translations[key]
                key = "%s_manifest_arguments_%s_help_%s" % (app_name, category, question["name"])
                # don't overwrite an already existing translation in the manifest for now
                if translations.get(key) and current_lang not in question.get("help", {}):
                    #print("[help]", current_lang, key)
                    question.setdefault("help", {})[current_lang] = translations[key]
    return manifest
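For reference, the translation keys looked up in locales/<lang>.json mirror the manifest structure. A hypothetical excerpt of locales/fr.json for an app "example" with an install question "domain":

{
    "example_manifest_description": "Une application d'exemple",
    "example_manifest_arguments_install_domain": "Choisissez un domaine pour Example",
    "example_manifest_arguments_install_help_domain": "Le domaine doit pointer vers ce serveur"
}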
def fail(msg, retcode=1):
"""Show failure message and exit."""
print("Error: {0:s}".format(msg))
sys.exit(retcode)
def error(msg):
msg = "[Applist builder error] " + msg
@@ -37,342 +201,8 @@ def error(msg):
print(msg)
def include_translations_in_manifest(app_name, manifest):
for i in os.listdir("locales"):
if not i.endswith("json"):
continue
if i == "en.json":
continue
current_lang = i.split(".")[0]
translations = json.load(open(os.path.join("locales", i), "r"))
key = "%s_manifest_description" % app_name
if key in translations and translations[key]:
manifest["description"][current_lang] = translations[key]
for category, questions in manifest["arguments"].items():
for question in questions:
key = "%s_manifest_arguments_%s_%s" % (app_name, category, question["name"])
# don't overwrite already existing translation in manifests for now
if key in translations and translations[key] and not current_lang not in question["ask"]:
print "[ask]", current_lang, key
question["ask"][current_lang] = translations[key]
key = "%s_manifest_arguments_%s_help_%s" % (app_name, category, question["name"])
# don't overwrite already existing translation in manifests for now
if key in translations and translations[key] and not current_lang not in question.get("help", []):
print "[help]", current_lang, key
question["help"][current_lang] = translations[key]
return manifest
def get_json(url, verify=True):
try:
# Retrieve and load manifest
if ".github" in url:
r = requests.get(url, verify=verify, auth=github_credentials)
else:
r = requests.get(url, verify=verify)
r.raise_for_status()
return r.json()
except requests.exceptions.RequestException as e:
print("-> Error: unable to request %s, %s" % (url, e))
return None
except ValueError as e:
print("-> Error: unable to decode json from %s : %s" % (url, e))
return None
def get_zlib(url, verify=True):
try:
# Retrieve last commit information
r = requests.get(obj_url, verify=verify)
r.raise_for_status()
return zlib.decompress(r.content).decode('utf-8').split('\x00')
except requests.exceptions.RequestException as e:
print("-> Error: unable to request %s, %s" % (obj_url, e))
return None
except zlib.error as e:
print("-> Error: unable to decompress object from %s : %s" % (url, e))
return None
# Main
# Get list name from filename
print(":: Building %s list..." % list_name)
output = '%s-build.json' % list_name
already_built_file = {}
if os.path.exists(output):
try:
already_built_file = json.load(open(output))
except Exception as e:
print("Error while trying to load already built file: %s" % e)
# Loop through every apps
result_dict = {}
for app, info in apps_list.items():
print("---")
print("Processing '%s'..." % app)
app = app.lower()
# Store useful values
app_branch = info['branch']
app_url = info['url']
app_rev = info['revision']
app_state = info["state"]
app_level = info.get("level")
app_maintained = info.get("maintained", True)
app_featured = info.get("featured", False)
app_high_quality = info.get("high_quality", False)
forge_site = app_url.split('/')[2]
owner = app_url.split('/')[3]
repo = app_url.split('/')[4]
if forge_site == "github.com":
forge_type = "github"
elif forge_site == "framagit.org":
forge_type = "gitlab"
elif forge_site == "code.ffdn.org":
forge_type = "gitlab"
elif forge_site == "code.antopie.org":
forge_type = "gitea"
else:
forge_type = "unknown"
previous_state = already_built_file.get(app, {}).get("state", {})
manifest = {}
timestamp = None
previous_rev = already_built_file.get(app, {}).get("git", {}).get("revision", None)
previous_url = already_built_file.get(app, {}).get("git", {}).get("url")
previous_level = already_built_file.get(app, {}).get("level")
previous_maintained = already_built_file.get(app, {}).get("maintained")
previous_featured = already_built_file.get(app, {}).get("featured")
previous_high_quality = already_built_file.get(app, {}).get("high_quality")
if app_rev == "HEAD":
app_rev = subprocess.check_output(["git", "ls-remote", app_url, "refs/heads/"+app_branch]).split()[0]
if not re.match(r"^[0-9a-f]+$", app_rev):
error("Revision for %s did not match expected regex" % app)
continue
if previous_rev is None:
previous_rev = 'HEAD'
# If this is a github repo, we are able to optimize things a bit by looking at the diff
# and not actually updating the app if only README or other not-so-important files were edited
if previous_rev != app_rev and forge_type == "github":
url = "https://api.github.com/repos/{}/{}/compare/{}...{}".format(owner, repo, previous_rev, app_branch)
diff = get_json(url)
if not diff or not diff["commits"]:
app_rev = previous_rev if previous_rev != 'HEAD' else app_rev
else:
# Only if those files got updated, do we want to update the
# commit (otherwise that would trigger an unnecessary upgrade)
ignore_files = [ "README.md", "LICENSE", ".gitignore", "check_process", ".travis.yml" ]
diff_files = [ f for f in diff["files"] if f["filename"] not in ignore_files ]
if diff_files:
print("This app points to HEAD and significant changes where found between HEAD and previous commit")
app_rev = diff["commits"][-1]["sha"]
else:
print("This app points to HEAD but no significant changes where found compared to HEAD, so keeping the previous commit")
app_rev = previous_rev if previous_rev != 'HEAD' else app_rev
print("Previous commit : %s" % previous_rev)
print("Current commit : %s" % app_rev)
if previous_rev == app_rev and previous_url == app_url:
print("Already up to date, ignoring")
result_dict[app] = already_built_file[app]
if previous_state != app_state:
result_dict[app]["state"] = app_state
print("... but has changed of state, updating it from '%s' to '%s'" % (previous_state, app_state))
if previous_level != app_level or app_level is None:
result_dict[app]["level"] = app_level
print("... but has changed of level, updating it from '%s' to '%s'" % (previous_level, app_level))
if previous_maintained != app_maintained:
result_dict[app]["maintained"] = app_maintained
print("... but maintained status changed, updating it from '%s' to '%s'" % (previous_maintained, app_maintained))
if previous_featured != app_featured:
result_dict[app]["featured"] = app_featured
print("... but featured status changed, updating it from '%s' to '%s'" % (previous_featured, app_featured))
if previous_high_quality != app_high_quality:
result_dict[app]["high_quality"] = app_high_quality
print("... but high_quality status changed, updating it from '%s' to '%s'" % (previous_high_quality, app_high_quality))
print "update translations but don't download anything"
result_dict[app]['manifest'] = include_translations_in_manifest(app, result_dict[app]['manifest'])
continue
print("Revision changed ! Updating...")
raw_url = 'https://%(forge_site)s/%(owner)s/%(repo)s/raw/%(app_rev)s/manifest.json' % {
"forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev
}
manifest = get_json(raw_url, verify=True)
if manifest is None:
error("Manifest is empty for app %s ?" % app)
continue
# Hosted on GitHub
if forge_type == "github":
api_url = 'https://api.github.com/repos/%(owner)s/%(repo)s/commits/%(app_rev)s' % {
"owner": owner, "repo": repo, "app_rev": app_rev
}
info2 = get_json(api_url)
if info2 is None:
error("Commit info is empty for app %s ?" % app)
continue
commit_date = parse(info2['commit']['author']['date'])
timestamp = int(time.mktime(commit_date.timetuple()))
# Gitlab-type forge
elif forge_type == "gitlab":
api_url = 'https://%(forge_site)s/api/v4/projects/%(owner)s%%2F%(repo)s/repository/commits/%(app_rev)s' % {
"forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev
}
commit = get_json(api_url)
if commit is None:
error("Commit info is empty for app %s ?" % app)
continue
commit_date = parse(commit["authored_date"])
timestamp = int(time.mktime(commit_date.timetuple()))
elif forge_type == "gitea":
api_url = 'https://%(forge_site)s/api/v1/repos/%(owner)s/%(repo)s/git/commits/%(app_rev)s' % {
"forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev
}
info2 = get_json(api_url)
if info2 is None:
error("Commit info is empty for app %s ?" % app)
continue
commit_date = parse(info2['commit']['author']['date'])
timestamp = int(time.mktime(commit_date.timetuple()))
# Gogs-type forge
elif forge_type == "gogs":
if not app_url.endswith('.git'):
app_url += ".git"
obj_url = '%s/objects/%s/%s' % (
app_url, app_rev[0:2], app_rev[2:]
)
commit = get_zlib(obj_url, verify=False)
if commit is None or len(commit) < 2:
error("Commit info is empty for app %s ?" % app)
continue
else:
commit = commit[1]
re_commit_author = re.compile(
r'^author (?P<name>.+) <(?P<email>.+)> (?P<time>\d+) (?P<tz>[+-]\d+)$',
re.MULTILINE
)
# Extract author line and commit date
commit_author = re_commit_author.search(commit)
if not commit_author:
error("Author line in commit not found for app %s" % app)
continue
# Construct UTC timestamp
timestamp = int(commit_author.group('time'))
tz = commit_author.group('tz')
if len(tz) != 5:
error("Unexpected timezone length in commit for app %s" % app)
continue
elif tz != '+0000':
tdelta = (int(tz[1:3]) * 3600) + (int(tz[3:5]) * 60)
if tz[0] == '+':
timestamp -= tdelta
elif tz[0] == '-':
timestamp += tdelta
else:
error("Unexpected timezone format in commit for app %s" % app)
continue
else:
error("Unsupported VCS and/or protocol for app %s" % app)
continue
if manifest["id"] != app or manifest["id"] != repo.replace("_ynh", ""):
print("Warning: IDs different between list.json, manifest and repo name")
print(" Manifest id : %s" % manifest["id"])
print(" Name in json list : %s" % app)
print(" Repo name : %s" % repo.replace("_ynh", ""))
try:
result_dict[manifest['id']] = {
'git': {
'branch': info['branch'],
'revision': app_rev,
'url': app_url
},
'lastUpdate': timestamp,
'manifest': include_translations_in_manifest(manifest['id'], manifest),
'state': info['state'],
'level': info.get('level', '?'),
'maintained': app_maintained,
'high_quality': app_high_quality,
'featured': app_featured,
'category': info.get('category', None),
'subtags': info.get('subtags', []),
}
except KeyError as e:
error("Invalid app info or manifest for app %s, %s" % (app, e))
continue
#######################
## Current version 2 ##
#######################
categories = yaml.load(open("categories.yml").read())
with open(output.replace(".json", "-v2.json"), 'w') as f:
f.write(json.dumps({"apps": result_dict, "categories": categories}, sort_keys=True))
######################
## Legacy version 1 ##
######################
with open(output, 'w') as f:
f.write(json.dumps(result_dict, sort_keys=True))
print("\nDone! Written in %s" % output)
######################
## Legacy version 0 ##
######################
print("\nAlso splitting the file into official and community-build.json for backward compatibility")
official_apps = set(["agendav", "ampache", "baikal", "dokuwiki", "etherpad_mypads", "hextris", "jirafeau", "kanboard", "my_webapp", "nextcloud", "opensondage", "phpmyadmin", "piwigo", "rainloop", "roundcube", "searx", "shellinabox", "strut", "synapse", "transmission", "ttrss", "wallabag2", "wordpress", "zerobin"])
official_apps_dict = {k: v for k, v in result_dict.items() if k in official_apps}
community_apps_dict = {k: v for k, v in result_dict.items() if k not in official_apps}
# We need the official apps to have "validated" as state to be recognized as official
for app, infos in official_apps_dict.items():
infos["state"] = "validated"
with open("official-build.json", 'w') as f:
f.write(json.dumps(official_apps_dict, sort_keys=True))
with open("community-build.json", 'w') as f:
f.write(json.dumps(community_apps_dict, sort_keys=True))
######################
print("\nDone!")
if __name__ == "__main__":
refresh_all_caches()
build_catalog()


@@ -7,4 +7,4 @@ cd $workdir
date >> $log
git pull >/dev/null
python ./list_builder.py &>> $log || sendxmpppy "[listbuilder] Rebuilding the application list failed miserably"
./list_builder.py &>> $log || sendxmpppy "[listbuilder] Rebuilding the application list failed miserably"