1
0
Fork 0
mirror of https://github.com/YunoHost/apps.git synced 2024-09-03 20:06:07 +02:00

Rewrite list_builder to use a local git clone cache and simpler logic

This commit is contained in:
Alexandre Aubin 2020-04-05 02:54:24 +02:00
parent 300e6abca2
commit 90cfa0fe59
3 changed files with 189 additions and 362 deletions

7
.gitignore vendored
View file

@ -1,8 +1,5 @@
# temp files
*.swp *.swp
*~ *~
# builded .apps_cache
*-build.json builds
.github_credentials

View file

@ -1,34 +1,198 @@
#!/usr/bin/env python2 #!/usr/bin/python3
import re
import os import os
import sys import re
import time
import json import json
import zlib
import subprocess import subprocess
import yaml import yaml
import time
import requests now = time.time()
from dateutil.parser import parse
# Catalog of apps to process, read from apps.json in the current working
# directory. The file is opened in a `with` block so the handle is closed
# right after parsing instead of being left to the garbage collector.
with open("apps.json") as catalog_file:
    catalog = json.load(catalog_file)

# Copy of the process environment with GIT_TERMINAL_PROMPT=0, which tells git
# to fail instead of prompting on the terminal (e.g. for credentials).
my_env = os.environ.copy()
my_env["GIT_TERMINAL_PROMPT"] = "0"

# Working directories: local git clones of the apps, and the generated files.
os.makedirs(".apps_cache", exist_ok=True)
os.makedirs("builds/", exist_ok=True)
def app_cache_folder(app):
    """Return the path of the local git clone cache for *app*.

    The path is relative to the current working directory and lives under
    the ``.apps_cache`` folder.
    """
    cache_root = ".apps_cache"
    return os.path.join(cache_root, app)
def refresh_all_caches():
    """Create or update the local git cache of every app in the catalog.

    For each catalog entry the lower-cased app name is printed, then the
    cache is initialised when its folder does not exist yet, or refreshed
    otherwise. A failure on one app is reported through ``error`` and does
    not stop the loop.
    """
    for name, infos in catalog.items():
        app_id = name.lower()
        print(app_id)

        # Pick the operation and its matching failure message up front so
        # that a single try/except covers both cases.
        if os.path.exists(app_cache_folder(app_id)):
            operation = refresh_cache
            failure_template = "Could not refresh cache for %s: %s"
        else:
            operation = init_cache
            failure_template = "Could not init cache for %s: %s"

        try:
            operation(app_id, infos)
        except Exception as exc:
            error(failure_template % (app_id, exc))
assert os.path.exists(".github_credentials"), "You should add github credentials in .github_credentials first. (Format <user>:<token>)"
assert os.path.exists(list_name), "File %s doesn't exist ?" % list_name
def init_cache(app, infos):
    """Create the local cache of *app* with a shallow clone of its master branch.

    The clone depth depends on ``infos["state"]``: 5 commits for
    "notworking" apps, 20 for "inprogress" apps and 40 for everything else.

    app   -- app identifier, used to compute the cache folder
    infos -- catalog entry of the app; reads the "state" and "url" keys

    Raises whatever ``git`` raises when the clone fails.
    """
    # This has to be a single if/elif/else chain: with two independent `if`
    # statements, the trailing `else` overrode the "notworking" depth with 40.
    if infos["state"] == "notworking":
        depth = 5
    elif infos["state"] == "inprogress":
        depth = 20
    else:
        depth = 40

    git("clone --depth {depth} --single-branch --branch master {url} {folder}".format(depth=depth, url=infos["url"], folder=app_cache_folder(app)))
def refresh_cache(app, infos):
    """Bring the existing local cache of *app* up to date with its remote master.

    Nothing is done when ``.git/FETCH_HEAD`` in the cache was modified less
    than an hour (3600 seconds) before the script started. Otherwise the
    origin URL is reset to ``infos["url"]``, master is force-fetched and the
    working tree is hard-reset to ``origin/master``.
    """
    cache_dir = app_cache_folder(app)

    # Don't refresh if already refreshed during last hour
    fetch_head = cache_dir + "/.git/FETCH_HEAD"
    fetched_recently = os.path.exists(fetch_head) and now - os.path.getmtime(fetch_head) < 3600
    if fetched_recently:
        return

    update_steps = (
        "remote set-url origin " + infos["url"],
        "fetch origin master --force",
        "reset origin/master --hard",
    )
    for step in update_steps:
        git(step, in_folder=cache_dir)
def git(cmd, in_folder=None):
    """Run a git command and return its stripped standard output as text.

    cmd       -- git arguments as a single string, without the leading "git";
                 it is split on whitespace, so arguments cannot contain spaces
    in_folder -- optional repository folder, passed to git through ``-C``

    Raises subprocess.CalledProcessError when git exits with a non-zero status.
    """
    if in_folder:
        cmd = "-C " + in_folder + " " + cmd
    cmd = "git " + cmd
    # Run with the module-level environment that sets GIT_TERMINAL_PROMPT=0,
    # so that git fails instead of waiting on a terminal prompt (for example
    # when a repository asks for credentials).
    return subprocess.check_output(cmd.split(), env=my_env).strip().decode("utf-8")
def build_catalog():
    """Build every catalog file under ``builds/`` from the cached app repositories.

    Each catalog entry is turned into an app dict by ``build_app_dict``; apps
    for which this fails are reported through ``error`` and left out. The
    result is then written as:

    - builds/v2.json: {"apps": ..., "categories": ...}
    - builds/v1.json: the apps dict alone
    - builds/v0-official.json and builds/v0-community.json: the apps dict
      split according to a hard-coded list of official apps
    """
    result_dict = {}

    for app, infos in catalog.items():
        print("Processing '%s'..." % app)
        app = app.lower()

        try:
            app_dict = build_app_dict(app, infos)
        except Exception as e:
            error("Adding %s failed: %s" % (app, str(e)))
            continue

        # Apps are keyed by the id declared in their manifest, not by the
        # name used in the catalog.
        result_dict[app_dict["id"]] = app_dict

    #####################
    # Current version 2 #
    #####################
    # safe_load only builds plain Python objects; yaml.load without an
    # explicit Loader is deprecated since PyYAML 5.1 and rejected by 6.0.
    # NOTE(review): assumes categories.yml uses no custom YAML tags - confirm.
    with open("categories.yml") as f:
        categories = yaml.safe_load(f)
    with open("builds/v2.json", 'w') as f:
        f.write(json.dumps({"apps": result_dict, "categories": categories}, sort_keys=True))

    ####################
    # Legacy version 1 #
    ####################
    with open("builds/v1.json", 'w') as f:
        f.write(json.dumps(result_dict, sort_keys=True))

    ####################
    # Legacy version 0 #
    ####################
    official_apps = set(["agendav", "ampache", "baikal", "dokuwiki", "etherpad_mypads", "hextris", "jirafeau", "kanboard", "my_webapp", "nextcloud", "opensondage", "phpmyadmin", "piwigo", "rainloop", "roundcube", "searx", "shellinabox", "strut", "synapse", "transmission", "ttrss", "wallabag2", "wordpress", "zerobin"])

    official_apps_dict = {k: v for k, v in result_dict.items() if k in official_apps}
    community_apps_dict = {k: v for k, v in result_dict.items() if k not in official_apps}

    # We need the official apps to have "validated" as state to be recognized as official.
    # This mutates the dicts shared with result_dict, which is why it only
    # happens after the v1 and v2 files have been written.
    for infos in official_apps_dict.values():
        infos["state"] = "validated"

    with open("builds/v0-official.json", 'w') as f:
        f.write(json.dumps(official_apps_dict, sort_keys=True))

    with open("builds/v0-community.json", 'w') as f:
        f.write(json.dumps(community_apps_dict, sort_keys=True))
def build_app_dict(app, infos):
    """Build the catalog entry of *app* from its cached repository.

    app   -- app identifier, used to locate the cache folder
    infos -- catalog entry of the app; "branch", "revision", "url" and
             "state" are required, the other keys fall back to defaults.
             When "revision" is "HEAD", ``infos["revision"]`` is replaced in
             place by the resolved commit id.

    Returns a dict with the manifest id, git coordinates, the commit
    timestamp as ``lastUpdate``, the translated manifest and the catalog
    metadata.

    Raises AssertionError when a sanity check fails, plus whatever the
    manifest loading or ``git`` calls raise.
    NOTE(review): the checks rely on ``assert``, which Python skips when run
    with -O.
    """
    # Make sure we have some cache
    assert infos["branch"] == "master"
    this_app_cache = app_cache_folder(app)
    assert os.path.exists(this_app_cache), "No cache yet for %s" % app

    # Close the manifest handle as soon as it is parsed
    with open(this_app_cache + "/manifest.json") as f:
        manifest = json.load(f)

    if infos["revision"] == "HEAD":
        # Resolve HEAD to the most recent commit touching a file that matters
        # for the app itself, so that other edits do not change the revision.
        relevant_files = ["manifest.json", "actions.json", "hooks/", "scripts/", "conf/", "sources/"]
        most_recent_relevant_commit = "rev-list --full-history --all -n 1 -- " + " ".join(relevant_files)
        infos["revision"] = git(most_recent_relevant_commit, in_folder=this_app_cache)
        assert re.match(r"^[0-9a-f]+$", infos["revision"]), "Output was not a commit? '%s'" % infos["revision"]
    else:
        assert infos["revision"] in git("rev-list --all", in_folder=this_app_cache).split("\n"), "Revision ain't in history ? %s" % infos["revision"]

    # %ct is the committer date of the revision as a UNIX timestamp
    timestamp = git("show -s --format=%ct " + infos["revision"], in_folder=this_app_cache)
    assert re.match(r"^[0-9]+$", timestamp), "Failed to get timestamp for revision ? '%s'" % timestamp
    timestamp = int(timestamp)

    return {'id': manifest["id"],
            'git': {
                'branch': infos['branch'],
                'revision': infos["revision"],
                'url': infos["url"]
            },
            'lastUpdate': timestamp,
            'manifest': include_translations_in_manifest(manifest),
            'state': infos['state'],
            'level': infos.get('level', '?'),
            'maintained': infos.get("maintained", True),
            'high_quality': infos.get("high_quality", False),
            'featured': infos.get("featured", False),
            'category': infos.get('category', None),
            'subtags': infos.get('subtags', []),
            }
def include_translations_in_manifest(manifest):
    """Merge the translations found in ``locales/*.json`` into *manifest*.

    Every file of the ``locales`` folder whose name ends with "json", except
    ``en.json``, is read as a flat ``{key: text}`` mapping for the language
    named by the part of the file name before the first dot. Empty
    translations are ignored.

    - "<id>_manifest_description" sets ``manifest["description"][lang]``.
    - "<id>_manifest_arguments_<category>_<name>" sets the question's
      ``ask[lang]``, only when the manifest has no text for that language.
    - "<id>_manifest_arguments_<category>_help_<name>" sets the question's
      ``help[lang]`` under the same condition, creating ``help`` if needed.

    The manifest is modified in place and also returned.
    """
    app_name = manifest["id"]

    for locale in os.listdir("locales"):
        if not locale.endswith("json"):
            continue

        if locale == "en.json":
            continue

        current_lang = locale.split(".")[0]
        with open(os.path.join("locales", locale), "r") as f:
            translations = json.load(f)

        key = "%s_manifest_description" % app_name
        if translations.get(key, None):
            manifest["description"][current_lang] = translations[key]

        for category, questions in manifest["arguments"].items():
            for question in questions:
                key = "%s_manifest_arguments_%s_%s" % (app_name, category, question["name"])
                # don't overwrite already existing translation in manifests for now
                # (the former `not ... not in` test did the exact opposite)
                if translations.get(key) and current_lang not in question["ask"]:
                    question["ask"][current_lang] = translations[key]

                key = "%s_manifest_arguments_%s_help_%s" % (app_name, category, question["name"])
                # don't overwrite already existing translation in manifests for now
                if translations.get(key) and current_lang not in question.get("help", {}):
                    # the question may have no "help" entry at all yet
                    question.setdefault("help", {})[current_lang] = translations[key]

    return manifest
def fail(msg, retcode=1):
    """Show failure message and exit.

    msg     -- text printed after the "Error: " prefix
    retcode -- process exit status (default 1)

    Raises SystemExit through ``sys.exit``.
    """
    # Imported here because the module-level imports do not include sys,
    # which made this function raise NameError instead of exiting.
    import sys

    print("Error: {0:s}".format(msg))
    sys.exit(retcode)
def error(msg): def error(msg):
msg = "[Applist builder error] " + msg msg = "[Applist builder error] " + msg
@ -37,342 +201,8 @@ def error(msg):
print(msg) print(msg)
def include_translations_in_manifest(app_name, manifest):
for i in os.listdir("locales"):
if not i.endswith("json"):
continue
if i == "en.json":
continue
current_lang = i.split(".")[0]
translations = json.load(open(os.path.join("locales", i), "r"))
key = "%s_manifest_description" % app_name
if key in translations and translations[key]:
manifest["description"][current_lang] = translations[key]
for category, questions in manifest["arguments"].items():
for question in questions:
key = "%s_manifest_arguments_%s_%s" % (app_name, category, question["name"])
# don't overwrite already existing translation in manifests for now
if key in translations and translations[key] and not current_lang not in question["ask"]:
print "[ask]", current_lang, key
question["ask"][current_lang] = translations[key]
key = "%s_manifest_arguments_%s_help_%s" % (app_name, category, question["name"])
# don't overwrite already existing translation in manifests for now
if key in translations and translations[key] and not current_lang not in question.get("help", []):
print "[help]", current_lang, key
question["help"][current_lang] = translations[key]
return manifest
def get_json(url, verify=True):
try:
# Retrieve and load manifest
if ".github" in url:
r = requests.get(url, verify=verify, auth=github_credentials)
else:
r = requests.get(url, verify=verify)
r.raise_for_status()
return r.json()
except requests.exceptions.RequestException as e:
print("-> Error: unable to request %s, %s" % (url, e))
return None
except ValueError as e:
print("-> Error: unable to decode json from %s : %s" % (url, e))
return None
def get_zlib(url, verify=True):
try:
# Retrieve last commit information
r = requests.get(obj_url, verify=verify)
r.raise_for_status()
return zlib.decompress(r.content).decode('utf-8').split('\x00')
except requests.exceptions.RequestException as e:
print("-> Error: unable to request %s, %s" % (obj_url, e))
return None
except zlib.error as e:
print("-> Error: unable to decompress object from %s : %s" % (url, e))
return None
# Main
# Get list name from filename
print(":: Building %s list..." % list_name)
output = '%s-build.json' % list_name
already_built_file = {}
if os.path.exists(output):
try:
already_built_file = json.load(open(output))
except Exception as e:
print("Error while trying to load already built file: %s" % e)
# Loop through every apps
result_dict = {}
for app, info in apps_list.items():
print("---")
print("Processing '%s'..." % app)
app = app.lower()
# Store usefull values
app_branch = info['branch']
app_url = info['url']
app_rev = info['revision']
app_state = info["state"]
app_level = info.get("level")
app_maintained = info.get("maintained", True)
app_featured = info.get("featured", False)
app_high_quality = info.get("high_quality", False)
forge_site = app_url.split('/')[2]
owner = app_url.split('/')[3]
repo = app_url.split('/')[4]
if forge_site == "github.com":
forge_type = "github"
elif forge_site == "framagit.org":
forge_type = "gitlab"
elif forge_site == "code.ffdn.org":
forge_type = "gitlab"
elif forge_site == "code.antopie.org":
forge_type = "gitea"
else:
forge_type = "unknown"
previous_state = already_built_file.get(app, {}).get("state", {})
manifest = {}
timestamp = None
previous_rev = already_built_file.get(app, {}).get("git", {}).get("revision", None)
previous_url = already_built_file.get(app, {}).get("git", {}).get("url")
previous_level = already_built_file.get(app, {}).get("level")
previous_maintained = already_built_file.get(app, {}).get("maintained")
previous_featured = already_built_file.get(app, {}).get("featured")
previous_high_quality = already_built_file.get(app, {}).get("high_quality")
if app_rev == "HEAD":
app_rev = subprocess.check_output(["git", "ls-remote", app_url, "refs/heads/"+app_branch]).split()[0]
if not re.match(r"^[0-9a-f]+$", app_rev):
error("Revision for %s did not match expected regex" % app)
continue
if previous_rev is None:
previous_rev = 'HEAD'
# If this is a github repo, we are able to optimize things a bit by looking at the diff
# and not actually updating the app if only README or other not-so-important files were edited
if previous_rev != app_rev and forge_type == "github":
url = "https://api.github.com/repos/{}/{}/compare/{}...{}".format(owner, repo, previous_rev, app_branch)
diff = get_json(url)
if not diff or not diff["commits"]:
app_rev = previous_rev if previous_rev != 'HEAD' else app_rev
else:
# Only if those files got updated, do we want to update the
# commit (otherwise that would trigger an unecessary upgrade)
ignore_files = [ "README.md", "LICENSE", ".gitignore", "check_process", ".travis.yml" ]
diff_files = [ f for f in diff["files"] if f["filename"] not in ignore_files ]
if diff_files:
print("This app points to HEAD and significant changes where found between HEAD and previous commit")
app_rev = diff["commits"][-1]["sha"]
else:
print("This app points to HEAD but no significant changes where found compared to HEAD, so keeping the previous commit")
app_rev = previous_rev if previous_rev != 'HEAD' else app_rev
print("Previous commit : %s" % previous_rev)
print("Current commit : %s" % app_rev)
if previous_rev == app_rev and previous_url == app_url:
print("Already up to date, ignoring")
result_dict[app] = already_built_file[app]
if previous_state != app_state:
result_dict[app]["state"] = app_state
print("... but has changed of state, updating it from '%s' to '%s'" % (previous_state, app_state))
if previous_level != app_level or app_level is None:
result_dict[app]["level"] = app_level
print("... but has changed of level, updating it from '%s' to '%s'" % (previous_level, app_level))
if previous_maintained != app_maintained:
result_dict[app]["maintained"] = app_maintained
print("... but maintained status changed, updating it from '%s' to '%s'" % (previous_maintained, app_maintained))
if previous_featured != app_featured:
result_dict[app]["featured"] = app_featured
print("... but featured status changed, updating it from '%s' to '%s'" % (previous_featured, app_featured))
if previous_high_quality != app_high_quality:
result_dict[app]["high_quality"] = app_high_quality
print("... but high_quality status changed, updating it from '%s' to '%s'" % (previous_high_quality, app_high_quality))
print "update translations but don't download anything"
result_dict[app]['manifest'] = include_translations_in_manifest(app, result_dict[app]['manifest'])
continue
print("Revision changed ! Updating...")
raw_url = 'https://%(forge_site)s/%(owner)s/%(repo)s/raw/%(app_rev)s/manifest.json' % {
"forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev
}
manifest = get_json(raw_url, verify=True)
if manifest is None:
error("Manifest is empty for app %s ?" % app)
continue
# Hosted on GitHub
if forge_type == "github":
api_url = 'https://api.github.com/repos/%(owner)s/%(repo)s/commits/%(app_rev)s' % {
"owner": owner, "repo": repo, "app_rev": app_rev
}
info2 = get_json(api_url)
if info2 is None:
error("Commit info is empty for app %s ?" % app)
continue
commit_date = parse(info2['commit']['author']['date'])
timestamp = int(time.mktime(commit_date.timetuple()))
# Gitlab-type forge
elif forge_type == "gitlab":
api_url = 'https://%(forge_site)s/api/v4/projects/%(owner)s%%2F%(repo)s/repository/commits/%(app_rev)s' % {
"forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev
}
commit = get_json(api_url)
if commit is None:
error("Commit info is empty for app %s ?" % app)
continue
commit_date = parse(commit["authored_date"])
timestamp = int(time.mktime(commit_date.timetuple()))
elif forge_type == "gitea":
api_url = 'https://%(forge_site)s/api/v1/repos/%(owner)s/%(repo)s/git/commits/%(app_rev)s' % {
"forge_site": forge_site, "owner": owner, "repo": repo, "app_rev": app_rev
}
info2 = get_json(api_url)
if info2 is None:
error("Commit info is empty for app %s ?" % app)
continue
commit_date = parse(info2['commit']['author']['date'])
timestamp = int(time.mktime(commit_date.timetuple()))
# Gogs-type forge
elif forge_type == "gogs":
if not app_url.endswith('.git'):
app_url += ".git"
obj_url = '%s/objects/%s/%s' % (
app_url, app_rev[0:2], app_rev[2:]
)
commit = get_zlib(obj_url, verify=False)
if commit is None or len(commit) < 2:
error("Commit info is empty for app %s ?" % app)
continue
else:
commit = commit[1]
re_commit_author = re.compile(
r'^author (?P<name>.+) <(?P<email>.+)> (?P<time>\d+) (?P<tz>[+-]\d+)$',
re.MULTILINE
)
# Extract author line and commit date
commit_author = re_commit_author.search(commit)
if not commit_author:
error("Author line in commit not found for app %s" % app)
continue
# Construct UTC timestamp
timestamp = int(commit_author.group('time'))
tz = commit_author.group('tz')
if len(tz) != 5:
error("Unexpected timezone length in commit for app %s" % app)
continue
elif tz != '+0000':
tdelta = (int(tz[1:3]) * 3600) + (int(tz[3:5]) * 60)
if tz[0] == '+':
timestamp -= tdelta
elif tz[0] == '-':
timestamp += tdelta
else:
error("Unexpected timezone format in commit for app %s" % app)
continue
else:
error("Unsupported VCS and/or protocol for app %s" % app)
continue
if manifest["id"] != app or manifest["id"] != repo.replace("_ynh", ""):
print("Warning: IDs different between list.json, manifest and repo name")
print(" Manifest id : %s" % manifest["id"])
print(" Name in json list : %s" % app)
print(" Repo name : %s" % repo.replace("_ynh", ""))
try:
result_dict[manifest['id']] = {
'git': {
'branch': info['branch'],
'revision': app_rev,
'url': app_url
},
'lastUpdate': timestamp,
'manifest': include_translations_in_manifest(manifest['id'], manifest),
'state': info['state'],
'level': info.get('level', '?'),
'maintained': app_maintained,
'high_quality': app_high_quality,
'featured': app_featured,
'category': info.get('category', None),
'subtags': info.get('subtags', []),
}
except KeyError as e:
error("Invalid app info or manifest for app %s, %s" % (app, e))
continue
#######################
## Current version 2 ##
#######################
categories = yaml.load(open("categories.yml").read())
with open(output.replace(".json", "-v2.json"), 'w') as f:
f.write(json.dumps({"apps": result_dict, "categories": categories}, sort_keys=True))
######################
## Legacy version 1 ##
######################
with open(output, 'w') as f:
f.write(json.dumps(result_dict, sort_keys=True))
print("\nDone! Written in %s" % output)
######################
## Legacy version 0 ##
######################
print("\nAlso splitting the file into official and community-build.json for backward compatibility")
official_apps = set(["agendav", "ampache", "baikal", "dokuwiki", "etherpad_mypads", "hextris", "jirafeau", "kanboard", "my_webapp", "nextcloud", "opensondage", "phpmyadmin", "piwigo", "rainloop", "roundcube", "searx", "shellinabox", "strut", "synapse", "transmission", "ttrss", "wallabag2", "wordpress", "zerobin"])
official_apps_dict = {k: v for k, v in result_dict.items() if k in official_apps}
community_apps_dict = {k: v for k, v in result_dict.items() if k not in official_apps}
# We need the official apps to have "validated" as state to be recognized as official
for app, infos in official_apps_dict.items():
infos["state"] = "validated"
with open("official-build.json", 'w') as f:
f.write(json.dumps(official_apps_dict, sort_keys=True))
with open("community-build.json", 'w') as f:
f.write(json.dumps(community_apps_dict, sort_keys=True))
###################### ######################
print("\nDone!") if __name__ == "__main__":
refresh_all_caches()
build_catalog()

View file

@ -7,4 +7,4 @@ cd $workdir
date >> $log date >> $log
git pull >/dev/null git pull >/dev/null
python ./list_builder.py &>> $log || sendxmpppy "[listbuilder] Rebuilding the application list failed miserably" ./list_builder.py &>> $log || sendxmpppy "[listbuilder] Rebuilding the application list failed miserably"