Mirror of https://github.com/YunoHost/yunorunner.git (synced 2024-09-03 20:05:52 +02:00)
Fixes after tests on the battlefield
This commit is contained in:
parent a435d2973f
commit 8f58109973
1 changed file with 101 additions and 61 deletions
run.py
@@ -11,6 +11,7 @@ import traceback
 import itertools
 import tracemalloc
 import string
+import shutil
 
 import hmac
 import hashlib
@@ -432,25 +433,32 @@ async def jobs_dispatcher():
 
 async def cleanup_old_package_check_if_lock_exists(worker, job, ignore_error=False):
 
-    await asyncio.sleep(3)
+    await asyncio.sleep(1)
 
     if not os.path.exists(app.config.PACKAGE_CHECK_LOCK_PER_WORKER.format(worker_id=worker.id)):
         return
 
-    job.log += f"Lock for worker {worker.id} still exist ... trying to cleanup old check process ...\n"
+    job.log += f"Lock for worker {worker.id} still exist ... trying to cleanup the old package check still running ...\n"
     job.save()
+    await broadcast({
+        "action": "update_job",
+        "id": job.id,
+        "data": model_to_dict(job),
+    }, ["jobs", f"job-{job.id}", f"app-jobs-{job.url_or_path}"])
+
     task_logger.info(f"Lock for worker {worker.id} still exist ... trying to cleanup old check process ...")
 
     cwd = os.path.split(app.config.PACKAGE_CHECK_PATH)[0]
     env = {
-        "WORKER_ID": worker.id,
+        "IN_YUNORUNNER": "1",
+        "WORKER_ID": str(worker.id),
         "ARCH": app.config.ARCH,
         "DIST": app.config.DIST,
         "YNH_BRANCH": app.config.YNH_BRANCH,
         "PATH": os.environ["PATH"] + ":/usr/local/bin", # This is because lxc/lxd is in /usr/local/bin
     }
 
-    cmd = f"{app.config.PACKAGE_CHECK_PATH} --force-stop"
+    cmd = f"script -qefc '{app.config.PACKAGE_CHECK_PATH} --force-stop 2>&1'"
     try:
         command = await asyncio.create_subprocess_shell(cmd,
                                                         cwd=cwd,
@@ -458,6 +466,7 @@ async def cleanup_old_package_check_if_lock_exists(worker, job, ignore_error=Fal
                                                         stdout=asyncio.subprocess.PIPE,
                                                         stderr=asyncio.subprocess.PIPE)
         while not command.stdout.at_eof():
+            data = await command.stdout.readline()
             await asyncio.sleep(1)
     except Exception:
         traceback.print_exc()
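For context on the two hunks above: the --force-stop call is now wrapped in script -qefc '... 2>&1', so package_check runs under a pseudo-terminal with stderr folded into stdout, and the loop drains that single stream line by line instead of sleeping blindly. A minimal standalone sketch of the same pattern, assuming util-linux script; the run_under_pty helper and the echo command are illustrative, not part of the commit:

```python
import asyncio


async def run_under_pty(cmd: str) -> int:
    # `script -qefc` gives the child a pseudo-terminal (-q quiet, -e propagate
    # the child's exit code, -f flush as output appears, -c run this command);
    # the 2>&1 folds stderr into stdout so one pipe is enough to drain.
    # Note: without an explicit file argument, `script` also writes a local
    # "typescript" file, as the commit's command does.
    wrapped = f"script -qefc '{cmd} 2>&1'"
    proc = await asyncio.create_subprocess_shell(
        wrapped,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    # Drain stdout until EOF so the pipe buffer never fills up and stalls the child.
    while not proc.stdout.at_eof():
        line = await proc.stdout.readline()
        if line:
            print(line.decode(errors="replace"), end="")
    return await proc.wait()


if __name__ == "__main__":
    asyncio.run(run_under_pty("echo hello from a pty"))
```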
@@ -475,7 +484,7 @@ async def cleanup_old_package_check_if_lock_exists(worker, job, ignore_error=Fal
         command.terminate()
 
         if not ignore_error:
-            job.log += "\nFailed to lill old check process?"
+            job.log += "\nFailed to kill old check process?"
             job.state = "canceled"
 
         task_logger.info(f"Job '{job.name} #{job.id}' has been canceled")
@@ -486,6 +495,11 @@ async def cleanup_old_package_check_if_lock_exists(worker, job, ignore_error=Fal
         return True
     finally:
         job.save()
+        await broadcast({
+            "action": "update_job",
+            "id": job.id,
+            "data": model_to_dict(job),
+        }, ["jobs", f"job-{job.id}", f"app-jobs-{job.url_or_path}"])
 
 
 async def run_job(worker, job):
@@ -495,6 +509,8 @@ async def run_job(worker, job):
         "data": model_to_dict(job),
     }, ["jobs", f"job-{job.id}", f"app-jobs-{job.url_or_path}"])
 
+    await asyncio.sleep(5)
+
     cleanup_ret = await cleanup_old_package_check_if_lock_exists(worker, job)
     if cleanup_ret is False:
         return
@@ -505,7 +521,8 @@ async def run_job(worker, job):
 
     cwd = os.path.split(app.config.PACKAGE_CHECK_PATH)[0]
     env = {
-        "WORKER_ID": worker.id,
+        "IN_YUNORUNNER": "1",
+        "WORKER_ID": str(worker.id),
         "ARCH": app.config.ARCH,
         "DIST": app.config.DIST,
         "YNH_BRANCH": app.config.YNH_BRANCH,
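Same environment change as in the cleanup helper above: the worker id is now stringified and an IN_YUNORUNNER flag is exported. The cast matters because the mapping handed to a subprocess must contain strings only; a sketch with an invented worker id and a plain echo, not taken from the commit:

```python
import asyncio
import os


async def main() -> None:
    worker_id = 1  # illustrative value
    env = {
        "IN_YUNORUNNER": "1",
        # Passing the bare int here typically fails with a TypeError when the
        # process is spawned, hence str(worker.id) in the diff.
        "WORKER_ID": str(worker_id),
        "PATH": os.environ["PATH"] + ":/usr/local/bin",
    }
    proc = await asyncio.create_subprocess_shell("echo WORKER_ID=$WORKER_ID", env=env)
    await proc.wait()


asyncio.run(main())
```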
@@ -513,24 +530,29 @@ async def run_job(worker, job):
     }
 
     now = datetime.now().strftime("%d/%m/%Y - %H:%M:%S")
-    msg = now + " - Starting test for {job.name} on arch {app.config.ARCH}, distrib {app.config.DIST}, with YunoHost {app.config.YNH_BRANCH}"
+    msg = now + f" - Starting test for {job.name} on arch {app.config.ARCH}, distrib {app.config.DIST}, with YunoHost {app.config.YNH_BRANCH}"
     job.log += "=" * len(msg) + "\n"
     job.log += msg + "\n"
     job.log += "=" * len(msg) + "\n"
     job.save()
+    await broadcast({
+        "action": "update_job",
+        "id": job.id,
+        "data": model_to_dict(job),
+    }, ["jobs", f"job-{job.id}", f"app-jobs-{job.url_or_path}"])
 
     result_json = app.config.PACKAGE_CHECK_RESULT_JSON_PER_WORKER.format(worker_id=worker.id)
     full_log = app.config.PACKAGE_CHECK_FULL_LOG_PER_WORKER.format(worker_id=worker.id)
     summary_png = app.config.PACKAGE_CHECK_SUMMARY_PNG_PER_WORKER.format(worker_id=worker.id)
 
-    if os.exists(result_json):
+    if os.path.exists(result_json):
         os.remove(result_json)
-    if os.exists(full_log):
+    if os.path.exists(full_log):
         os.remove(full_log)
-    if os.exists(summary_png):
+    if os.path.exists(summary_png):
         os.remove(summary_png)
 
-    cmd = f"timeout {app.config.TIMEOUT} --signal=TERM nice --adjustment=10 /bin/bash {app.config.PACKAGE_CHECK_PATH} {job.url_or_path}"
+    cmd = f"timeout --signal TERM {app.config.TIMEOUT} nice --adjustment=10 script -qefc '/bin/bash {app.config.PACKAGE_CHECK_PATH} {job.url_or_path} 2>&1'"
     task_logger.info(f"Launching command: {cmd}")
 
     try:
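One of the fixes above is the missing f prefix on the "Starting test" banner (the matching "Finished job" banner further down gets the same treatment): without it the placeholders stay as literal braces in the job log. A two-line illustration with an invented app name:

```python
name = "demo_ynh"  # invented example value
print(" - Starting test for {name}")    # prints the braces literally
print(f" - Starting test for {name}")   # prints: - Starting test for demo_ynh
```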
@@ -585,16 +607,16 @@ async def run_job(worker, job):
             job.log += f"\nJob failed ? Return code is {comman.returncode} / Or maybe the json result doesnt exist...\n"
             job.state = "error"
         else:
-            job.log += f"\nPackage check completed"
+            job.log += f"\nPackage check completed\n"
             results = json.load(open(result_json))
             level = results["level"]
             job.state = "done" if level > 4 else "failure"
 
-            log.log += f"\nThe full log is available at {app.config.BASE_URL}/logs/{job.id}.log\n"
+            job.log += f"\nThe full log is available at {app.config.BASE_URL}/logs/{job.id}.log\n"
 
-            os.copy(full_log, yunorunner_dir + f"/results/logs/{job.id}.log")
-            os.copy(result_json, yunorunner_dir + f"/results/logs/{job_app}_{app.config.ARCH}_{app.config.YNH_BRANCH}_results.json")
-            os.copy(summary_png, yunorunner_dir + f"/results/summary/{job.id}.png")
+            shutil.copy(full_log, yunorunner_dir + f"/results/logs/{job.id}.log")
+            shutil.copy(result_json, yunorunner_dir + f"/results/logs/{job_app}_{app.config.ARCH}_{app.config.YNH_BRANCH}_results.json")
+            shutil.copy(summary_png, yunorunner_dir + f"/results/summary/{job.id}.png")
 
             public_result_json_path = yunorunner_dir + f"/results/logs/list_level_{app.config.YNH_BRANCH}_{app.config.ARCH}.json"
             if os.path.exists(public_result_json_path) or not open(public_result_json_path).read().strip():
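The copies above previously went through os.copy, which does not exist (much like the os.exists calls fixed earlier), so they raised AttributeError as soon as they ran; shutil.copy, together with the new import shutil at the top of the file, is the standard-library call for this. A quick check using throwaway temp files rather than the CI's real paths:

```python
import os
import shutil
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    src = os.path.join(tmp, "full.log")
    dst = os.path.join(tmp, "copy.log")
    with open(src, "w") as f:
        f.write("package_check output\n")

    print(hasattr(os, "copy"))        # False: os.copy is not a thing
    shutil.copy(src, dst)             # copies contents and permission bits
    print(open(dst).read().strip())   # package_check output
```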
@@ -607,61 +629,75 @@ async def run_job(worker, job):
     finally:
         job.end_time = datetime.now()
 
-        now = datetime.datetime.now().strftime("%d/%m/%Y - %H:%M:%S")
-        msg = now + " - Finished job for {job.name}"
+        now = datetime.now().strftime("%d/%m/%Y - %H:%M:%S")
+        msg = now + f" - Finished job for {job.name} ({job.state})"
         job.log += "=" * len(msg) + "\n"
         job.log += msg + "\n"
         job.log += "=" * len(msg) + "\n"
 
         job.save()
+        await broadcast({
+            "action": "update_job",
+            "id": job.id,
+            "data": model_to_dict(job),
+        }, ["jobs", f"job-{job.id}", f"app-jobs-{job.url_or_path}"])
+
 
         if "ci-apps.yunohost.org" in app.config.BASE_URL:
-            async with aiohttp.ClientSession() as session:
-                async with session.get(APPS_LIST) as resp:
-                    data = await resp.json()
-                data = data["apps"]
-                public_level = data.get(job_app, {}).get("level")
+            try:
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(APPS_LIST) as resp:
+                        data = await resp.json()
+                    data = data["apps"]
+                    public_level = data.get(job_app, {}).get("level")
 
                 job_url = app.config.BASE_URL + "/job/" + job.id
                 job_id_with_url = f"[#{job.id}]({job_url})"
                 if job.state == "error":
                     msg = f"Job {job_id_with_url} for {job_app} failed miserably :("
                 elif not level == 0:
                     msg = f"App {job_app} failed all tests in job {job_id_with_url} :("
                 elif public_level is None:
                     msg = f"App {job_app} rises from level (unknown) to {level} in job {job_id_with_url} !"
                 elif level > public_level:
                     msg = f"App {job_app} rises from level {public_level} to {level} in job {job_id_with_url} !"
                 elif level < public_level:
                     msg = f"App {job_app} goes down from level {public_level} to {level} in job {job_id_with_url}"
                 elif level < 6:
                     msg = f"App {job_app} stays at level {level} in job {job_id_with_url}"
                 else:
                     # Dont notify anything, reduce CI flood on app chatroom if app is already level 6+
                     msg = ""
 
                 if msg:
                     cmd = f"{yunorunner_dir}/maintenance/chat_notify.sh '{msg}'"
                     try:
                         command = await asyncio.create_subprocess_shell(cmd)
                         while not command.stdout.at_eof():
                             await asyncio.sleep(1)
                     except:
                         pass
+            except:
+                traceback.print_exc()
+                task_logger.exception(f"ERROR in job '{job.name} #{job.id}'")
+
+                job.log += "\n"
+                job.log += "Exception:\n"
+                job.log += traceback.format_exc()
 
         await cleanup_old_package_check_if_lock_exists(worker, job, ignore_error=True)
 
         # remove ourself from the state
         del jobs_in_memory_state[job.id]
 
         worker.state = "available"
         worker.save()
 
         await broadcast({
             "action": "update_job",
             "id": job.id,
             "data": model_to_dict(job),
         }, ["jobs", f"job-{job.id}", f"app-jobs-{job.url_or_path}"])
 
 
 async def broadcast(message, channels):
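The biggest change above wraps the whole chatroom-notification block in a try/bare except that logs the traceback into job.log, so a failure while querying APPS_LIST or running chat_notify.sh can no longer abort the finally block that frees the worker. A reduced sketch of that shape; fetch_public_level and its failure are invented placeholders:

```python
import asyncio
import traceback


async def fetch_public_level(app_name: str) -> int:
    # Stand-in for the aiohttp lookup against APPS_LIST; it fails on purpose.
    raise RuntimeError("apps list unreachable")


async def finish_job(app_name: str) -> None:
    job_log = ""
    try:
        level = await fetch_public_level(app_name)
        print(f"would notify the chatroom: {app_name} is level {level}")
    except Exception:
        traceback.print_exc()
        job_log += "\nException:\n" + traceback.format_exc()
    # The cleanup path still runs even though the notification failed.
    print("worker released; captured log bytes:", len(job_log))


asyncio.run(finish_job("demo_ynh"))
```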
@@ -1030,6 +1066,7 @@ async def stop_job(job_id):
 
     if job.state == "running":
         api_logger.info(f"Cancel running job '{job.name}' [job.id] on request")
+
         job.state = "canceled"
         job.end_time = datetime.now()
         job.save()
@@ -1037,6 +1074,7 @@ async def stop_job(job_id):
         jobs_in_memory_state[job.id]["task"].cancel()
 
         worker = Worker.select().where(Worker.id == jobs_in_memory_state[job.id]["worker"])[0]
+
         worker.state = "available"
         worker.save()
 
@@ -1045,6 +1083,8 @@ async def stop_job(job_id):
             "data": model_to_dict(job),
         }, ["jobs", f"job-{job.id}", f"app-jobs-{job.url_or_path}"])
 
+        await cleanup_old_package_check_if_lock_exists(worker, job, ignore_error=True)
+
         return response.text("ok")
 
     if job.state in ("done", "canceled", "failure", "error"):
@@ -1061,7 +1101,7 @@ async def stop_job(job_id):
 @app.route("/api/job/<job_id:int>/stop", methods=['POST'])
 async def api_stop_job(request, job_id):
     # TODO auth or some kind
-    await stop_job(job_id)
+    return await stop_job(job_id)
 
 
 @app.route("/api/job/<job_id:int>/restart", methods=['POST'])
@@ -1185,7 +1225,7 @@ async def html_index(request):
     return {'relative_path_to_root': '', 'path': request.path}
 
 
-@always_relaunch(sleep=2)
+@always_relaunch(sleep=10)
 async def number_of_tasks():
     print("Number of tasks: %s" % len(asyncio_all_tasks()))
 
@@ -1201,7 +1241,7 @@ async def monitor(request):
         "top_20_trace": [str(x) for x in top_stats[:20]],
         "tasks": {
             "number": len(tasks),
-            "array": map(show_coro, tasks),
+            "array": [show_coro(t) for t in tasks],
         }
     })
 
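The monitor endpoint used to put a lazy map object into the JSON payload, which the JSON encoder cannot serialize; materializing it as a list, as the new line does, is what makes the response serializable. A tiny illustration where describe stands in for show_coro:

```python
import json


def describe(task_name: str) -> str:
    return f"<task {task_name}>"  # stand-in for show_coro()


tasks = ["job-1", "job-2"]

# json.dumps({"array": map(describe, tasks)}) raises:
#   TypeError: Object of type map is not JSON serializable
print(json.dumps({"array": [describe(t) for t in tasks]}))
```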
@@ -1416,7 +1456,7 @@ def main(config="./config.py"):
         "WORKER_COUNT": 1,
         "ARCH": "amd64",
         "DIST": "bullseye",
-        "PACKAGE_CHECK_DIR": "./package_check/",
+        "PACKAGE_CHECK_DIR": yunorunner_dir + "/package_check/",
         "WEBHOOK_TRIGGERS": ["!testme", "!gogogadgetoci", "By the power of systemd, I invoke The Great App CI to test this Pull Request!"],
         "WEBHOOK_CATCHPHRASES": ["Alrighty!", "Fingers crossed!", "May the CI gods be with you!", ":carousel_horse:", ":rocket:", ":sunflower:", "Meow :cat2:", ":v:", ":stuck_out_tongue_winking_eye:"],
     }