From a97d0bdc01e1c9601794fb05b7c2fdf2914dfd56 Mon Sep 17 00:00:00 2001 From: selfhoster1312 Date: Sun, 25 Aug 2024 18:30:32 +0200 Subject: [PATCH 1/2] Fail startup/network timeout earlier. Exit whole test suite. --- lib/lxc.sh | 100 +++++++++++++++++++++-------------------------------- 1 file changed, 39 insertions(+), 61 deletions(-) diff --git a/lib/lxc.sh b/lib/lxc.sh index 597fadf..41876af 100644 --- a/lib/lxc.sh +++ b/lib/lxc.sh @@ -52,7 +52,13 @@ LXC_CREATE () { log_critical "Failed to create the new LXC :/" fi - _LXC_START_AND_WAIT $LXC_NAME + # The first time around when we create the VM, if it fails, we want to abort early + # instead of keeping the timeout game until the end of times + if ! _LXC_START_AND_WAIT $LXC_NAME; then + log_error "Fatal error creating VM. See logs above." + exit 1 + fi + sleep 3 if ! $lxc exec $LXC_NAME -- test -e /etc/yunohost @@ -215,70 +221,42 @@ LXC_RESET () { _LXC_START_AND_WAIT() { + # Try to start the container - restart_container() - { - LXC_STOP $1 - $lxc start "$1" - } + # Wait for container to start, we are using systemd to check this, + # for the sake of brevity. + for j in $(seq 1 5); do + log_debug "Start VM attempt $j" + if timeout -k 10 4 $lxc exec "$1" -- timeout 4 systemctl isolate multi-user.target >/dev/null 2>/dev/null; then + break + fi - # Try to start the container 3 times. - local max_try=3 - local i=0 - while [ $i -lt $max_try ] - do - i=$(( i +1 )) - local failstart=0 - - # Wait for container to start, we are using systemd to check this, - # for the sake of brevity. - for j in $(seq 1 10); do - if $lxc exec "$1" -- timeout 30 systemctl isolate multi-user.target >/dev/null 2>/dev/null; then - break - fi - - if [ "$j" == "10" ]; then - log_debug 'Failed to start the container ... restarting ...' - failstart=1 - - restart_container "$1" - fi - - sleep 1s - done - - # Wait for container to access the internet - for j in $(seq 1 10); do - if $lxc exec "$1" -- timeout 10 curl -s http://wikipedia.org > /dev/null 2>/dev/null; then - break - fi - - if [ "$j" == "10" ]; then - log_debug 'Failed to access the internet ... restarting' - failstart=1 - - restart_container "$1" - fi - - sleep 1s - done - - # Has started and has access to the internet - if [ $failstart -eq 0 ] - then - break - fi - - # Fail if the container failed to start - if [ $i -eq $max_try ] && [ $failstart -eq 1 ] - then - log_error "The container miserably failed to start or to connect to the internet" + if [ "$j" == "5" ]; then + log_error 'Failed to start the container.' $lxc info --show-log $1 - return 1 - fi - done + return 1 + fi + done - sleep 3 + log_info "Container started successfully. Now waiting for internet access." + + # Wait for container to access the internet + for j in $(seq 1 5); do + log_debug "Connect VM internet attempt $j" + # Note: Sometimes this uses X00% CPU and never times out, so we use timeout SIGKILL + if timeout -k 10 4 $lxc exec "$1" -- timeout 4 curl -s http://wikipedia.org > /dev/null 2>/dev/null; then + break + fi + + if [ "$j" == "5" ]; then + log_error 'Failed to access the internet' + $lxc info --show-log $1 + return 1 + fi + + sleep 1s + done + log_debug "Container fully started" LXC_IP=$($lxc exec $1 -- hostname -I | cut -d' ' -f1 | grep -E -o "\<[0-9.]{8,}\>") } From 7e893baef457c82d2efcfb8000c9dd10f8645d6e Mon Sep 17 00:00:00 2001 From: Alexandre Aubin <4533074+alexAubin@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:38:47 +0200 Subject: [PATCH 2/2] Misc wording / log helper usage --- lib/lxc.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/lxc.sh b/lib/lxc.sh index 41876af..0edf91c 100644 --- a/lib/lxc.sh +++ b/lib/lxc.sh @@ -55,8 +55,7 @@ LXC_CREATE () { # The first time around when we create the VM, if it fails, we want to abort early # instead of keeping the timeout game until the end of times if ! _LXC_START_AND_WAIT $LXC_NAME; then - log_error "Fatal error creating VM. See logs above." - exit 1 + log_critical "Fatal error starting container. See logs above." fi sleep 3 @@ -226,7 +225,7 @@ _LXC_START_AND_WAIT() { # Wait for container to start, we are using systemd to check this, # for the sake of brevity. for j in $(seq 1 5); do - log_debug "Start VM attempt $j" + log_debug "Start container attempt $j" if timeout -k 10 4 $lxc exec "$1" -- timeout 4 systemctl isolate multi-user.target >/dev/null 2>/dev/null; then break fi @@ -242,7 +241,7 @@ _LXC_START_AND_WAIT() { # Wait for container to access the internet for j in $(seq 1 5); do - log_debug "Connect VM internet attempt $j" + log_debug "Connect container internet attempt $j" # Note: Sometimes this uses X00% CPU and never times out, so we use timeout SIGKILL if timeout -k 10 4 $lxc exec "$1" -- timeout 4 curl -s http://wikipedia.org > /dev/null 2>/dev/null; then break