diff --git a/lib/lxc.sh b/lib/lxc.sh index b33f3e2..bb5a8a8 100644 --- a/lib/lxc.sh +++ b/lib/lxc.sh @@ -57,12 +57,7 @@ LXC_CREATE () { log_critical "Failed to create the new LXC :/" fi - # The first time around when we create the VM, if it fails, we want to abort early - # instead of keeping the timeout game until the end of times - if ! _LXC_START_AND_WAIT $LXC_NAME; then - log_critical "Fatal error starting container. See logs above." - fi - + _LXC_START_AND_WAIT $LXC_NAME sleep 3 if ! $lxc exec $LXC_NAME -- test -e /etc/yunohost @@ -225,42 +220,70 @@ LXC_RESET () { _LXC_START_AND_WAIT() { - # Try to start the container - # Wait for container to start, we are using systemd to check this, - # for the sake of brevity. - for j in $(seq 1 5); do - log_debug "Start container attempt $j" - if timeout -k 10 4 $lxc exec "$1" -- timeout 4 systemctl isolate multi-user.target >/dev/null 2>/dev/null; then - break - fi + restart_container() + { + LXC_STOP $1 + $lxc start "$1" + } - if [ "$j" == "5" ]; then - log_error 'Failed to start the container.' + # Try to start the container 3 times. + local max_try=3 + local i=0 + while [ $i -lt $max_try ] + do + i=$(( i +1 )) + local failstart=0 + + # Wait for container to start, we are using systemd to check this, + # for the sake of brevity. + for j in $(seq 1 10); do + if $lxc exec "$1" -- timeout 30 systemctl isolate multi-user.target >/dev/null 2>/dev/null; then + break + fi + + if [ "$j" == "10" ]; then + log_debug 'Failed to start the container ... restarting ...' + failstart=1 + + restart_container "$1" + fi + + sleep 1s + done + + # Wait for container to access the internet + for j in $(seq 1 10); do + if $lxc exec "$1" -- timeout 10 curl -s http://wikipedia.org > /dev/null 2>/dev/null; then + break + fi + + if [ "$j" == "10" ]; then + log_debug 'Failed to access the internet ... restarting' + failstart=1 + + restart_container "$1" + fi + + sleep 1s + done + + # Has started and has access to the internet + if [ $failstart -eq 0 ] + then + break + fi + + # Fail if the container failed to start + if [ $i -eq $max_try ] && [ $failstart -eq 1 ] + then + log_error "The container miserably failed to start or to connect to the internet" $lxc info --show-log $1 - return 1 - fi - done + return 1 + fi + done - log_info "Container started successfully. Now waiting for internet access." - - # Wait for container to access the internet - for j in $(seq 1 5); do - log_debug "Connect container internet attempt $j" - # Note: Sometimes this uses X00% CPU and never times out, so we use timeout SIGKILL - if timeout -k 10 4 $lxc exec "$1" -- timeout 4 curl -s http://wikipedia.org > /dev/null 2>/dev/null; then - break - fi - - if [ "$j" == "5" ]; then - log_error 'Failed to access the internet' - $lxc info --show-log $1 - return 1 - fi - - sleep 1s - done - log_debug "Container fully started" + sleep 3 LXC_IP=$($lxc exec $1 -- hostname -I | cut -d' ' -f1 | grep -E -o "\<[0-9.]{8,}\>") }