diff --git a/sub_scripts/launcher.sh b/sub_scripts/launcher.sh
index 1341375..d8f922a 100755
--- a/sub_scripts/launcher.sh
+++ b/sub_scripts/launcher.sh
@@ -152,8 +152,9 @@ LXC_START () {
 	# Try to start the container 3 times.
 	local max_try=3
 	local i=0
-	for i in `seq 1 $max_try`
+	while [ $i -lt $max_try ]
 	do
+		i=$(( $i +1 ))
 		# Start the container and log the booting process in $script_dir/lxc_boot.log
 		# Try to start only if the container is not already started
 		if ! is_lxc_running; then
@@ -165,13 +166,13 @@ LXC_START () {
 			local avoid_witness=1
 		fi
 
-		# Check during 20 seconds if the container has finished to start.
+		# Try to connect 5 times
 		local j=0
-		for j in `seq 1 20`
+		for j in `seq 1 5`
 		do
 			echo -n .
 			# Try to connect with ssh to check if the container is ready to work.
-			if ssh $arg_ssh $lxc_name "exit 0" > /dev/null 2>&1; then
+			if ssh $arg_ssh -o ConnectTimeout=10 $lxc_name "exit 0" > /dev/null 2>&1; then
 				# Break the for loop if the container is ready.
 				break
 			fi
@@ -188,7 +189,7 @@ LXC_START () {
 				ECHO_FORMAT "Rebooting the container...\n" "red" "bold"
 			fi
 			LXC_STOP # Stop the LXC container
-		elif ! ssh $arg_ssh $lxc_name "sudo ping -q -c 2 security.debian.org > /dev/null 2>&1; exit \$?" >> "$test_result" 2>&1
+		elif ! ssh $arg_ssh -o ConnectTimeout=60 $lxc_name "sudo ping -q -c 2 security.debian.org > /dev/null 2>&1; exit \$?" >> "$test_result" 2>&1
 		then
 			# Try to ping security.debian.org to check the connectivity from the container
 			ECHO_FORMAT "The container failed to connect to internet...\n" "red" "bold"
@@ -208,13 +209,66 @@ LXC_START () {
 		# Fail if the container failed to start
 		if [ $i -eq $max_try ] && [ $failstart -eq 1 ]
 		then
-			ECHO_FORMAT "The container failed to start $max_try times...\nIf this problem is persistent, try to fix it with lxc_check.sh.\n" "red" "bold"
+			send_email () {
+				# Send an email only if it's a CI environment
+				if [ $type_exec_env -ne 0 ]
+				then
+					ci_path=$(grep "CI_URL=" "$script_dir/../config" | cut -d= -f2)
+					local subject="[YunoHost] Container in trouble on $ci_path."
+					local message="The container failed to start $max_try times on $ci_path.
+$lxc_check_result
+
+Please have a look to the log of lxc_check:
+$(cat "$script_dir/lxc_check.log")"
+					if [ $lxc_check -eq 2 ]; then
+						# Add the log of lxc_build
+						message="$message
+
+Here the log of lxc_build:
+$(cat "$script_dir/sub_scripts/Build_lxc.log")"
+					fi
+
+					dest=$(grep 'dest=' "$script_dir/../config" | cut -d= -f2)
+					mail -s "$subject" "$dest" <<< "$message"
+				fi
+			}
+
+			ECHO_FORMAT "The container failed to start $max_try times...\n" "red" "bold"
 			ECHO_FORMAT "Boot log:\n" clog
 			cat "$script_dir/lxc_boot.log" | tee --append "$test_result"
-			stop_timer 1
-			return 1
+			ECHO_FORMAT "lxc_check will try to fix the container...\n" "red" "bold"
+			$script_dir/sub_scripts/lxc_check.sh --no-lock | tee "$script_dir/lxc_check.log"
+			# PIPESTATUS is an array with the exit code of each command followed by a pipe
+			local lxc_check=${PIPESTATUS[0]}
+			LXC_INIT
+			if [ $lxc_check -eq 0 ]; then
+				local lxc_check_result="The container seems to be ok, according to lxc_check."
+				ECHO_FORMAT "$lxc_check_result\n" "lgreen" "bold"
+				send_email
+				i=0
+			elif [ $lxc_check -eq 1 ]; then
+				local lxc_check_result="An error has happened with the host. Please check the configuration."
+				ECHO_FORMAT "$lxc_check_result\n" "red" "bold"
+				send_email
+				stop_timer 1
+				return 1
+			elif [ $lxc_check -eq 2 ]; then
+				local lxc_check_result="The container is broken, it will be rebuilt."
+				ECHO_FORMAT "$lxc_check_result\n" "red" "bold"
+				$script_dir/sub_scripts/lxc_build.sh
+				LXC_INIT
+				send_email
+				i=0
+			elif [ $lxc_check -eq 3 ]; then
+				local lxc_check_result="The container has been fixed by lxc_check."
+				ECHO_FORMAT "$lxc_check_result\n" "lgreen" "bold"
+				send_email
+				i=0
+			fi
 		fi
 	done
+	stop_timer 1
+	start_timer
 
 	# Count the number of lines of the current yunohost log file.
 	COPY_LOG 1
diff --git a/sub_scripts/lxc_check.sh b/sub_scripts/lxc_check.sh
index 4073e1e..7714a26 100755
--- a/sub_scripts/lxc_check.sh
+++ b/sub_scripts/lxc_check.sh
@@ -6,6 +6,11 @@
 # Récupère le dossier du script
 if [ "${0:0:1}" == "/" ]; then script_dir="$(dirname "$0")"; else script_dir="$(echo $PWD/$(dirname "$0" | cut -d '.' -f2) | sed 's@/$@@')"; fi
 
+no_lock=0
+if [ "$1" == "--no-lock" ]; then
+	no_lock=1
+fi
+
 ARG_SSH="-t"
 # Récupère les informations depuis le fichier de conf (Ou le complète le cas échéant)
 pcheck_config="$script_dir/../config"
@@ -20,6 +25,34 @@ then
 	main_iface=$(cat "$pcheck_config" | grep iface= | cut -d '=' -f2)
 fi
 
+# Exit helpers: release pcheck.lock only when this run manages it (no --no-lock), then exit with the matching status code
+remove_lock () {
+	if [ $no_lock -eq 0 ]
+	then
+		sudo rm -f "$script_dir/../pcheck.lock"
+	fi
+}
+
+exit_failure () { # Status 1: error on the host side, nothing more can be done here
+	remove_lock
+	exit 1
+}
+
+exit_rebuild () { # Status 2: the container is broken and has to be rebuilt
+	remove_lock
+	exit 2
+}
+
+exit_retry () { # Status 3: repairs were applied, the caller should test again
+	remove_lock
+	exit 3
+}
+
+exit_sane () { # Status 0: no error found
+	remove_lock
+	exit 0
+}
+
 # Use the default value and set it in the config file
 replace_default_value () {
 	CONFIG_KEY=$1
@@ -54,7 +87,7 @@ if [ -z "$main_iface" ]; then
 	main_iface=$(sudo ip route | grep default | awk '{print $5;}') # Prend l'interface réseau défini par default
 	if [ -z $main_iface ]; then
 		echo -e "\e[91mImpossible de déterminer le nom de l'interface réseau de l'hôte.\e[0m"
-		exit 1
+		exit_failure
 	fi
 	# Store the main iface in the config file
if grep -q iface= "$pcheck_config" @@ -157,8 +190,7 @@ RESTORE_CONTAINER () { # Résultats finaux if [ $START_STATUS -eq 1 ]; then echo -e "\e[91m\n> Le conteneur $LXC_NAME1 n'a pas pu être réparé...\nIl est nécessaire de détruire et de reconstruire le conteneur.\e[0m" - sudo rm "$script_dir/../pcheck.lock" # Retire le lock - exit 1 + exit_rebuild else echo -e "\e[92m\n> Le conteneur démarre correctement.\e[0m" fi @@ -240,7 +272,9 @@ LXC_NETWORK_CONFIG () { fi } -touch "$script_dir/../pcheck.lock" # Met en place le lock de Package check +if [ $no_lock -eq 0 ]; then + touch "$script_dir/../pcheck.lock" # Met en place le lock de Package check +fi STOP_CONTAINER STOP_NETWORK @@ -250,12 +284,12 @@ check_repair=0 echo -e "\e[1m> Test de la configuration réseau du côté de l'hôte:\e[0m" CREATE_BRIDGE () { echo | sudo tee /etc/network/interfaces.d/$LXC_BRIDGE < /dev/null 2>&1 if [ "$?" -ne 0 ]; then # En cas de nouvel échec de connexion. On considère que la connexion est down... echo -e "\e[91mL'hôte semble ne pas avoir accès à internet. 
La connexion internet est indispensable.\e[0m" - sudo rm "$script_dir/../pcheck.lock" # Retire le lock - exit 1 + exit_failure fi fi echo -e "\e[92mL'hôte dispose d'un accès à internet.\e[0m" @@ -384,8 +414,7 @@ do if [ "$lxc_net_check" -eq 4 ] then echo -e "\e[91mImpossible de rétablir la connexion internet du conteneur.\e[0m" - sudo rm "$script_dir/../pcheck.lock" # Retire le lock - exit 1 + exit_rebuild fi echo -e "\e[91mLe conteneur LXC n'accède pas à internet...\e[0m" check_repair=1 @@ -446,8 +475,7 @@ echo -e "\e[1m\n> Test de l'accès ssh:\e[0m" # Check user if [ "$(whoami)" != "$(cat "$script_dir/setup_user")" ] && test -e "$script_dir/setup_user"; then echo -e "\e[91mPour tester l'accès ssh, le script doit être exécuté avec l'utilisateur $(cat "$script_dir/setup_user") !\nL'utilisateur actuel est $(whoami).\e[0m" - sudo rm "$script_dir/../pcheck.lock" # Retire le lock - exit 1 + exit_failure fi sudo lxc-ls -f @@ -496,8 +524,7 @@ echo -e "\e[1m\n> Vérifie que Yunohost est installé dans le conteneur:\e[0m" sudo lxc-attach -n $LXC_NAME -- sudo yunohost -v if [ "$?" -ne 0 ]; then # Si la commande échoue, il y a un problème avec Yunohost echo -e "\e[91mYunohost semble mal installé. Il est nécessaire de détruire et de reconstruire le conteneur.\e[0m" - sudo rm "$script_dir/../pcheck.lock" # Retire le lock - exit 1 + exit_rebuild else echo -e "\e[92mYunohost est installé correctement.\e[0m" fi @@ -508,6 +535,7 @@ STOP_NETWORK echo -e "\e[92m\nLe conteneur ne présente aucune erreur.\e[0m" if [ "$check_repair" -eq 1 ]; then echo -e "\e[91mMais des réparations ont été nécessaires. Refaire un test pour s'assurer que tout est correct...\e[0m" + exit_retry fi -sudo rm "$script_dir/../pcheck.lock" # Retire le lock +exit_sane