mirror of
https://github.com/YunoHost-Apps/flohmarkt_ynh.git
synced 2024-09-03 18:36:30 +02:00
780 lines
31 KiB
Bash
Executable file
780 lines
31 KiB
Bash
Executable file
#!/bin/bash
|
|
##
|
|
# AUTHOR: DANIELE BAILO
|
|
# https://github.com/danielebailo
|
|
# www.danielebailo.it
|
|
#
|
|
# Contributors:
|
|
# * dalgibbard - http://github.com/dalgibbard
|
|
# * epos-eu - http://github.com/epos-eu
|
|
# * maximilianhuber - http://github.com/maximilianhuber
|
|
# * ahodgkinson - http://github.com/ahodgkinson (quiet-mode, timestamp, compress)
|
|
##
|
|
|
|
## This script allows for the Backup and Restore of a CouchDB Database.
|
|
## Backups are produced in a format that can be later uploaded with the bulk docs directive (as used by this script)
|
|
|
|
## USAGE
|
|
## * To Backup:
|
|
## ** example: ./couchdb-dump.sh -b -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
|
|
## * To Restore:
|
|
## ** example: ./couchdb-dump.sh -r -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
|
|
|
|
|
|
###################### CODE STARTS HERE ###################
|
|
# Version string printed by scriptversion() (the -V option).
scriptversionnumber="1.1.10"
|
|
|
|
##START: FUNCTIONS
|
|
# Print the command-line help to stdout and terminate the script.
# Takes no arguments. Always exits with status 1: besides serving -h it is
# the common exit path after an argument error has been reported.
usage(){
    echo
    echo "Usage: $0 [-b|-r] -H <COUCHDB_HOST> -d <DB_NAME> -f <BACKUP_FILE> [-u <username>] [-p <password>] [-P <port>] [-l <lines>] [-t <threads>] [-a <import_attempts>] [-c] [-q] [-z] [-T]"
    # One tab-indented line per option; printf re-applies the format to each argument.
    printf '\t%s\n' \
        "-b Run script in BACKUP mode." \
        "-r Run script in RESTORE mode." \
        "-H CouchDB Hostname or IP. Can be provided with or without 'http(s)://'" \
        "-d CouchDB Database name to backup/restore." \
        "-f File to Backup-to/Restore-from." \
        "-P Provide a port number for CouchDB [Default: 5984]" \
        "-u Provide a username for auth against CouchDB [Default: blank]" \
        " -- can also set with 'COUCHDB_USER' environment var" \
        "-p Provide a password for auth against CouchDB [Default: blank]" \
        " -- can also set with 'COUCHDB_PASS' environment var" \
        "-l Number of lines (documents) to Restore at a time. [Default: 5000] (Restore Only)" \
        "-t Number of CPU threads to use when parsing data [Default: nProcs-1] (Backup Only)" \
        "-a Number of times to Attempt import before failing [Default: 3] (Restore Only)" \
        "-c Create DB on demand, if they are not listed." \
        "-q Run in quiet mode. Suppress output, except for errors and warnings." \
        "-z Compress output file (Backup Only)" \
        "-T Add datetime stamp to output file name (Backup Only)" \
        "-V Display version information." \
        "-h Display usage information."
    echo
    echo "Example: $0 -b -H 127.0.0.1 -d mydb -f dumpedDB.json -u admin -p password"
    echo
    exit 1
}
|
|
|
|
# Print the version banner (version number, project URL, authors) to stdout
# and terminate the script. Reads the global $scriptversionnumber.
# Exits with status 1, like usage().
scriptversion(){
    printf '\n\t** couchdb-dump version: %s **\n\n' "$scriptversionnumber"
    printf '\t URL:\t%s\n\n' "https://github.com/danielebailo/couchdb-dump"
    printf '\t Authors:\n'
    printf '\t %s\n' \
        "Daniele Bailo (bailo.daniele@gmail.com)" \
        "Darren Gibbard (dalgibbard@gmail.com)" \
        "Maximilian Huber (maximilian.huber@tngtech.com)"
    echo
    exit 1
}
|
|
|
|
# Check the free space of the filesystem that will hold a file.
# Example call: checkdiskspace /path/to/file/to/create 1024
# Arguments:
#   $1 - path of the file about to be written (only its directory is probed)
#   $2 - space required, in KB
# Returns 0 when at least $2 KB are available. Otherwise prints a report to
# stdout and exits the script with status 1. Also exits 1 when an argument is
# missing or the available space cannot be read from df.
checkdiskspace(){
    local location=$1
    local KBrequired=$2
    local stripdir KBavail

    if [ -z "$location" ] || [ -z "$KBrequired" ]; then
        echo "... ERROR: checkdiskspace() was not passed the correct arguments."
        exit 1
    fi

    # Probe the directory, not the file: the file may not exist yet.
    case "$location" in
        */*) stripdir=${location%/*} ;;
        *)   stripdir=. ;;              # bare file name -> current directory
    esac
    # "/file" strips down to nothing, which stands for the root directory.
    [ -n "$stripdir" ] || stripdir=/

    # Column 4 of the POSIX (-P) df output is the available space in KB (-k).
    KBavail=$(df -P -k "$stripdir" 2>/dev/null | tail -n 1 | awk '{print $4}')
    KBavail=${KBavail%K}

    case "$KBavail" in
        ''|*[!0-9]*)
            echo "... ERROR: Unable to determine free disk space for: ${stripdir}"
            exit 1
            ;;
    esac

    if [ "$KBavail" -ge "$KBrequired" ]; then
        return 0
    fi

    echo
    echo "... ERROR: Insufficient Disk Space Available:"
    echo " * Full Path: ${location}"
    echo " * Affected Directory: ${stripdir}"
    echo " * Space Available: ${KBavail} KB"
    echo " * Total Space Required: ${KBrequired} KB"
    echo " * Additional Space Req: $(( KBrequired - KBavail )) KB"
    echo
    exit 1
}
|
|
## END FUNCTIONS
|
|
|
|
# Catch no args: with an empty first argument there is nothing to do,
# so show the help (usage exits the script).
if [ -z "$1" ]; then
    usage
fi
|
|
|
|
# Default Args
# Credentials stay blank unless supplied with -u / -p.
username=""
password=""
# Operating mode, switched on by -b / -r.
backup=false
restore=false
port=5984
# Make getopts start from the first argument.
OPTIND=1
# Documents per bulk request on restore (-l) and import attempts (-a).
lines=5000
attempts=3
# Feature flags toggled by -c, -q, -z and -T respectively.
createDBsOnDemand=false
verboseMode=true
compress=false
timestamp=false
|
|
|
|
# Parse the command line into the option variables.
# The leading ':' in the option string selects getopts' silent error mode:
# a missing option argument arrives as ':' and an unknown flag as '?',
# both with the offending letter in $OPTARG.
while getopts ":h?H:d:f:u:p:P:l:t:a:c?q?z?T?V?b?B?r?R?" opt; do
    case "$opt" in
        h)   usage ;;
        b|B) backup=true ;;
        r|R) restore=true ;;
        H)   url="$OPTARG" ;;
        d)   db_name="$OPTARG" ;;
        f)   file_name="$OPTARG" ;;
        u)   username="${OPTARG}" ;;
        p)   password="${OPTARG}" ;;
        P)   port="${OPTARG}" ;;
        l)   lines="${OPTARG}" ;;
        t)   threads="${OPTARG}" ;;
        a)   attempts="${OPTARG}" ;;
        c)   createDBsOnDemand=true ;;
        q)   verboseMode=false ;;
        z)   compress=true ;;
        T)   timestamp=true ;;
        V)   scriptversion ;;
        :)   echo "... ERROR: Option \"-${OPTARG}\" requires an argument"; usage ;;
        *)   echo "... ERROR: Unknown Option \"-${OPTARG}\""; usage ;;
    esac
done
|
|
|
|
# If quiet option: Setup echo mode and curl '--silent' opt.
# $echoVerbose holds the name of a command ("true"/"false") and is used as
# a guard in front of informational messages: $echoVerbose && echo ...
if [ "$verboseMode" = true ]; then
    curlSilentOpt=""
    echoVerbose=true
else
    curlSilentOpt="--silent"
    echoVerbose=false
fi
|
|
|
|
# Trap unexpected extra args: drop everything getopts consumed (and a
# literal "--" separator); whatever remains is not understood.
shift $((OPTIND-1))
[ "$1" = "--" ] && shift
# Count the leftovers instead of string-testing "$@": with two or more
# words the string test itself fails and the extras would be accepted.
if [ $# -gt 0 ]; then
    echo "... ERROR: Unknown Option \"$*\""
    usage
fi
|
|
|
|
# Handle invalid backup/restore states: exactly one mode must be selected.
if [ "$backup" = true ] && [ "$restore" = true ]; then
    echo "... ERROR: Cannot pass both '-b' and '-r'"
    usage
elif [ "$backup" = false ] && [ "$restore" = false ]; then
    echo "... ERROR: Missing argument '-b' (Backup), or '-r' (Restore)"
    usage
fi

# Handle empty args (usage exits the script on the first one missing)
# url
if [ -z "$url" ]; then
    echo "... ERROR: Missing argument '-H <COUCHDB_HOST>'"
    usage
fi
# db_name
if [ -z "$db_name" ]; then
    echo "... ERROR: Missing argument '-d <DB_NAME>'"
    usage
fi
# file_name
if [ -z "$file_name" ]; then
    echo "... ERROR: Missing argument '-f <FILENAME>'"
    usage
fi
# Keep the name exactly as given; $file_name itself is modified later on.
file_name_orig=$file_name
|
|
|
|
# Get OS TYPE (Linux for Linux, Darwin for MacOSX)
os_type=$(uname -s)

# Pick sed or gsed: on FreeBSD and Darwin the GNU implementation is
# expected under the name "gsed".
case "$os_type" in
    FreeBSD|Darwin) sed_cmd="gsed" ;;
    *)              sed_cmd="sed" ;;
esac

## Make sure it's installed, by running a trivial substitution through it
if ! echo | $sed_cmd 's/a//' >/dev/null 2>&1; then
    echo "... ERROR: please install $sed_cmd (gnu-sed) and ensure it is in your path"
    exit 1
fi
|
|
|
|
# Validate thread count
# First find out how many CPU cores there are:
#   Darwin / FreeBSD -> sysctl, elsewhere nproc, or 1 if nproc is missing.
case "$os_type" in
    Darwin)
        cores=$(sysctl -n hw.ncpu)
        ;;
    FreeBSD)
        cores=$(sysctl kern.smp.cpus | awk -F ": " '{print $2}')
        ;;
    *)
        if command -v nproc >/dev/null 2>&1; then
            cores=$(nproc)
        else
            cores=1
        fi
        ;;
esac

# Then settle on the number of parser threads.
case "$threads" in
    '')
        # Nothing requested: use all cores but one.
        threads=$(( cores - 1 ))
        ;;
    *[!0-9]*)
        # Not a plain number: fall back to the default instead of carrying
        # an unusable value into the numeric tests further down.
        echo "... WARN: Thread setting of \"$threads\" is invalid. Setting to $(( cores - 1 ))"
        threads=$(( cores - 1 ))
        ;;
    *)
        if [ "$threads" -gt "$cores" ]; then
            echo "... WARN: Thread setting of $threads is more than CPU count. Setting to $cores"
            threads=$cores
        else
            $echoVerbose && echo "... INFO: Setting parser threads to $threads"
        fi
        ;;
esac
|
|
|
|
# Validate Attempts, set to no-retry if zero/invalid.
# Empty, "0" or anything containing a non-digit counts as invalid.
case $attempts in
    ''|0|*[!0-9]*)
        echo "... WARN: Retry Attempt value of \"$attempts\" is invalid. Disabling Retry-on-Error."
        attempts=1
        ;;
    *)
        true
        ;;
esac
|
|
|
|
## Manage the passing of http/https and of the port for $url.
# Note; if the user wants to use 'https://' on a non-443 port they must specify it exclusively in the '-H <HOSTNAME>' arg.

# Print $1 (host or URL) completed into a base URL.
#   $1 - host name, IP or URL as given with -H
#   $2 - port to append when the URL does not already end in ":<digits>"
# A scheme is only recognised at the very start of $1; when it is absent,
# port 443 selects https:// and any other port http://.
_couchdb_base_url(){
    local base=$1 default_port=$2

    if [[ ! "$base" =~ ^[Hh][Tt][Tt][Pp][Ss]?:// ]]; then
        if [ "$default_port" = "443" ]; then
            base="https://$base"
        else
            base="http://$base"
        fi
    fi

    # Manage the addition of port: if a port isn't already on our URL, add it.
    if [[ ! "$base" =~ :[0-9]*$ ]]; then
        base="$base:$default_port"
    fi

    printf '%s\n' "$base"
}

url=$(_couchdb_base_url "$url" "$port")
|
|
|
|
# Check for empty user/pass and try reading in from Envvars
if [ -z "$username" ]; then
    username="$COUCHDB_USER"
fi
if [ -z "$password" ]; then
    password="$COUCHDB_PASS"
fi

## Manage the addition of user+pass if needed:
# Ensure, if one is set, both are set.
if [ -n "${username}" ] && [ -z "${password}" ]; then
    echo "... ERROR: Password cannot be blank, if username is specified."
    usage
elif [ -z "${username}" ] && [ -n "${password}" ]; then
    echo "... ERROR: Username cannot be blank, if password is specified."
    usage
fi
|
|
|
|
# Check for sed option
# In-place edits keep a ".sedtmp" backup, which callers delete afterwards.
sed_edit_in_place='-i.sedtmp'
# Letter of the "extended regular expressions" switch, used as -${sed_regexp_option}e.
if [ "$os_type" = "Darwin" ]; then
    sed_regexp_option='E'
else
    sed_regexp_option='r'
fi

# Allow for self-signed/invalid certs if method is HTTPS:
if [[ "$url" =~ ^[Hh][Tt][Tt][Pp][Ss]:// ]]; then
    curlopt="-k"
fi

# Add basic auth when both parts of the credentials are known.
# $curlopt is a plain string that is expanded unquoted on every curl call.
if [ -n "${username}" ] && [ -n "${password}" ]; then
    curlopt="${curlopt} -u ${username}:${password}"
fi
|
|
|
|
## Check for curl
# The failure branch must run in the current shell: inside "( ... )" the
# exit would only leave the subshell and the script would carry on.
if ! curl --version >/dev/null 2>&1; then
    echo "... ERROR: This script requires 'curl' to be present."
    exit 1
fi

# Check for tr
if ! echo | tr -d "" >/dev/null 2>&1; then
    echo "... ERROR: This script requires 'tr' to be present."
    exit 1
fi
|
|
|
|
##### SETUP OUR LARGE VARS FOR SPLIT PROCESSING (due to limitations in split on Darwin/BSD)
# Space separated lists of every two- and three-letter lowercase suffix,
# in the order brace expansion generates them (aa ab ... zz / aaa ... zzz).
AZ2=$(echo {a..z}{a..z})
AZ3=$(echo {a..z}{a..z}{a..z})
|
|
|
|
### If user selected BACKUP, run the following code:
|
|
if [ $backup = true ]&&[ $restore = false ]; then
|
|
#################################################################
|
|
##################### BACKUP START ##############################
|
|
#################################################################
|
|
|
|
# Print path $1 with "-$2" inserted before the file extension, or appended
# when the file name has no extension.
# Only the last path component is examined, so a dot in a directory name
# (e.g. /var/backups.d/mydb) is never mistaken for an extension.
_add_timestamp(){
    local path=$1 stamp=$2 dir="" name
    name=${path##*/}
    if [ "$name" != "$path" ]; then
        dir=${path%/*}/
    fi
    if [[ $name =~ \.[a-zA-Z0-9][a-zA-Z0-9_]* ]]; then
        # Extension = text after the last '.'; put the stamp in front of it.
        printf '%s\n' "${dir}${name%.*}-${stamp}.${name##*.}"
    else
        printf '%s\n' "${path}-${stamp}"
    fi
}

# If -T (timestamp) option, append datetime stamp ("-YYYYMMDD-hhmmss") before file extension
if [ "$timestamp" = true ]; then
    datetime=$(date "+%Y%m%d-%H%M%S") # Format: YYYYMMDD-hhmmss
    file_name=$(_add_timestamp "$file_name" "$datetime")
fi
$echoVerbose && echo "... INFO: Output file ${file_name}"
|
|
|
|
# Check if output already exists: never overwrite an earlier dump.
if [ -f "${file_name}" ]; then
    echo "... ERROR: Output file ${file_name} already exists."
    exit 1
fi
|
|
|
|
# Grab our data from couchdb
# ($curlSilentOpt / $curlopt are deliberately unquoted: they hold zero or
# more separate curl arguments.)
if ! curl ${curlSilentOpt} ${curlopt} -X GET "$url/$db_name/_all_docs?include_docs=true&attachments=true" -o "${file_name}"; then
    # Check for curl errors
    echo "... ERROR: Curl encountered an issue whilst dumping the database."
    rm -f "${file_name}" 2>/dev/null
    exit 1
fi

# Check for export errors: CouchDB answers with {"error... on the first line
ERR_CHECK=$(head -n 1 "${file_name}" | grep '^{"error')
if [ -n "${ERR_CHECK}" ]; then
    echo "... ERROR: CouchDB reported: $ERR_CHECK"
    # The file only holds the error shown above; leaving it behind would
    # make the next run stop at the "already exists" check.
    rm -f "${file_name}" 2>/dev/null
    exit 1
fi
|
|
|
|
# CouchDB has a tendency to output Windows carriage returns in its output -
# This messes up us trying to sed things at the end of lines!
if grep -qU $'\x0d' "${file_name}"; then
    $echoVerbose && echo "... INFO: File may contain Windows carriage returns- converting..."
    # The converted copy needs (at most) as much room as the original.
    filesize=$(du -P -k "${file_name}" | awk '{print$1}')
    checkdiskspace "${file_name}" "$filesize"

    if ! tr -d '\r' < "${file_name}" > "${file_name}.tmp"; then
        echo ".. ERROR: Failed to convert file."
        exit 1
    fi
    if ! mv "${file_name}.tmp" "${file_name}"; then
        echo "... ERROR: Failed to overwrite ${file_name} with ${file_name}.tmp"
        exit 1
    fi
    $echoVerbose && echo "... INFO: Completed successfully."
fi
|
|
|
|
## Now we parse the output file to make it suitable for re-import.
$echoVerbose && echo "... INFO: Amending file to make it suitable for Import."
$echoVerbose && echo "... INFO: Stage 1 - Document filtering"

# Size (KB) and line count of the raw dump; both feed the space estimates.
filesize=$(du -P -k "${file_name}" | awk '{print$1}')
dumplines=$(wc -l < "${file_name}" | awk '{print$1}')
# Estimating 80byte saving per line... probably a little conservative depending on keysize.
KBreduction=$(( dumplines * 80 / 1024 ))

# If the input file is larger than 250MB, multi-thread the parsing:
if [ "$filesize" -ge 256000 ] && [ "$threads" -gt 1 ]; then
    # Room for the split pieces plus the re-assembled, slightly smaller copy.
    filesize=$(( filesize + filesize - KBreduction ))
    checkdiskspace "${file_name}" "$filesize"
    $echoVerbose && echo "... INFO: Multi-Threaded Parsing Enabled."

    # Capture if someone happens to breach the defined limits of AZ2 var. If this happens, we'll need to switch it out for AZ3 ...
    # (checked before anything is written to disk)
    if [[ $threads -gt 650 ]]; then
        echo "Whoops- we hit a maximum limit here... \$AZ2 only allows for a maximum of 650 cores..."
        exit 1
    fi

    # Refuse to run over leftovers; "aa" is the first suffix split -a 2 creates.
    if [ -f "${file_name}.threadaa" ]; then
        echo "... ERROR: Split files \"${file_name}.thread*\" already present. Please remove before continuing."
        exit 1
    elif [ -f "${file_name}.tmp" ]; then
        echo "... ERROR: Tempfile ${file_name}.tmp already present. Please remove before continuing."
        exit 1
    fi

    ### SPLIT INTO THREADS
    # Lines per piece; the "+ threads" margin keeps the piece count at or
    # below the thread count.
    split_cal=$(( dumplines / threads + threads ))
    if ! split -a 2 -l "${split_cal}" "${file_name}" "${file_name}.thread"; then
        echo "... ERROR: Unable to create split files."
        exit 1
    fi

    # Filter every piece in the background, one sed process per piece.
    count=0
    for suffix in ${AZ2}; do
        (( count++ ))
        if [[ $count -gt $threads ]]; then
            break
        fi
        PADNAME="${file_name}.thread${suffix}"
        # split may have produced fewer pieces than there are threads.
        [ -f "${PADNAME}" ] || break
        $sed_cmd ${sed_edit_in_place} 's/{"id".*,"doc"://g' "${PADNAME}" &
    done
    wait

    # Glue the filtered pieces back together, in order, and clean up.
    count=0
    for suffix in ${AZ2}; do
        (( count++ ))
        if [[ $count -gt $threads ]]; then
            break
        fi
        PADNAME="${file_name}.thread${suffix}"
        [ -f "${PADNAME}" ] || break
        cat "${PADNAME}" >> "${file_name}.tmp"
        rm -f "${PADNAME}" "${PADNAME}.sedtmp"
    done

    # The filter never adds or drops lines, so equal line counts are the
    # sanity check before the original is replaced.
    if [ "$dumplines" = "$(wc -l < "${file_name}.tmp" | awk '{print$1}')" ]; then
        if ! mv "${file_name}.tmp" "${file_name}"; then
            echo "... ERROR: Failed to overwrite ${file_name}"
            exit 1
        fi
    else
        echo "... ERROR: Multi-threaded data parsing encountered an error."
        exit 1
    fi

else
    filesize=$(( filesize - KBreduction ))
    checkdiskspace "${file_name}" "$filesize"
    if ! { $sed_cmd ${sed_edit_in_place} 's/{"id".*,"doc"://g' "${file_name}" && rm -f "${file_name}.sedtmp"; }; then
        echo "Stage failed."
        exit 1
    fi
fi
|
|
|
|
$echoVerbose && echo "... INFO: Stage 2 - Duplicate curly brace removal"
# Approx 1Byte per line removed
KBreduction=$(( $(wc -l < "${file_name}" | awk '{print$1}') / 1024 ))
filesize=$(du -P -k "${file_name}" | awk '{print$1}')
filesize=$(( filesize - KBreduction ))
checkdiskspace "${file_name}" "$filesize"
# Every in-place sed leaves a ".sedtmp" backup, removed once sed succeeded.
if ! { $sed_cmd ${sed_edit_in_place} 's/}},$/},/g' "${file_name}" && rm -f "${file_name}.sedtmp"; }; then
    echo "Stage failed."
    exit 1
fi

$echoVerbose && echo "... INFO: Stage 3 - Header Correction"
filesize=$(du -P -k "${file_name}" | awk '{print$1}')
checkdiskspace "${file_name}" "$filesize"
# Replace the first line with the opening of a bulk-docs request body.
if ! { $sed_cmd ${sed_edit_in_place} '1s/^.*/{"new_edits":false,"docs":[/' "${file_name}" && rm -f "${file_name}.sedtmp"; }; then
    echo "Stage failed."
    exit 1
fi

$echoVerbose && echo "... INFO: Stage 4 - Final document line correction"
filesize=$(du -P -k "${file_name}" | awk '{print$1}')
checkdiskspace "${file_name}" "$filesize"
# A line still ending in "}}" (no trailing comma) loses one brace.
if ! { $sed_cmd ${sed_edit_in_place} 's/}}$/}/g' "${file_name}" && rm -f "${file_name}.sedtmp"; }; then
    echo "Stage failed."
    exit 1
fi
|
|
|
|
# If -z (compress) option then compress output file
if [ "$compress" = true ]; then
    $echoVerbose && echo "... INFO: Stage 5 - File compression"
    # Do not announce a ".gz" file that gzip failed to produce.
    if ! gzip "$file_name"; then
        echo "... ERROR: Failed to compress ${file_name}"
        exit 1
    fi
    file_name="$file_name.gz"
fi

$echoVerbose && echo "... INFO: Export completed successfully. File available at: ${file_name}"
exit 0
|
|
|
|
### Else if user selected Restore:
|
|
elif [ $restore = true ]&&[ $backup = false ]; then
|
|
#################################################################
|
|
##################### RESTORE START #############################
|
|
#################################################################
|
|
# Check if input exists: there is nothing to restore without the dump file.
if [ ! -f "${file_name}" ]; then
    echo "... ERROR: Input file ${file_name} not found."
    exit 1
fi
|
|
|
|
#### VALIDATION END
|
|
|
|
$echoVerbose && echo "... INFO: Checking for database"
# Ask the server for its list of databases, retrying up to $attempts times.
# The reply ends up in $existing_dbs.
attemptcount=0
A=0
until [ $A = 1 ]; do
    (( attemptcount++ ))
    if existing_dbs=$(curl $curlSilentOpt $curlopt -X GET "${url}/_all_dbs"); then
        A=1
    # Numeric comparison, so that a value such as "03" still ends the loop.
    elif [ "$attemptcount" -ge "$attempts" ]; then
        echo "... ERROR: Curl failed to get the list of databases - Stopping"
        exit 1
    else
        echo "... WARN: Curl failed to get the list of databases - Attempt ${attemptcount}/${attempts}. Retrying..."
        sleep 1
    fi
done
|
|
# Decide from the reply in $existing_dbs whether the target database exists,
# creating it when -c was given.
if [[ ! "$existing_dbs" = "["*"]" ]]; then
    # The reply is not a JSON array: carry on and let the import itself fail.
    echo "... WARN: Curl failed to get the list of databases - Continuing"
    # Show the unexpected reply, when there is one to show.
    if [ -n "$existing_dbs" ]; then
        echo "... WARN: Curl just returned: $existing_dbs"
    fi
elif [[ ! "$existing_dbs" = *"\"${db_name}\""* ]]; then
    # database was not listed as existing database
    if [ "$createDBsOnDemand" = true ]; then
        attemptcount=0
        A=0
        until [ $A = 1 ]; do
            (( attemptcount++ ))
            curl $curlSilentOpt $curlopt -X PUT "${url}/${db_name}" -o tmp.out
            # If curl threw an error:
            if [ ! $? = 0 ]; then
                if [ "$attemptcount" -ge "$attempts" ]; then
                    echo "... ERROR: Curl failed to create the database ${db_name} - Stopping"
                    if [ -f tmp.out ]; then
                        echo -n "... ERROR: Error message was: "
                        cat tmp.out
                    else
                        echo ".. ERROR: See above for any errors"
                    fi
                    exit 1
                else
                    echo "... WARN: Curl failed to create the database ${db_name} - Attempt ${attemptcount}/${attempts}. Retrying..."
                    sleep 1
                fi
            # If curl was happy, but CouchDB returned an error in the return JSON:
            elif [ ! "$(head -n 1 tmp.out | grep -c '^{"error":')" = 0 ]; then
                if [ "$attemptcount" -ge "$attempts" ]; then
                    echo "... ERROR: CouchDB Reported: $(head -n 1 tmp.out)"
                    exit 1
                else
                    echo "... WARN: CouchDB Reported an error during db creation - Attempt ${attemptcount}/${attempts} - Retrying..."
                    sleep 1
                fi
            # Otherwise, if everything went well, delete our temp files.
            else
                rm tmp.out
                A=1
            fi
        done
    else
        echo "... ERROR: corresponding database ${db_name} not yet created - Stopping"
        $echoVerbose && echo "... HINT: you could add the -c flag to create the database automatically"
        exit 1
    fi
fi
|
|
|
|
## Stop bash mangling wildcard...
set -o noglob
# Manage Design Documents as a priority, and remove them from the main import job
$echoVerbose && echo "... INFO: Checking for Design documents"
# Find all _design docs, put them into another file
design_file_name=${file_name}-design
grep '^{"_id":"_design' "${file_name}" > "${design_file_name}"

# Count the design file (if it even exists)
DESIGNS=$(wc -l "${design_file_name}" 2>/dev/null | awk '{print$1}')
# If there's no design docs for import...
if [ -z "$DESIGNS" ] || [ "$DESIGNS" = "0" ]; then
    # Cleanup any null files
    rm -f "${design_file_name}" 2>/dev/null
    $echoVerbose && echo "... INFO: No Design Documents found for import."
else
    $echoVerbose && echo "... INFO: Duplicating original file for alteration"
    # Duplicate the original DB file, so we don't mangle the user's input file:
    filesize=$(du -P -k "${file_name}" | awk '{print$1}')
    checkdiskspace "${file_name}" "$filesize"
    cp -f "${file_name}" "${file_name}-nodesign"
    # Re-set file_name to be our new file.
    file_name=${file_name}-nodesign
    # Remove these design docs from (our new) main file.
    $echoVerbose && echo "... INFO: Stripping _design elements from regular documents"
    checkdiskspace "${file_name}" "$filesize"
    $sed_cmd ${sed_edit_in_place} '/^{"_id":"_design/d' "${file_name}" && rm -f "${file_name}.sedtmp"
    # Remove the final document's trailing comma (it sits on the line before
    # the closing footer line).
    $echoVerbose && echo "... INFO: Fixing end document"
    line=$(( $(wc -l < "${file_name}" | awk '{print$1}') - 1 ))
    filesize=$(du -P -k "${file_name}" | awk '{print$1}')
    checkdiskspace "${file_name}" "$filesize"
    $sed_cmd ${sed_edit_in_place} "${line}s/,$//" "${file_name}" && rm -f "${file_name}.sedtmp"

    $echoVerbose && echo "... INFO: Inserting Design documents"
    designcount=0
    # For each design doc (one per line of the design file)...
    while IFS="" read -r line; do
        # Split the ID out for use as the import URL path: it is the 4th
        # field when the line is cut at double quotes.
        URLPATH=$(printf '%s\n' "${line}" | awk -F'"' '{print$4}')
        # Scrap the ID and Rev from the main data, as well as any trailing ','
        printf '%s\n' "${line}" | $sed_cmd -${sed_regexp_option}e "s@^\{\"_id\":\"${URLPATH}\",\"_rev\":\"[0-9]*-[0-9a-zA-Z_\-]*\",@\{@" | $sed_cmd -e 's/,$//' > "${design_file_name}.${designcount}"
        # Fix Windows CRLF
        if grep -qU $'\x0d' "${design_file_name}.${designcount}"; then
            $echoVerbose && echo "... INFO: File contains Windows carriage returns- converting..."
            filesize=$(du -P -k "${design_file_name}.${designcount}" | awk '{print$1}')
            checkdiskspace "${file_name}" "$filesize"
            if ! tr -d '\r' < "${design_file_name}.${designcount}" > "${design_file_name}.${designcount}.tmp"; then
                echo ".. ERROR: Failed to convert file."
                exit 1
            fi
            if ! mv "${design_file_name}.${designcount}.tmp" "${design_file_name}.${designcount}"; then
                echo "... ERROR: Failed to overwrite ${design_file_name}.${designcount} with ${design_file_name}.${designcount}.tmp"
                exit 1
            fi
            $echoVerbose && echo "... INFO: Completed successfully."
        fi

        # Insert this file into the DB
        A=0
        attemptcount=0
        until [ $A = 1 ]; do
            (( attemptcount++ ))
            curl $curlSilentOpt ${curlopt} -T "${design_file_name}.${designcount}" -X PUT "${url}/${db_name}/${URLPATH}" -H 'Content-Type: application/json' -o "${design_file_name}.out.${designcount}"
            # If curl threw an error:
            if [ ! $? = 0 ]; then
                if [ "$attemptcount" -ge "$attempts" ]; then
                    echo "... ERROR: Curl failed trying to restore ${design_file_name}.${designcount} - Stopping"
                    exit 1
                else
                    echo "... WARN: Import of ${design_file_name}.${designcount} failed - Attempt ${attemptcount}/${attempts}. Retrying..."
                    sleep 1
                fi
            # If curl was happy, but CouchDB returned an error in the return JSON:
            elif [ ! "$(head -n 1 "${design_file_name}.out.${designcount}" | grep -c '^{"error":')" = 0 ]; then
                if [ "$attemptcount" -ge "$attempts" ]; then
                    echo "... ERROR: CouchDB Reported: $(head -n 1 "${design_file_name}.out.${designcount}")"
                    exit 1
                else
                    echo "... WARN: CouchDB Reported an error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
                    sleep 1
                fi
            # Otherwise, if everything went well, delete our temp files.
            else
                A=1
                rm -f "${design_file_name}.out.${designcount}"
                rm -f "${design_file_name}.${designcount}"
            fi
        done
        # Increase design count - mainly used for the INFO at the end.
        (( designcount++ ))
    # NOTE: This is where we insert the design lines exported from the main block
    done < "${design_file_name}"
    $echoVerbose && echo "... INFO: Successfully imported ${designcount} Design Documents"
fi
set +o noglob
|
|
|
|
# If the size of the file to import is less than our $lines size, don't worry about splitting
if [ "$(wc -l < "$file_name" | awk '{print$1}')" -lt "$lines" ]; then
    $echoVerbose && echo "... INFO: Small dataset. Importing as a single file."
    A=0
    attemptcount=0
    until [ $A = 1 ]; do
        (( attemptcount++ ))
        curl $curlSilentOpt $curlopt -T "$file_name" -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o tmp.out
        curl_status=$?
        # Success needs both: curl itself succeeded AND the reply is not a
        # CouchDB error. Looking at tmp.out alone would count a failed
        # transfer (no reply file at all) as a successful import.
        if [ "$curl_status" -eq 0 ] && [ "$(head -n 1 tmp.out | grep -c '^{"error":')" -eq 0 ]; then
            $echoVerbose && echo "... INFO: Imported ${file_name_orig} Successfully."
            rm -f tmp.out
            rm -f "${file_name_orig}-design"
            rm -f "${file_name_orig}-nodesign"
            exit 0
        elif [ "$attemptcount" -ge "$attempts" ]; then
            echo "... ERROR: Import of ${file_name_orig} failed."
            if [ -f tmp.out ]; then
                echo -n "... ERROR: Error message was: "
                cat tmp.out
            else
                echo ".. ERROR: See above for any errors"
            fi
            rm -f tmp.out
            exit 1
        else
            echo "... WARN: Import of ${file_name_orig} failed - Attempt ${attemptcount}/${attempts} - Retrying..."
            sleep 1
        fi
    done
# Otherwise, it's a large import that requires bulk insertion.
else
    $echoVerbose && echo "... INFO: Block import set to ${lines} lines."
    if [ -f "${file_name}.splitaaa" ]; then
        echo "... ERROR: Split files \"${file_name}.split*\" already present. Please remove before continuing."
        exit 1
    fi
    # Number of non-empty lines to import.
    importlines=$(grep -c . "${file_name}")

    # Due to the file limit imposed by the pre-calculated AZ3 variable, max split files is 15600 (alpha x 3positions)
    if [[ $(( importlines / lines )) -gt 15600 ]]; then
        echo "... ERROR: Pre-processed split variable limit of 15600 files reached."
        echo " Please increase the '-l' parameter (Currently: $lines) and try again."
        exit 1
    fi

    $echoVerbose && echo "... INFO: Generating files to import"
    filesize=$(du -P -k "${file_name}" | awk '{print$1}')
    checkdiskspace "${file_name}" "$filesize"
    ### Split the file into many
    if ! split -a 3 -l "${lines}" "${file_name}" "${file_name}.split"; then
        echo "... ERROR: Unable to create split files."
        exit 1
    fi
    # First and last line of the dump wrap the document list; every piece
    # needs both to be a complete request body on its own.
    HEADER=$(head -n 1 "$file_name")
    FOOTER=$(tail -n 1 "$file_name")

    count=0
    for PADNUM in $AZ3; do
        PADNAME="${file_name}.split${PADNUM}"
        # The first missing piece marks the end of the import.
        if [ ! -f "${PADNAME}" ]; then
            echo "... INFO: Import Cycle Completed."
            break
        fi

        if [ ! "$(head -n 1 "${PADNAME}")" = "${HEADER}" ]; then
            $echoVerbose && echo "... INFO: Adding header to ${PADNAME}"
            filesize=$(du -P -k "${PADNAME}" | awk '{print$1}')
            checkdiskspace "${PADNAME}" "$filesize"
            $sed_cmd ${sed_edit_in_place} "1i${HEADER}" "${PADNAME}" && rm -f "${PADNAME}.sedtmp"
        else
            $echoVerbose && echo "... INFO: Header already applied to ${PADNAME}"
        fi
        if [ ! "$(tail -n 1 "${PADNAME}")" = "${FOOTER}" ]; then
            $echoVerbose && echo "... INFO: Adding footer to ${PADNAME}"
            filesize=$(du -P -k "${PADNAME}" | awk '{print$1}')
            checkdiskspace "${PADNAME}" "$filesize"
            # Drop the trailing comma of the piece's last document first.
            $sed_cmd ${sed_edit_in_place} '$s/,$//g' "${PADNAME}" && rm -f "${PADNAME}.sedtmp"
            echo "${FOOTER}" >> "${PADNAME}"
        else
            $echoVerbose && echo "... INFO: Footer already applied to ${PADNAME}"
        fi

        $echoVerbose && echo "... INFO: Inserting ${PADNAME}"
        A=0
        attemptcount=0
        until [ $A = 1 ]; do
            (( attemptcount++ ))
            curl $curlSilentOpt $curlopt -T "${PADNAME}" -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o tmp.out
            if [ ! $? = 0 ]; then
                if [ "$attemptcount" -ge "$attempts" ]; then
                    echo "... ERROR: Curl failed trying to restore ${PADNAME} - Stopping"
                    exit 1
                else
                    echo "... WARN: Failed to import ${PADNAME} - Attempt ${attemptcount}/${attempts} - Retrying..."
                    sleep 1
                fi
            elif [ ! "$(head -n 1 tmp.out | grep -c '^{"error":')" = 0 ]; then
                if [ "$attemptcount" -ge "$attempts" ]; then
                    echo "... ERROR: CouchDB Reported: $(head -n 1 tmp.out)"
                    exit 1
                else
                    echo "... WARN: CouchDB Reported and error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
                    sleep 1
                fi
            else
                A=1
                rm -f "${PADNAME}"
                rm -f tmp.out
                (( count++ ))
            fi
        done

        # Running total, reported after every piece.
        $echoVerbose && echo "... INFO: Successfully Imported ${count} Files"
        A=1
        # The intermediate copies are no longer read once the pieces exist.
        rm -f "${file_name_orig}-design"
        rm -f "${file_name_orig}-nodesign"
    done
fi
|
|
fi
|