Fix docker-image-extract script name (#1532)

* Fix docker-image-extract script name

* Update docker-image-extract

Use code from 95b3db8af8
This commit is contained in:
tituspijean 2022-11-18 20:11:58 +01:00 committed by GitHub
parent a772153b64
commit 079c3fab00
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 262 additions and 215 deletions

View file

@ -0,0 +1,262 @@
#!/bin/sh
#
# This script pulls and extracts all files from an image in Docker Hub.
#
# Copyright (c) 2020-2022, Jeremy Lin
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# Platform that is pulled when no -p option is given.
PLATFORM_DEFAULT="linux/amd64"
# Platform actually requested; starts out as the default and is overwritten
# by the -p option during option parsing.
PLATFORM="${PLATFORM_DEFAULT}"
# Directory the image is extracted into; overwritten by the -o option.
OUT_DIR="./output"
# Print the short usage summary on stdout.
#
# Globals read:
#   PLATFORM_DEFAULT - shown as the default platform. The current PLATFORM is
#                      deliberately not used here: by the time usage is
#                      printed for an option error, -p may already have
#                      replaced it, and it would then be mislabelled as the
#                      default.
#   OUT_DIR          - shown as the default output directory (there is no
#                      separate constant for it, so the current value is
#                      printed).
usage() {
  cat <<EOF
This script pulls and extracts all files from an image in Docker Hub.

$0 [OPTIONS...] IMAGE[:REF]

IMAGE can be a community user image (like 'some-user/some-image') or a
Docker official image (like 'hello-world', which contains no '/').

REF is either a tag name or a full SHA-256 image digest (with a 'sha256:' prefix).
The default ref is the 'latest' tag.

Options:

 -p PLATFORM Pull image for the specified platform (default: ${PLATFORM_DEFAULT})
 For a given image on Docker Hub, the 'Tags' tab lists the
 platforms supported for that image.
 -o OUT_DIR Extract image to the specified output dir (default: ${OUT_DIR})
 -h Show help with usage examples
EOF
}
# Print the short usage summary (via usage) followed by a list of example
# invocations, all on stdout.
usage_detailed() {
  usage
  # Unquoted delimiter on purpose: $0 must expand to the script name, while
  # \$ produces the literal prompt character.
  cat <<EOF

Examples:

# Pull and extract all files in the 'hello-world' image tagged 'latest'.
\$ $0 hello-world:latest

# Same as above; ref defaults to the 'latest' tag.
\$ $0 hello-world

# Pull the 'hello-world' image for the 'linux/arm64/v8' platform.
\$ $0 -p linux/arm64/v8 hello-world

# Pull an image by digest.
\$ $0 hello-world:sha256:90659bf80b44ce6be8234e6ff90a1ac34acbeb826903b02cfa0da11c82cbc042
EOF
}
# Command-line handling.
#
# Running the script without any argument is treated as a request for the
# detailed help, not as an error.
if [ "$#" -eq 0 ]; then
  usage_detailed
  exit 0
fi

# The leading ':' in the option string selects silent error reporting, so
# unknown options arrive as '?' and missing arguments as ':' with the
# offending option letter in OPTARG.
while getopts ':ho:p:' opt; do
  case "${opt}" in
    h)
      usage_detailed
      exit 0
      ;;
    o) OUT_DIR="${OPTARG}" ;;
    p) PLATFORM="${OPTARG}" ;;
    :)
      echo "ERROR: Argument required for option '-$OPTARG'."
      echo
      usage
      exit 1
      ;;
    \?)
      echo "ERROR: Invalid option '-$OPTARG'."
      echo
      usage
      exit 1
      ;;
  esac
done

# Drop the parsed options; what remains is the image specification.
shift "$((OPTIND - 1))"

if [ "$#" -eq 0 ]; then
  echo "ERROR: Image to pull must be specified."
  echo
  usage
  exit 1
fi
# Succeed (exit status only, nothing on stdout) when a curl command can be
# resolved by the shell.
have_curl() { command -v curl >/dev/null; }
# Succeed (exit status only, nothing on stdout) when a wget command can be
# resolved by the shell.
have_wget() { command -v wget >/dev/null; }
# A downloader is mandatory: stop early when neither curl nor wget exists.
have_curl || have_wget || {
  echo "This script requires either curl or wget."
  exit 1
}

image_spec="$1"

# The repository name is everything before the first ':'.
image="${image_spec%%:*}"
case "${image}" in
  */*) ;;
  *)
    # Docker official images are in the 'library' namespace.
    image="library/${image}"
    ;;
esac

# The ref is everything after the first ':'; without a ':' fall back to the
# 'latest' tag.
case "${image_spec}" in
  *:*)
    ref="${image_spec#*:}"
    ;;
  *)
    echo "Defaulting ref to tag 'latest'..."
    ref=latest
    ;;
esac

# Split platform (OS/arch/variant) into separate variables.
# A platform specifier doesn't always include the `variant` component, in
# which case VARIANT ends up empty.
OLD_IFS="${IFS}"
IFS=/ read -r OS ARCH VARIANT <<EOF
${PLATFORM}
EOF
IFS="${OLD_IFS}"
# Read JSON on stdin and print, one per line, every string value stored
# under the given key. Doing this with grep/sed avoids an extra dependency
# on a tool like `jq`.
#
# Arguments: $1 - the key to look for (without quotes)
# Outputs:   the matching values on stdout; nothing when the key is absent
#            or its value is not a non-empty string
extract() {
  local key="$1"
  # Matches "<key>":"<val>" with optional whitespace around the colon.
  # Assumes neither key nor value contains a double quote.
  local pair_re="\"${key}\"[[:space:]]*:[[:space:]]*\"[^\"]\+\""

  # From each match keep only <val>: drop the closing quote, then greedily
  # drop everything up to and including the last remaining quote.
  grep -o "${pair_re}" | sed 's/"$//; s/.*"//'
}
# Fetch a URL to stdout using curl when available, wget otherwise. Any
# number of header arguments may follow the URL:
#
#   fetch <url> [name1: value1] [name2: value2] ...
#
# Arguments: $1   - URL to download
#            $2.. - optional request headers, each as a "Name: value" string
# Returns:   the exit status of curl/wget; 2 when no URL was given
fetch() {
  local url client header_flag header

  if [ "$#" -eq 0 ]; then
    echo "fetch: missing URL argument" >&2
    return 2
  fi
  url="$1"
  shift

  if have_curl; then
    client=curl
    header_flag=-H
  else
    client=wget
    header_flag=--header
  fi

  # Rewrite the positional parameters in place: each header is re-appended
  # behind its option flag and the original entry is shifted away, so that
  # afterwards "$@" reads: <flag> <hdr1> <flag> <hdr2> ...
  for header in "$@"; do
    set -- "$@" "${header_flag}" "${header}"
    shift
  done

  if [ "${client}" = curl ]; then
    curl -sSL "$@" "${url}"
  else
    wget -qO- "$@" "${url}"
  fi
}
# https://docs.docker.com/docker-hub/api/latest/#tag/repositories
manifest_list_url="https://hub.docker.com/v2/repositories/${image}/tags/${ref}"

# Work out which reference to request from the registry. If we're pulling
# the image for the default platform, or the ref is already a SHA-256 image
# digest, the ref is used as-is and nothing needs to be looked up.
if [ "${PLATFORM}" = "${PLATFORM_DEFAULT}" ] || [ -z "${ref##sha256:*}" ]; then
  digest="${ref}"
else
  echo "Getting multi-arch manifest list..."
  digest=$(fetch "${manifest_list_url}" |
    # Break up the single-line JSON output into separate lines by adding
    # newlines before and after the chars '[', ']', '{', and '}'.
    sed -e 's/\([][{}]\)/\n\1\n/g' |
    # Extract the "images":[...] list.
    sed -n '/"images":/,/]/ p' |
    # Every image entry is now on a line of its own, e.g.
    # "architecture":"arm64","features":"","variant":"v8","digest":"sha256:054c...","os":"linux",...
    # The entries are interspersed with lines of stray punctuation,
    # so grep for an arbitrary string that must be in these lines.
    grep architecture |
    # Search for an image that matches the platform. The loop variable is
    # kept distinct from the global image name, and every expansion is
    # quoted so the JSON reaches the extract helper verbatim (no word
    # splitting, no pathname expansion).
    while read -r entry; do
      # Arch is probably most likely to be unique, so check that first.
      arch="$(printf '%s\n' "${entry}" | extract 'architecture')"
      if [ "${arch}" != "${ARCH}" ]; then continue; fi
      os="$(printf '%s\n' "${entry}" | extract 'os')"
      if [ "${os}" != "${OS}" ]; then continue; fi
      variant="$(printf '%s\n' "${entry}" | extract 'variant')"
      if [ "${variant}" = "${VARIANT}" ]; then
        printf '%s\n' "${entry}" | extract 'digest'
        break
      fi
    done)
fi

# An empty digest means that no entry of the manifest list matched.
if [ -n "${digest}" ]; then
  echo "Platform ${PLATFORM} resolved to '${digest}'..."
else
  echo "No image digest found. Verify that the image, ref, and platform are valid."
  exit 1
fi
# Registry endpoints used below.
# Token:    https://docs.docker.com/registry/spec/auth/token/#how-to-authenticate
# Manifest: https://github.com/docker/distribution/blob/master/docs/spec/api.md#pulling-an-image-manifest
# Layers:   https://github.com/docker/distribution/blob/master/docs/spec/api.md#pulling-a-layer
api_token_url="https://auth.docker.io/token?service=registry.docker.io&scope=repository:${image}:pull"
manifest_url="https://registry-1.docker.io/v2/${image}/manifests/${digest}"
blobs_base_url="https://registry-1.docker.io/v2/${image}/blobs"

# Request headers sent along with the manifest request.
v2_header="Accept: application/vnd.docker.distribution.manifest.v2+json"

echo "Getting API token..."
token=$(fetch "${api_token_url}" | extract 'token')
auth_header="Authorization: Bearer ${token}"

echo "Getting image manifest for ${image}:${ref}..."
# Only the `digest` values that appear from the `layers` section onwards are
# layer digests, so everything before that section is cut away first.
layers=$(fetch "${manifest_url}" "${auth_header}" "${v2_header}" |
  sed -n '/"layers":/,$ p' |
  extract 'digest')

[ -n "${layers}" ] || {
  echo "No layers returned. Verify that the image and ref are valid."
  exit 1
}
# Apply the "whiteout" markers found below a directory: a file named
# `.wh.<name>` means that `<name>` in the same directory was deleted by the
# layer that has just been unpacked. Both the marker and the entry it hides
# are removed.
#
# Ref: https://github.com/moby/moby/blob/master/image/spec/v1.2.md#creating-an-image-filesystem-changeset
#      https://github.com/moby/moby/blob/master/pkg/archive/whiteouts.go
#
# Arguments: $1 - directory to scan recursively
#
# NOTE(review): markers such as `.wh..wh..opq` are only deleted themselves;
# nothing else in their directory is removed.
apply_whiteouts() {
  local marker dir name target

  find "$1" -name '.wh.*' | while IFS= read -r marker; do
    dir="${marker%/*}"
    name="${marker##*/}"
    target="${name#.wh.}"

    # The marker itself always goes away.
    rm -rf "${dir}/${name}"

    # A marker that names nothing (a file called exactly `.wh.`), or that
    # names `.` or `..`, must never turn into `rm -rf "${dir}/"` or escape
    # the directory: layer contents are untrusted input.
    case "${target}" in
      '' | . | ..) continue ;;
    esac

    rm -rf "${dir}/${target}"
  done
}

mkdir -p "${OUT_DIR}"

# Layers are unpacked on top of each other in manifest order; the unquoted
# expansion is intentional, the digests are whitespace separated.
for layer in $layers; do
  hash="${layer#sha256:}"
  echo "Fetching and extracting layer ${hash}..."
  fetch "${blobs_base_url}/${layer}" "${auth_header}" | gzip -d | tar -C "${OUT_DIR}" -xf -
  apply_whiteouts "${OUT_DIR}"
done

echo "Image contents extracted into ${OUT_DIR}."

View file

@ -1,215 +0,0 @@
#!/bin/sh
# If editing on Windows, make sure this file keeps LF line endings.
# Abort on unhandled command failures and on use of unset variables.
set -eu

# --- Settings, each overridable through the environment -------------------

# Docker client command to use.
EXTRACT_DOCKER="${EXTRACT_DOCKER:-docker}"

# Name of the manifest file, inside the saved image, that describes the
# layers.
EXTRACT_MANIFEST="${EXTRACT_MANIFEST:-manifest.json}"

# Destination directory. Some %-surrounded keywords in it are replaced at
# run time by elements of the fully-qualified image name. Defaults to the
# current working directory.
EXTRACT_DEST="${EXTRACT_DEST:-$(pwd)}"

# 1: pull the image when it does not exist locally. An image that had to be
# pulled is removed again once done, to conserve space.
EXTRACT_PULL="${EXTRACT_PULL:-1}"

# 1: export the PATHs to the extracted binaries and libraries.
EXTRACT_EXPORT="${EXTRACT_EXPORT:-0}"

# 1: more verbosity (on stderr).
EXTRACT_VERBOSE="${EXTRACT_VERBOSE:-0}"
# Print a one-line summary followed by the option list on stdout, then exit.
#
# The option list is not hard-coded: it is scraped from this very script by
# grepping for the option patterns of the getopts loop together with the
# comment that trails each of them. Not extremely correct, but effective and
# simple. The first '#' of every such line is dropped and the closing
# parenthesis after the option letter is turned into a leading dash.
#
# Arguments: $1 - exit status to terminate with (default: 0)
usage() {
  # sed -E matches the flag used by every other sed call in this script.
  echo "$0 extracts all layers from a Docker image to a directory, will pull if necessary" &&
    grep "[[:space:]].)\ #" "$0" |
    sed -E -e 's/#//' -e 's/([a-z])\)/-\1/'
  exit "${1:-0}"
}
# Parse the command-line options into the EXTRACT_* settings.
#
# NOTE: the comment trailing each option pattern below is runtime data, not
# just documentation. usage greps this script for exactly these lines and
# prints them as the help text, so their wording and layout must stay
# intact when this loop is edited.
#
# A '-' option character ends option parsing; any other unknown option
# prints the help and exits with status 1.
while getopts "t:d:vneh-" opt; do
case "$opt" in
d) # How to run the Docker client
EXTRACT_DOCKER=$OPTARG;;
e) # Print out commands for PATH extraction
EXTRACT_EXPORT=1;;
n) # Do not pull if the image does not exist
EXTRACT_PULL=0;;
h) # Print help and exit
usage;;
t) # Target directory, will be created if necessary, %-surrounded keywords will be resolved (see manual). Default: current directory
EXTRACT_DEST=$OPTARG;;
v) # Turn on verbosity
EXTRACT_VERBOSE=1;;
-)
break;;
*)
usage 1;;
esac
done
# Drop the options that were parsed; the remaining arguments are image names.
shift $((OPTIND-1))
# Write the message in $1 to stderr, but only when verbose mode is on
# (EXTRACT_VERBOSE set to 1). Silent, and still successful, otherwise.
_verbose() {
  if [ "$EXTRACT_VERBOSE" != "1" ]; then
    return 0
  fi
  printf '%s\n' "$1" >&2
}
# Write the message in $1, followed by a newline, to stderr. The message is
# passed as data, never as a printf format.
_error() {
  printf '%s\n' "$1" 1>&2
}
# Unfold JSON one-liners so that fields and their values end up on separate
# lines. It's sed and grep, don't expect miracles, but this should work
# against most well-formatted JSON.
#
# Reads JSON on stdin and writes the unfolded text to stdout.
json_unfold() {
# The sed expressions, in order:
#   1. put every "},{" boundary on lines of its own ("}," then "{");
#   2. break the line after an opening brace that is followed by a quote;
#   3. insert a line break before a closing brace that follows other text;
#   4. rewrite a key's ':' plus value (a quoted string or a bare
#      alphanumeric word) as ': value' and break the line after the
#      character that follows the value;
#   5. same as 4 for a value directly followed by a closing brace, which is
#      moved to the next line.
# The final grep drops lines that are empty or contain only whitespace.
# NOTE(review): in expression 4 the '$' sits inside a bracket expression and
# is therefore a literal dollar sign; end-of-line was presumably intended.
# NOTE(review): '\s' and '\n' in the replacement look like GNU sed
# extensions; confirm the sed implementations this has to run on.
sed -E \
-e 's/\}\s*,\s*\{/\n\},\n\{\n/g' \
-e 's/\{\s*"/\{\n"/g' \
-e 's/(.+)\}/\1\n\}/g' \
-e 's/"\s*:\s*(("[^"]+")|([a-zA-Z0-9]+))\s*([,$])/": \1\4\n/g' \
-e 's/"\s*:\s*(("[^"]+")|([a-zA-Z0-9]+))\s*\}/": \1\n\}/g' | \
grep -vEe '^\s*$'
}
# Extract all layers of one Docker image into a destination directory.
#
# Arguments: $1 - image name, optionally ending in ':<tag>' or
#                 '@sha256:<64 hex digits>'
# Globals read:    EXTRACT_DEST, EXTRACT_DOCKER, EXTRACT_PULL,
#                  EXTRACT_MANIFEST, EXTRACT_EXPORT, GITHUB_PATH
# Globals written: BPATH, LPATH (only when EXTRACT_EXPORT is 1), plus the
#                  working variables below, none of which is function-local.
#
# Steps: derive the destination from the image name, pull the image when it
# is missing and EXTRACT_PULL is 1, `docker image save` it into a temporary
# directory, untar every layer listed in the manifest into the destination,
# optionally collect binary/library directories, and finally remove an
# image that was pulled by this call.
#
# NOTE(review): layers are only untarred on top of each other; nothing here
# processes '.wh.*' whiteout entries - confirm whether that is intended.
extract() {
# Extract details out of image name
fullname=$1
tag=""
# Digest form: tag becomes 'sha256:<hex>' (without the '@') and fullname
# loses the whole '@sha256:...' suffix.
if printf %s\\n "$1"|grep -Eq '@sha256:[a-f0-9A-F]{64}$'; then
tag=$(printf %s\\n "$1"|grep -Eo 'sha256:[a-f0-9A-F]{64}$')
fullname=$(printf %s\\n "$1"|sed -E 's/(.*)@sha256:[a-f0-9A-F]{64}$/\1/')
# Tag form: tag is what follows the last ':' (cut drops the colon itself).
elif printf %s\\n "$1"|grep -Eq ':[[:alnum:]_][[:alnum:]_.-]{0,127}$'; then
tag=$(printf %s\\n "$1"|grep -Eo ':[[:alnum:]_][[:alnum:]_.-]{0,127}$'|cut -c 2-)
fullname=$(printf %s\\n "$1"|sed -E 's/(.*):[[:alnum:]_][[:alnum:]_.-]{0,127}$/\1/')
fi
# shortname: last '/'-separated component of the name.
# NOTE(review): awk's printf receives the name as its format string, so a
# '%' in the name would be interpreted - confirm names cannot contain one.
shortname=$(printf %s\\n "$fullname" | awk -F / '{printf $NF}')
# fullname_flat: the name with every '/' replaced by '_'.
fullname_flat=$(printf %s\\n "$fullname" | sed 's~/~_~g')
# fullyqualified_flat: '<fullname_flat>_<tag>', with 'latest' standing in
# for an absent tag.
if [ -z "$tag" ]; then
fullyqualified_flat=$(printf %s_%s\\n "$fullname_flat" "latest")
else
fullyqualified_flat=$(printf %s_%s\\n "$fullname_flat" "$tag")
fi
# Generate the name of the destination directory, replacing the
# sugared-strings by their values. We use the ~ character as a separator in
# the sed expressions as / might appear in the values. Each keyword is
# replaced once (no 'g' flag).
# NOTE(review): the values are pasted into the sed replacement text as-is,
# so a '~', '&' or backslash in them would be interpreted by sed.
dst=$(printf %s\\n "$EXTRACT_DEST" |
sed -E \
-e "s~%tag%~${tag}~" \
-e "s~%fullname%~${fullname}~" \
-e "s~%shortname%~${shortname}~" \
-e "s~%fullname_flat%~${fullname_flat}~" \
-e "s~%fullyqualified_flat%~${fullyqualified_flat}~" \
-e "s~%name%~${1}~" \
)
# Pull image on demand, if necessary and when EXTRACT_PULL was set to 1
# imgrm remembers that this call pulled the image, so it is removed at the end.
imgrm=0
if ! ${EXTRACT_DOCKER} image inspect "$1" >/dev/null 2>&1 && [ "$EXTRACT_PULL" = "1" ]; then
_verbose "Pulling image '$1', will remove it upon completion"
${EXTRACT_DOCKER} image pull "$1"
imgrm=1
fi
if ${EXTRACT_DOCKER} image inspect "$1" >/dev/null 2>&1 ; then
# Create a temporary directory to store the content of the image itself, i.e.
# the result of docker image save on the image.
# NOTE(review): no trap removes TMPD if a later command aborts the script
# under 'set -e'.
TMPD=$(mktemp -t -d image-XXXXX)
# Extract image to the temporary directory
_verbose "Extracting content of '$1' to temporary storage"
${EXTRACT_DOCKER} image save "$1" | tar -C "$TMPD" -xf -
# Create destination directory, if necessary
if ! [ -d "$dst" ]; then
_verbose "Creating destination directory: '$dst' (resolved from '$EXTRACT_DEST')"
mkdir -p "$dst"
fi
# Extract all layers of the image, in the order specified by the manifest,
# into the destination directory. Layers are recognised in the unfolded
# manifest as '<64 hex digits>/<name>.tar' paths.
if [ -f "${TMPD}/${EXTRACT_MANIFEST}" ]; then
json_unfold < "${TMPD}/${EXTRACT_MANIFEST}" |
grep -oE '[a-fA-F0-9]{64}/[[:alnum:]]+\.tar' |
while IFS= read -r layer; do
_verbose "Extracting layer $(printf %s\\n "$layer" | awk -F '/' '{print $1}')"
tar -C "$dst" -xf "${TMPD}/${layer}"
done
else
# A missing manifest is reported, but processing continues.
_error "Cannot find $EXTRACT_MANIFEST in image content!"
fi
# Remove temporary content of image save.
rm -rf "$TMPD"
if [ "$EXTRACT_EXPORT" = "1" ]; then
# Resolve destination directory to absolute path
rdst=$(cd -P -- "$dst" && pwd -P)
# Look below the root of the extracted tree, its /usr and its /usr/local.
for top in "" /usr /usr/local; do
# Add binaries: a directory counts when it directly contains at least one
# executable regular file. It is prepended to BPATH, or appended to the
# file named by GITHUB_PATH when that variable is set.
for sub in /sbin /bin; do
bdir=${rdst%/}${top%/}${sub}
if [ -d "$bdir" ] \
&& [ "$(find "$bdir" -maxdepth 1 -mindepth 1 -type f -executable | wc -l)" -gt "0" ]; then
if [ -z "${GITHUB_PATH+x}" ]; then
BPATH="${bdir}:${BPATH}"
else
printf %s\\n "$bdir" >> "$GITHUB_PATH"
fi
fi
done
# Add libraries: same test, restricted to executable files named '*.so*';
# matching directories are prepended to LPATH.
for sub in /lib; do
ldir=${rdst%/}${top%/}${sub}
if [ -d "$ldir" ] \
&& [ "$(find "$ldir" -maxdepth 1 -mindepth 1 -type f -executable -name '*.so*'| wc -l)" -gt "0" ]; then
LPATH="${ldir}:${LPATH}"
fi
done
done
fi
else
_error "Image $1 not present at Docker daemon"
fi
# Remove the image again when this call was the one that pulled it.
if [ "$imgrm" = "1" ]; then
_verbose "Removing image $1 from host"
${EXTRACT_DOCKER} image rm "$1"
fi
}
# We need at least one image; without any, print the help and exit.
if [ "$#" = "0" ]; then
  usage
fi

# Seed the accumulated search paths with the current ones. Spaces are
# backslash-escaped because the PATH=/LD_LIBRARY_PATH= lines printed below
# are meant to be consumed by a shell.
BPATH=$(printf %s\\n "$PATH" | sed 's/ /\\ /g')
LPATH=$(printf %s\\n "${LD_LIBRARY_PATH:-}" | sed 's/ /\\ /g')

# Extract all images, one by one, to the target directory
for i in "$@"; do
  extract "$i"
done

if [ "$EXTRACT_EXPORT" = "1" ]; then
  if [ -z "${GITHUB_PATH+x}" ]; then
    # Plain shell: print assignments on stdout.
    printf "PATH=\"%s\"\n" "$BPATH"
    if [ -n "$LPATH" ]; then
      printf "LD_LIBRARY_PATH=\"%s\"\n" "$LPATH"
    fi
  elif [ -n "$LPATH" ]; then
    # GitHub Actions: binary directories were already appended to the
    # GITHUB_PATH file by extract. The GITHUB_ENV file takes NAME=value
    # lines whose value is used verbatim, so the value must not be wrapped
    # in quotes here or they would become part of LD_LIBRARY_PATH.
    # NOTE(review): spaces that came from a pre-existing LD_LIBRARY_PATH are
    # still backslash-escaped in LPATH at this point.
    printf 'LD_LIBRARY_PATH=%s\n' "$LPATH" >> "$GITHUB_ENV"
  fi
fi