From 079c3fab006e71489016ea011c77d87908dfc471 Mon Sep 17 00:00:00 2001
From: tituspijean
Date: Fri, 18 Nov 2022 20:11:58 +0100
Subject: [PATCH] Fix docker-image-extract script name (#1532)

* Fix docker-image-extract script name

* Update docker-image-extract

Use code from https://github.com/jjlin/docker-image-extract/commit/95b3db8af8e10b852d1d5121426d9bf91b8aae45
---
 .../docker-image-extract/docker-image-extract | 262 ++++++++++++++++++
 .../vendor/docker-image-extract/extract.sh    | 215 --------------
 2 files changed, 262 insertions(+), 215 deletions(-)
 create mode 100755 helpers/vendor/docker-image-extract/docker-image-extract
 delete mode 100755 helpers/vendor/docker-image-extract/extract.sh

diff --git a/helpers/vendor/docker-image-extract/docker-image-extract b/helpers/vendor/docker-image-extract/docker-image-extract
new file mode 100755
index 000000000..4842a8e04
--- /dev/null
+++ b/helpers/vendor/docker-image-extract/docker-image-extract
@@ -0,0 +1,262 @@
+#!/bin/sh
+#
+# This script pulls and extracts all files from an image in Docker Hub.
+#
+# Copyright (c) 2020-2022, Jeremy Lin
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+PLATFORM_DEFAULT="linux/amd64"
+PLATFORM="${PLATFORM_DEFAULT}"
+OUT_DIR="./output"
+
+usage() {
+    echo "This script pulls and extracts all files from an image in Docker Hub."
+    echo
+    echo "$0 [OPTIONS...] IMAGE[:REF]"
+    echo
+    echo "IMAGE can be a community user image (like 'some-user/some-image') or a"
+    echo "Docker official image (like 'hello-world', which contains no '/')."
+    echo
+    echo "REF is either a tag name or a full SHA-256 image digest (with a 'sha256:' prefix)."
+    echo "The default ref is the 'latest' tag."
+    echo
+    echo "Options:"
+    echo
+    echo "    -p PLATFORM  Pull image for the specified platform (default: ${PLATFORM})"
+    echo "                 For a given image on Docker Hub, the 'Tags' tab lists the"
+    echo "                 platforms supported for that image."
+    echo "    -o OUT_DIR   Extract image to the specified output dir (default: ${OUT_DIR})"
+    echo "    -h           Show help with usage examples"
+}
+
+usage_detailed() {
+    usage
+    echo
+    echo "Examples:"
+    echo
+    echo "# Pull and extract all files in the 'hello-world' image tagged 'latest'."
+    echo "\$ $0 hello-world:latest"
+    echo
+    echo "# Same as above; ref defaults to the 'latest' tag."
+    echo "\$ $0 hello-world"
+    echo
+    echo "# Pull the 'hello-world' image for the 'linux/arm64/v8' platform."
+    echo "\$ $0 -p linux/arm64/v8 hello-world"
+    echo
+    echo "# Pull an image by digest."
+    echo "\$ $0 hello-world:sha256:90659bf80b44ce6be8234e6ff90a1ac34acbeb826903b02cfa0da11c82cbc042"
+}
+
+if [ $# -eq 0 ]; then
+    usage_detailed
+    exit 0
+fi
+
+while getopts ':ho:p:' opt; do
+    case $opt in
+        o)
+            OUT_DIR="${OPTARG}"
+            ;;
+        p)
+            PLATFORM="${OPTARG}"
+            ;;
+        h)
+            usage_detailed
+            exit 0
+            ;;
+        \?)
+            echo "ERROR: Invalid option '-$OPTARG'."
+            echo
+            usage
+            exit 1
+            ;;
+        \:) echo "ERROR: Argument required for option '-$OPTARG'."
+            echo
+            usage
+            exit 1
+            ;;
+    esac
+done
+shift $(($OPTIND - 1))
+
+if [ $# -eq 0 ]; then
+    echo "ERROR: Image to pull must be specified."
+    echo
+    usage
+    exit 1
+fi
+
+have_curl() {
+    command -v curl >/dev/null
+}
+
+have_wget() {
+    command -v wget >/dev/null
+}
+
+if ! have_curl && ! have_wget; then
+    echo "This script requires either curl or wget."
+    exit 1
+fi
+
+image_spec="$1"
+image="${image_spec%%:*}"
+if [ "${image#*/}" = "${image}" ]; then
+    # Docker official images are in the 'library' namespace.
+    image="library/${image}"
+fi
+ref="${image_spec#*:}"
+if [ "${ref}" = "${image_spec}" ]; then
+    echo "Defaulting ref to tag 'latest'..."
+    ref=latest
+fi
+
+# Split platform (OS/arch/variant) into separate variables.
+# A platform specifier doesn't always include the `variant` component.
+OLD_IFS="${IFS}"
+IFS=/ read -r OS ARCH VARIANT <<EOF
+${PLATFORM}
+EOF
+IFS="${OLD_IFS}"
+
+# Given a JSON input on stdin, extract the string value associated with the
+# specified key. This avoids an extra dependency on a tool like `jq`.
+extract() {
+    local key="$1"
+    # Extract "<key>":"<val>" (assumes key/val won't contain double quotes).
+    # The colon may have whitespace on either side.
+    grep -o "\"${key}\"[[:space:]]*:[[:space:]]*\"[^\"]\+\"" |
+    # Extract just <val> by deleting the last '"', and then greedily deleting
+    # everything up to '"'.
+    sed -e 's/"$//' -e 's/.*"//'
+}
+
+# Fetch a URL to stdout. Up to two header arguments may be specified:
+#
+#   fetch <url> [name1: value1] [name2: value2]
+#
+fetch() {
+    if have_curl; then
+        if [ $# -eq 2 ]; then
+            set -- -H "$2" "$1"
+        elif [ $# -eq 3 ]; then
+            set -- -H "$2" -H "$3" "$1"
+        fi
+        curl -sSL "$@"
+    else
+        if [ $# -eq 2 ]; then
+            set -- --header "$2" "$1"
+        elif [ $# -eq 3 ]; then
+            set -- --header "$2" --header "$3" "$1"
+        fi
+        wget -qO- "$@"
+    fi
+}
+
+# https://docs.docker.com/docker-hub/api/latest/#tag/repositories
+manifest_list_url="https://hub.docker.com/v2/repositories/${image}/tags/${ref}"
+
+# If we're pulling the image for the default platform, or the ref is already
+# a SHA-256 image digest, then we don't need to look up anything.
+if [ "${PLATFORM}" = "${PLATFORM_DEFAULT}" ] || [ -z "${ref##sha256:*}" ]; then
+    digest="${ref}"
+else
+    echo "Getting multi-arch manifest list..."
+    digest=$(fetch "${manifest_list_url}" |
+        # Break up the single-line JSON output into separate lines by adding
+        # newlines before and after the chars '[', ']', '{', and '}'.
+        sed -e 's/\([][{}]\)/\n\1\n/g' |
+        # Extract the "images":[...] list.
+        sed -n '/"images":/,/]/ p' |
+        # Each image's details are now on a separate line, e.g.
+        # "architecture":"arm64","features":"","variant":"v8","digest":"sha256:054c85801c4cb41511b176eb0bf13a2c4bbd41611ddd70594ec3315e88813524","os":"linux","os_features":"","os_version":null,"size":828724,"status":"active","last_pulled":"2022-09-02T22:46:48.240632Z","last_pushed":"2022-09-02T00:42:45.69226Z"
+        # The image details are interspersed with lines of stray punctuation,
+        # so grep for an arbitrary string that must be in these lines.
+        grep architecture |
+        # Search for an image that matches the platform.
+        while read -r image; do
+            # Arch is probably most likely to be unique, so check that first.
+            arch="$(echo ${image} | extract 'architecture')"
+            if [ "${arch}" != "${ARCH}" ]; then continue; fi
+
+            os="$(echo ${image} | extract 'os')"
+            if [ "${os}" != "${OS}" ]; then continue; fi
+
+            variant="$(echo ${image} | extract 'variant')"
+            if [ "${variant}" = "${VARIANT}" ]; then
+                echo ${image} | extract 'digest'
+                break
+            fi
+        done)
+fi
+
+if [ -n "${digest}" ]; then
+    echo "Platform ${PLATFORM} resolved to '${digest}'..."
+else
+    echo "No image digest found. Verify that the image, ref, and platform are valid."
+    exit 1
+fi
+
+# https://docs.docker.com/registry/spec/auth/token/#how-to-authenticate
+api_token_url="https://auth.docker.io/token?service=registry.docker.io&scope=repository:$image:pull"
+
+# https://github.com/docker/distribution/blob/master/docs/spec/api.md#pulling-an-image-manifest
+manifest_url="https://registry-1.docker.io/v2/${image}/manifests/${digest}"
+
+# https://github.com/docker/distribution/blob/master/docs/spec/api.md#pulling-a-layer
+blobs_base_url="https://registry-1.docker.io/v2/${image}/blobs"
+
+echo "Getting API token..."
+token=$(fetch "${api_token_url}" | extract 'token')
+auth_header="Authorization: Bearer $token"
+v2_header="Accept: application/vnd.docker.distribution.manifest.v2+json"
+
+echo "Getting image manifest for $image:$ref..."
+layers=$(fetch "${manifest_url}" "${auth_header}" "${v2_header}" |
+    # Extract `digest` values only after the `layers` section appears.
+    sed -n '/"layers":/,$ p' |
+    extract 'digest')
+
+if [ -z "${layers}" ]; then
+    echo "No layers returned. Verify that the image and ref are valid."
+    exit 1
+fi
+
+mkdir -p "${OUT_DIR}"
+
+for layer in $layers; do
+    hash="${layer#sha256:}"
+    echo "Fetching and extracting layer ${hash}..."
+    fetch "${blobs_base_url}/${layer}" "${auth_header}" | gzip -d | tar -C "${OUT_DIR}" -xf -
+    # Ref: https://github.com/moby/moby/blob/master/image/spec/v1.2.md#creating-an-image-filesystem-changeset
+    #      https://github.com/moby/moby/blob/master/pkg/archive/whiteouts.go
+    # Search for "whiteout" files to indicate files deleted in this layer.
+    OLD_IFS="${IFS}"
+    find "${OUT_DIR}" -name '.wh.*' | while IFS= read -r f; do
+        dir="${f%/*}"
+        wh_file="${f##*/}"
+        file="${wh_file#.wh.}"
+        # Delete both the whiteout file and the whited-out file.
+        rm -rf "${dir}/${wh_file}" "${dir}/${file}"
+    done
+    IFS="${OLD_IFS}"
+done
+
+echo "Image contents extracted into ${OUT_DIR}."
diff --git a/helpers/vendor/docker-image-extract/extract.sh b/helpers/vendor/docker-image-extract/extract.sh
deleted file mode 100755
index cab06cb53..000000000
--- a/helpers/vendor/docker-image-extract/extract.sh
+++ /dev/null
@@ -1,215 +0,0 @@
-#!/bin/sh
-
-# If editing from Windows. Choose LF as line-ending
-
-
-set -eu
-
-
-# Set this to 1 for more verbosity (on stderr)
-EXTRACT_VERBOSE=${EXTRACT_VERBOSE:-0}
-
-# Destination directory, some %-surrounded keywords will be dynamically replaced
-# by elements of the fully-qualified image name.
-EXTRACT_DEST=${EXTRACT_DEST:-"$(pwd)"}
-
-# Pull if the image does not exist. If the image had to be pulled, it will
-# automatically be removed once done to conserve space.
-EXTRACT_PULL=${EXTRACT_PULL:-1}
-
-# Docker client command to use
-EXTRACT_DOCKER=${EXTRACT_DOCKER:-"docker"}
-
-# Export PATHs to binaries and libraries
-EXTRACT_EXPORT=${EXTRACT_EXPORT:-0}
-
-# Name of manifest file containing the description of the layers
-EXTRACT_MANIFEST=${EXTRACT_MANIFEST:-"manifest.json"}
-
-# This uses the comments behind the options to show the help. Not extremly
-# correct, but effective and simple.
-usage() {
-  echo "$0 extracts all layers from a Docker image to a directory, will pull if necessary" && \
-    grep "[[:space:]].)\ #" "$0" |
-    sed 's/#//' |
-    sed -r 's/([a-z])\)/-\1/'
-  exit "${1:-0}"
-}
-
-while getopts "t:d:vneh-" opt; do
-  case "$opt" in
-    d) # How to run the Docker client
-      EXTRACT_DOCKER=$OPTARG;;
-    e) # Print out commands for PATH extraction
-      EXTRACT_EXPORT=1;;
-    n) # Do not pull if the image does not exist
-      EXTRACT_PULL=0;;
-    h) # Print help and exit
-      usage;;
-    t) # Target directory, will be created if necessary, %-surrounded keywords will be resolved (see manual). Default: current directory
-      EXTRACT_DEST=$OPTARG;;
-    v) # Turn on verbosity
-      EXTRACT_VERBOSE=1;;
-    -)
-      break;;
-    *)
-      usage 1;;
-  esac
-done
-shift $((OPTIND-1))
-
-
-_verbose() {
-  if [ "$EXTRACT_VERBOSE" = "1" ]; then
-    printf %s\\n "$1" >&2
-  fi
-}
-
-_error() {
-  printf %s\\n "$1" >&2
-}
-
-
-# This will unfold JSON onliners to arrange for having fields and their values
-# on separated lines. It's sed and grep, don't expect miracles, but this should
-# work against most well-formatted JSON.
-json_unfold() {
-  sed -E \
-    -e 's/\}\s*,\s*\{/\n\},\n\{\n/g' \
-    -e 's/\{\s*"/\{\n"/g' \
-    -e 's/(.+)\}/\1\n\}/g' \
-    -e 's/"\s*:\s*(("[^"]+")|([a-zA-Z0-9]+))\s*([,$])/": \1\4\n/g' \
-    -e 's/"\s*:\s*(("[^"]+")|([a-zA-Z0-9]+))\s*\}/": \1\n\}/g' | \
-    grep -vEe '^\s*$'
-}
-
-extract() {
-  # Extract details out of image name
-  fullname=$1
-  tag=""
-  if printf %s\\n "$1"|grep -Eq '@sha256:[a-f0-9A-F]{64}$'; then
-    tag=$(printf %s\\n "$1"|grep -Eo 'sha256:[a-f0-9A-F]{64}$')
-    fullname=$(printf %s\\n "$1"|sed -E 's/(.*)@sha256:[a-f0-9A-F]{64}$/\1/')
-  elif printf %s\\n "$1"|grep -Eq ':[[:alnum:]_][[:alnum:]_.-]{0,127}$'; then
-    tag=$(printf %s\\n "$1"|grep -Eo ':[[:alnum:]_][[:alnum:]_.-]{0,127}$'|cut -c 2-)
-    fullname=$(printf %s\\n "$1"|sed -E 's/(.*):[[:alnum:]_][[:alnum:]_.-]{0,127}$/\1/')
-  fi
-  shortname=$(printf %s\\n "$fullname" | awk -F / '{printf $NF}')
-  fullname_flat=$(printf %s\\n "$fullname" | sed 's~/~_~g')
-  if [ -z "$tag" ]; then
-    fullyqualified_flat=$(printf %s_%s\\n "$fullname_flat" "latest")
-  else
-    fullyqualified_flat=$(printf %s_%s\\n "$fullname_flat" "$tag")
-  fi
-
-  # Generate the name of the destination directory, replacing the
-  # sugared-strings by their values. We use the ~ character as a separator in
-  # the sed expressions as / might appear in the values.
-  dst=$(printf %s\\n "$EXTRACT_DEST" |
-        sed -E \
-          -e "s~%tag%~${tag}~" \
-          -e "s~%fullname%~${fullname}~" \
-          -e "s~%shortname%~${shortname}~" \
-          -e "s~%fullname_flat%~${fullname_flat}~" \
-          -e "s~%fullyqualified_flat%~${fullyqualified_flat}~" \
-          -e "s~%name%~${1}~" \
-        )
-
-  # Pull image on demand, if necessary and when EXTRACT_PULL was set to 1
-  imgrm=0
-  if ! ${EXTRACT_DOCKER} image inspect "$1" >/dev/null 2>&1 && [ "$EXTRACT_PULL" = "1" ]; then
-    _verbose "Pulling image '$1', will remove it upon completion"
-    ${EXTRACT_DOCKER} image pull "$1"
-    imgrm=1
-  fi
-
-  if ${EXTRACT_DOCKER} image inspect "$1" >/dev/null 2>&1 ; then
-    # Create a temporary directory to store the content of the image itself, i.e.
-    # the result of docker image save on the image.
-    TMPD=$(mktemp -t -d image-XXXXX)
-
-    # Extract image to the temporary directory
-    _verbose "Extracting content of '$1' to temporary storage"
-    ${EXTRACT_DOCKER} image save "$1" | tar -C "$TMPD" -xf -
-
-    # Create destination directory, if necessary
-    if ! [ -d "$dst" ]; then
-      _verbose "Creating destination directory: '$dst' (resolved from '$EXTRACT_DEST')"
-      mkdir -p "$dst"
-    fi
-
-    # Extract all layers of the image, in the order specified by the manifest,
-    # into the destination directory.
-    if [ -f "${TMPD}/${EXTRACT_MANIFEST}" ]; then
-      json_unfold < "${TMPD}/${EXTRACT_MANIFEST}" |
-      grep -oE '[a-fA-F0-9]{64}/[[:alnum:]]+\.tar' |
-      while IFS= read -r layer; do
-        _verbose "Extracting layer $(printf %s\\n "$layer" | awk -F '/' '{print $1}')"
-        tar -C "$dst" -xf "${TMPD}/${layer}"
-      done
-    else
-      _error "Cannot find $EXTRACT_MANIFEST in image content!"
-    fi
-
-    # Remove temporary content of image save.
-    rm -rf "$TMPD"
-
-    if [ "$EXTRACT_EXPORT" = "1" ]; then
-      # Resolve destination directory to absolute path
-      rdst=$(cd -P -- "$dst" && pwd -P)
-      for top in "" /usr /usr/local; do
-        # Add binaries
-        for sub in /sbin /bin; do
-          bdir=${rdst%/}${top%/}${sub}
-          if [ -d "$bdir" ] \
-            && [ "$(find "$bdir" -maxdepth 1 -mindepth 1 -type f -executable | wc -l)" -gt "0" ]; then
-            if [ -z "${GITHUB_PATH+x}" ]; then
-              BPATH="${bdir}:${BPATH}"
-            else
-              printf %s\\n "$bdir" >> "$GITHUB_PATH"
-            fi
-          fi
-        done
-
-        # Add libraries
-        for sub in /lib; do
-          ldir=${rdst%/}${top%/}${sub}
-          if [ -d "$ldir" ] \
-            && [ "$(find "$ldir" -maxdepth 1 -mindepth 1 -type f -executable -name '*.so*'| wc -l)" -gt "0" ]; then
-            LPATH="${ldir}:${LPATH}"
-          fi
-        done
-      done
-    fi
-  else
-    _error "Image $1 not present at Docker daemon"
-  fi
-
-  if [ "$imgrm" = "1" ]; then
-    _verbose "Removing image $1 from host"
-    ${EXTRACT_DOCKER} image rm "$1"
-  fi
-}
-
-# We need at least one image
-if [ "$#" = "0" ]; then
-  usage
-fi
-
-# Extract all images, one by one, to the target directory
-BPATH=$(printf %s\\n "$PATH" | sed 's/ /\\ /g')
-LPATH=$(printf %s\\n "${LD_LIBRARY_PATH:-}" | sed 's/ /\\ /g')
-for i in "$@"; do
-  extract "$i"
-done
-
-if [ "$EXTRACT_EXPORT" = "1" ]; then
-  if [ -z "${GITHUB_PATH+x}" ]; then
-    printf "PATH=\"%s\"\n" "$BPATH"
-    if [ -n "$LPATH" ]; then
-      printf "LD_LIBRARY_PATH=\"%s\"\n" "$LPATH"
-    fi
-  elif [ -n "$LPATH" ]; then
-    printf "LD_LIBRARY_PATH=\"%s\"\n" "$LPATH" >> "$GITHUB_ENV"
-  fi
-fi