diskimage-builder/elements/source-repositories/extra-data.d/98-source-repositories
Ian Wienand 74d0185dde Cleanup source-repositories output
This element takes up a *huge* part of the logs of openstack-infra
builds as we go and cache every git tree.

This silences most of the noise which will reduce the mess
in the logs considerably.

Note that we've had logging on this turned down since
I91c5e55814ba9840769357261d203f4850e2eba6 but it has been
ineffective in stopping the log-spew output, see the
dependency change.

Change-Id: I60f06f84d57087c82b3907575bff125015d35171
Depends-On: I1e39822f218dc0322e2490a770f3dc867a55802c
2016-06-06 15:57:39 +10:00

272 lines
12 KiB
Bash
Executable File

#!/bin/bash
if [ ${DIB_DEBUG_TRACE:-0} -gt 1 ]; then
set -x
fi
set -eu
set -o pipefail
# Don't provide any source repositories in environments where they are not allowed
if [ -n "${NO_SOURCE_REPOSITORIES:-}" ]; then
exit 0
fi
# If the old cache exists, move it to the new name
function make_new_cache(){
local OLD_CACHE_BASE=$1
local CACHE_BASE=$2
# If the old cache name exists, move it to the new cache name
if [ -e "$OLD_CACHE_BASE" ] ; then
if [ ! -e "$CACHE_BASE" ] ; then
mv -n $OLD_CACHE_BASE $CACHE_BASE
else
echo "Not replacing new cache location with old cache"
fi
fi
}
# Gets repositories or individual files listed in the a repository file
# and places them in the specified destination path.
# The format of the repository file is one or more lines matching
# <name> <type> <destination> <location> [<ref>]
function get_repos_for_element(){
local REPO_SOURCES=$1
local CACHE_URL=$TMP_HOOKS_PATH/bin/cache-url
local REGEX="^([^ ]+) (git|tar|file|package) ?(/[^ ]+)? ?([^ ]+)? ?([^ ]*)$"
# this can be a rather long list (e.g. caching ~1000 openstack
# repos), so we make a rough count for progress display
local COUNT=1
local REPO_SOURCES_COUNT=$(grep -v '^$\|^\s*\#' $REPO_SOURCES | wc -l)
while read line; do
# temporarily turn off globbing '*' (since it may be used as
# the REPOREF for tarballs) and expand variables
set -f; line=$(eval echo $line); set +f
# ignore blank lines and lines beginning in '#'
[[ "$line" == \#* ]] || [[ -z "$line" ]] && continue
# rough progress counter
printf "(%04d / %04d)\n" ${COUNT} ${REPO_SOURCES_COUNT}
COUNT=$(( COUNT + 1))
if [[ "$line" =~ $REGEX ]] ; then
local REPONAME=${BASH_REMATCH[1]}
local REPOTYPE=${BASH_REMATCH[2]}
local REPOPATH=${BASH_REMATCH[3]}
local REPOLOCATION=${BASH_REMATCH[4]}
local REPO_ORIG_LOCATION=$REPOLOCATION
if [ $REPONAME = "tar" -a -z "${BASH_REMATCH[5]:-}" ] ; then
echo "Warning: Default tarball REPOREF of '*' is deprecated; do not rely on it."
fi
# Default of '*' for tar repositories is deprecated; do not rely on it.
local REPOREF_DEFAULT_TAR=*
local REPOREF_DEFAULT_GIT=master
local REPOREF_LOOKUP_DEFAULT=REPOREF_DEFAULT_${REPOTYPE^^}
local REPOREF=${BASH_REMATCH[5]:-${!REPOREF_LOOKUP_DEFAULT:-}}
local REPO_DEST=$TMP_MOUNT_PATH$REPOPATH
local REPO_SUB_DIRECTORY=$(dirname $REPO_DEST)
# REPOTYPE can be overridden with DIB_REPOTYPE_{name}
local REPOTYPE_OVERRIDE=DIB_REPOTYPE_${REPONAME//[^A-Za-z0-9]/_}
REPOTYPE=${!REPOTYPE_OVERRIDE:-$REPOTYPE}
# REPOLOCATION can be overridden with DIB_REPOLOCATION_{name}
local REPOLOCATION_OVERRIDE=DIB_REPOLOCATION_${REPONAME//[^A-Za-z0-9]/_}
REPOLOCATION=${!REPOLOCATION_OVERRIDE:-$REPOLOCATION}
# REPOREF can be overridden with DIB_REPOREF_{name}
local REPOREF_OVERRIDE=DIB_REPOREF_${REPONAME//[^A-Za-z0-9]/_}
REPOREF=${!REPOREF_OVERRIDE:-$REPOREF}
# Determine a unique cache path for this repo
CACHE_NAME=$(echo "${REPOTYPE}_${REPOLOCATION}" | sha1sum | awk '{ print $1 }' )
OLD_CACHE_PATH=${CACHE_BASE}/${CACHE_NAME}
# Add the repo name to the sha1sum for readability
CACHE_NAME=${REPONAME//[^A-Za-z0-9]/_}_${CACHE_NAME}
CACHE_PATH=${CACHE_BASE}/$CACHE_NAME
make_new_cache $OLD_CACHE_PATH $CACHE_PATH
# Return if install type is not source
local INSTALL_TYPE_VAR=DIB_INSTALLTYPE_${REPONAME//[^A-Za-z0-9]/_}
local INSTALL_TYPE=${!INSTALL_TYPE_VAR:-$DIB_DEFAULT_INSTALLTYPE}
if [ ! $INSTALL_TYPE = "source" ]; then
echo "$REPONAME install type not set to source"
continue
fi
case $REPOTYPE in
git)
if [ -z "${!REPOLOCATION_OVERRIDE:-""}" -a -n "${DIB_GITREPOBASE:-""}" ] ; then
# Transform the current repo base to the new one
local NEW_REPOLOCATION=$(echo $REPOLOCATION |\
sed "s,^[^:]\+://[^/]\+/\(~[^/]\+\)\?\(.*\)$,${DIB_GITREPOBASE}\2,g")
echo "Transformed ${REPOLOCATION} to ${NEW_REPOLOCATION}"
REPOLOCATION=$NEW_REPOLOCATION
# Also update the cache location
CACHE_NAME=$(echo "${REPOTYPE}_${REPOLOCATION}" | sha1sum | awk '{ print $1 }' )
CACHE_PATH=~/.cache/image-create/repository-sources/$CACHE_NAME
fi
sudo mkdir -p $REPO_SUB_DIRECTORY
if [ ! -e "$CACHE_PATH" ] ; then
echo "Caching $REPONAME from $REPOLOCATION in $CACHE_PATH"
git clone -q $REPOLOCATION $CACHE_PATH.tmp
mv ${CACHE_PATH}{.tmp,}
fi
if [ "$REPOREF" != "*" ] ; then
HAS_REF=$(git --git-dir=$CACHE_PATH/.git name-rev $REPOREF 2>/dev/null || true)
fi
if [ -z "$DIB_OFFLINE" -o -z "$HAS_REF" ] ; then
echo "Updating cache of $REPOLOCATION in $CACHE_PATH with ref $REPOREF"
# Copy named refs (which might be outside the usual heads
# pattern) - e.g. gerrit
if [ "$REPOREF" == "*" ] || ! git --git-dir=$CACHE_PATH/.git fetch -q --prune --update-head-ok $REPOLOCATION \
+${REPOREF}:${REPOREF} ; then
# Copy all heads from the remote repository - this permits
# using a SHA1 object reference so long as the object
# reference is reachable from one of the heads. git does
# not permit arbitrary sha fetching from remote servers.
# This is a separate fetch to the prior one as the prior
# one will fail when REPOREF is a SHA1.
git --git-dir=$CACHE_PATH/.git fetch -q --prune --update-head-ok $REPOLOCATION \
+refs/heads/*:refs/heads/* +refs/tags/*:refs/tags/*
fi
# Ensure that we have a reference to the revision.
if [ "$REPOREF" != "*" ] ; then
if ! git --git-dir=$CACHE_PATH/.git rev-parse -q --verify $REPOREF^{commit} > /dev/null; then
echo "Failed to find reference to $REPOREF"
exit 1
fi
fi
fi
echo "Cloning from $REPONAME cache and applying ref $REPOREF"
# If the local dir is already used, see if the pertinent details differ
if [[ -d $REPO_DEST ]]; then
DESIRED="$(sudo git --git-dir=$REPO_DEST/.git config remote.origin.url)"
if [[ "$CACHE_PATH" != "$DESIRED" ]]; then
echo "REPOLOCATIONS don't match ("$CACHE_PATH" != "$DESIRED")" >&2
exit 1
elif [[ "$REPOREF" != "*" ]]; then
pushd $REPO_DEST > /dev/null
# When we first clone we create a branch naming what we fetched
# that must match, or we are asking for two different references from the
# same repo, which is an error
if ! git rev-parse fetch_$REPOREF; then
echo "REPOREFS don't match - failed to get sha1 of fetch_$REPOREF" >&2
exit 1
fi
popd > /dev/null
fi
else
sudo git clone -q $CACHE_PATH $REPO_DEST
pushd $REPO_DEST > /dev/null
if [[ "$REPOREF" == "*" ]]; then
sudo git fetch -q --prune --update-head-ok $CACHE_PATH \
+refs/heads/*:refs/heads/* +refs/tags/*:refs/tags/*
git_sha=$(git rev-parse HEAD)
else
sudo git fetch -q $CACHE_PATH $REPOREF:fetch_$REPOREF
sudo git reset --hard FETCH_HEAD
# Get the sha in use
git_sha=$(git rev-parse FETCH_HEAD)
fi
popd > /dev/null
# Write the sha being used into the source-repositories manifest
echo "$REPONAME git $REPOPATH $REPOLOCATION $git_sha" >> $GIT_MANIFEST
fi
;;
tar)
# The top level directory of the tarball mightn't have a fixed name i.e.
# it could contain version numbers etc... so we write it to a tmpdir
# for inspection before transferring the contents into the target directory
local tmpdir=$(mktemp --tmpdir=$TMP_MOUNT_PATH/tmp -d)
if [ -n "$CACHE_PATH" ] ; then
echo "Caching $REPONAME tarball from $REPOLOCATION in $CACHE_PATH"
if [ ! -f "$CACHE_PATH" -o -z "$DIB_OFFLINE" ] ; then
$CACHE_URL $REPOLOCATION $CACHE_PATH
fi
tar -C $tmpdir -xzf $CACHE_PATH
else
echo "Fetching $REPONAME tarball from $REPOLOCATION"
curl $REPOLOCATION | tar -C $tmpdir -xzf -
fi
sudo mkdir -p $REPO_DEST
# A REPOREF of '.' will select the entire contents of the tarball,
# while '*' will select only the contents of its subdirectories.
sudo rsync -a --remove-source-files $tmpdir/$REPOREF/. $REPO_DEST
rm -rf $tmpdir
;;
file)
sudo mkdir -p $REPO_SUB_DIRECTORY
if [ -n "$CACHE_PATH" ] ; then
echo "Caching $REPONAME file from $REPOLOCATION in $CACHE_PATH"
if [ ! -f "$CACHE_PATH" -o -z "$DIB_OFFLINE" ] ; then
$CACHE_URL $REPOLOCATION $CACHE_PATH
fi
sudo cp $CACHE_PATH $REPO_DEST
else
echo "Fetching $REPONAME file from $REPOLOCATION"
sudo curl $REPOLOCATION -o $REPO_DEST
fi
;;
*)
echo "Unsupported repository type: $REPOTYPE"
return 1
;;
esac
# Capture the in-instance repository path for later review / other
# elements (like a pypi dependency cache).
echo "$REPOPATH" | sudo tee -a $TMP_MOUNT_PATH/etc/dib-source-repositories > /dev/null
else
echo "Couldn't parse '$line' as a source repository"
return 1
fi
done < $REPO_SOURCES
}
CACHE_BASE=$DIB_IMAGE_CACHE/source-repositories
OLD_CACHE_BASE=$DIB_IMAGE_CACHE/repository-sources
make_new_cache $OLD_CACHE_BASE $CACHE_BASE
mkdir -p $CACHE_BASE
# Use the IMAGE_NAME from the calling script, and make it unique with the temporary path
GIT_MANIFEST_NAME=dib-manifest-git-$(basename ${IMAGE_NAME})
GIT_MANIFEST_CACHE_NAME=${GIT_MANIFEST_NAME}_$(dirname ${TMP_MOUNT_PATH##*.})
GIT_MANIFEST=$CACHE_BASE/${GIT_MANIFEST_CACHE_NAME}
rm -f $GIT_MANIFEST
# Get source repositories for the target
for _SOURCEREPO in $(find $TMP_HOOKS_PATH -maxdepth 1 -name "source-repository-*" -not -name '*~'); do
repo_flock=$CACHE_BASE/repositories_flock
(
echo "Getting $repo_flock: $(date) for $_SOURCEREPO"
if ! flock -w 600 9 ; then
echo "Did not get $repo_flock: $(date)"
exit 1
fi
get_repos_for_element $_SOURCEREPO
) 9> $repo_flock
done
# Copy the manifest into the image if it exists (there may be no git repositories used)
if [ -e "$GIT_MANIFEST" ] ; then
sudo mv $GIT_MANIFEST $TMP_MOUNT_PATH/${DIB_MANIFEST_IMAGE_DIR}/$GIT_MANIFEST_NAME
fi