diskimage-builder/elements/source-repositories/extra-data.d/98-source-repositories
Ian Wienand 8c837409b8 Turn down tracing for source-repo cache
If you check logs like [1] it's literally thousands of lines of the
same thing over-and-over as the git caching happens.  It is basically
all just noise unless you're debugging it specifically.  Up this to
tracing level 2 ("-x -x") to see it.  Add a note in the help about
multiple flags, which has always been intended but not documented.

Image builds should continue to run with single "-x", but we could
probably greatly increase signal:noise ratio in the logs with a little
more judicial use of this to turn down some of the very noisy &
repetitive parts.

[1] anything in http://nodepool.openstack.org/

Change-Id: I91c5e55814ba9840769357261d203f4850e2eba6
2016-03-22 09:57:30 +11:00

264 lines
12 KiB
Bash
Executable File

#!/bin/bash
if [ ${DIB_DEBUG_TRACE:-0} -gt 1 ]; then
set -x
fi
set -eu
set -o pipefail
# Don't provide any source repositories in environments where they are not allowed
if [ -n "${NO_SOURCE_REPOSITORIES:-}" ]; then
exit 0
fi
# If the old cache exists, move it to the new name
function make_new_cache(){
local OLD_CACHE_BASE=$1
local CACHE_BASE=$2
# If the old cache name exists, move it to the new cache name
if [ -e "$OLD_CACHE_BASE" ] ; then
if [ ! -e "$CACHE_BASE" ] ; then
mv -n $OLD_CACHE_BASE $CACHE_BASE
else
echo "Not replacing new cache location with old cache"
fi
fi
}
# Gets repositories or individual files listed in the a repository file
# and places them in the specified destination path.
# The format of the repository file is one or more lines matching
# <name> <type> <destination> <location> [<ref>]
function get_repos_for_element(){
local REPO_SOURCES=$1
local CACHE_URL=$TMP_HOOKS_PATH/bin/cache-url
local REGEX="^([^ ]+) (git|tar|file|package) ?(/[^ ]+)? ?([^ ]+)? ?([^ ]*)$"
while read line; do
# temporarily turn off globbing '*' (since it may be used as the REPOREF for tarballs)
set -f
# expand variables
line=$(eval echo $line)
# restore globbing
set +f
# ignore blank lines and lines beginning in '#'
[[ "$line" == \#* ]] || [[ -z "$line" ]] && continue
if [[ "$line" =~ $REGEX ]] ; then
local REPONAME=${BASH_REMATCH[1]}
local REPOTYPE=${BASH_REMATCH[2]}
local REPOPATH=${BASH_REMATCH[3]}
local REPOLOCATION=${BASH_REMATCH[4]}
local REPO_ORIG_LOCATION=$REPOLOCATION
if [ $REPONAME = "tar" -a -z "${BASH_REMATCH[5]:-}" ] ; then
echo "Warning: Default tarball REPOREF of '*' is deprecated; do not rely on it."
fi
# Default of '*' for tar repositories is deprecated; do not rely on it.
local REPOREF_DEFAULT_TAR=*
local REPOREF_DEFAULT_GIT=master
local REPOREF_LOOKUP_DEFAULT=REPOREF_DEFAULT_${REPOTYPE^^}
local REPOREF=${BASH_REMATCH[5]:-${!REPOREF_LOOKUP_DEFAULT:-}}
local REPO_DEST=$TMP_MOUNT_PATH$REPOPATH
local REPO_SUB_DIRECTORY=$(dirname $REPO_DEST)
# REPOTYPE can be overridden with DIB_REPOTYPE_{name}
local REPOTYPE_OVERRIDE=DIB_REPOTYPE_${REPONAME//[^A-Za-z0-9]/_}
REPOTYPE=${!REPOTYPE_OVERRIDE:-$REPOTYPE}
# REPOLOCATION can be overridden with DIB_REPOLOCATION_{name}
local REPOLOCATION_OVERRIDE=DIB_REPOLOCATION_${REPONAME//[^A-Za-z0-9]/_}
REPOLOCATION=${!REPOLOCATION_OVERRIDE:-$REPOLOCATION}
# REPOREF can be overridden with DIB_REPOREF_{name}
local REPOREF_OVERRIDE=DIB_REPOREF_${REPONAME//[^A-Za-z0-9]/_}
REPOREF=${!REPOREF_OVERRIDE:-$REPOREF}
# Determine a unique cache path for this repo
CACHE_NAME=$(echo "${REPOTYPE}_${REPOLOCATION}" | sha1sum | awk '{ print $1 }' )
OLD_CACHE_PATH=${CACHE_BASE}/${CACHE_NAME}
# Add the repo name to the sha1sum for readability
CACHE_NAME=${REPONAME//[^A-Za-z0-9]/_}_${CACHE_NAME}
CACHE_PATH=${CACHE_BASE}/$CACHE_NAME
make_new_cache $OLD_CACHE_PATH $CACHE_PATH
# Return if install type is not source
local INSTALL_TYPE_VAR=DIB_INSTALLTYPE_${REPONAME//[^A-Za-z0-9]/_}
local INSTALL_TYPE=${!INSTALL_TYPE_VAR:-$DIB_DEFAULT_INSTALLTYPE}
if [ ! $INSTALL_TYPE = "source" ]; then
echo "$REPONAME install type not set to source"
continue
fi
case $REPOTYPE in
git)
if [ -z "${!REPOLOCATION_OVERRIDE:-""}" -a -n "${DIB_GITREPOBASE:-""}" ] ; then
# Transform the current repo base to the new one
local NEW_REPOLOCATION=$(echo $REPOLOCATION |\
sed "s,^[^:]\+://[^/]\+/\(~[^/]\+\)\?\(.*\)$,${DIB_GITREPOBASE}\2,g")
echo "Transformed ${REPOLOCATION} to ${NEW_REPOLOCATION}"
REPOLOCATION=$NEW_REPOLOCATION
# Also update the cache location
CACHE_NAME=$(echo "${REPOTYPE}_${REPOLOCATION}" | sha1sum | awk '{ print $1 }' )
CACHE_PATH=~/.cache/image-create/repository-sources/$CACHE_NAME
fi
sudo mkdir -p $REPO_SUB_DIRECTORY
if [ ! -e "$CACHE_PATH" ] ; then
echo "Caching $REPONAME from $REPOLOCATION in $CACHE_PATH"
git clone $REPOLOCATION $CACHE_PATH.tmp
mv ${CACHE_PATH}{.tmp,}
fi
if [ "$REPOREF" != "*" ] ; then
HAS_REF=$(git --git-dir=$CACHE_PATH/.git name-rev $REPOREF 2>/dev/null || true)
fi
if [ -z "$DIB_OFFLINE" -o -z "$HAS_REF" ] ; then
echo "Updating cache of $REPOLOCATION in $CACHE_PATH with ref $REPOREF"
# Copy named refs (which might be outside the usual heads
# pattern) - e.g. gerrit
if [ "$REPOREF" == "*" ] || ! git --git-dir=$CACHE_PATH/.git fetch --prune --update-head-ok $REPOLOCATION \
+${REPOREF}:${REPOREF} ; then
# Copy all heads from the remote repository - this permits
# using a SHA1 object reference so long as the object
# reference is reachable from one of the heads. git does
# not permit arbitrary sha fetching from remote servers.
# This is a separate fetch to the prior one as the prior
# one will fail when REPOREF is a SHA1.
git --git-dir=$CACHE_PATH/.git fetch --prune --update-head-ok $REPOLOCATION \
+refs/heads/*:refs/heads/* +refs/tags/*:refs/tags/*
fi
# Ensure that we have a reference to the revision.
if [ "$REPOREF" != "*" ] ; then
git --git-dir=$CACHE_PATH/.git rev-parse -q --verify $REPOREF^{commit}
fi
fi
echo "Cloning from $REPONAME cache and applying ref $REPOREF"
# If the local dir is already used, see if the pertinent details differ
if [[ -d $REPO_DEST ]]; then
DESIRED="$(sudo git --git-dir=$REPO_DEST/.git config remote.origin.url)"
if [[ "$CACHE_PATH" != "$DESIRED" ]]; then
echo "REPOLOCATIONS don't match ("$CACHE_PATH" != "$DESIRED")" >&2
exit 1
elif [[ "$REPOREF" != "*" ]]; then
pushd $REPO_DEST
# When we first clone we create a branch naming what we fetched
# that must match, or we are asking for two different references from the
# same repo, which is an error
if ! git rev-parse fetch_$REPOREF; then
echo "REPOREFS don't match - failed to get sha1 of fetch_$REPOREF" >&2
exit 1
fi
popd
fi
else
sudo git clone $CACHE_PATH $REPO_DEST
pushd $REPO_DEST
if [[ "$REPOREF" == "*" ]]; then
sudo git fetch --prune --update-head-ok $CACHE_PATH \
+refs/heads/*:refs/heads/* +refs/tags/*:refs/tags/*
git_sha=$(git rev-parse HEAD)
else
sudo git fetch $CACHE_PATH $REPOREF:fetch_$REPOREF
sudo git reset --hard FETCH_HEAD
# Get the sha in use
git_sha=$(git rev-parse FETCH_HEAD)
fi
popd
# Write the sha being used into the source-repositories manifest
echo "$REPONAME git $REPOPATH $REPOLOCATION $git_sha" >> $GIT_MANIFEST
fi
;;
tar)
# The top level directory of the tarball mightn't have a fixed name i.e.
# it could contain version numbers etc... so we write it to a tmpdir
# for inspection before transferring the contents into the target directory
local tmpdir=$(mktemp --tmpdir=$TMP_MOUNT_PATH/tmp -d)
if [ -n "$CACHE_PATH" ] ; then
echo "Caching $REPONAME tarball from $REPOLOCATION in $CACHE_PATH"
if [ ! -f "$CACHE_PATH" -o -z "$DIB_OFFLINE" ] ; then
$CACHE_URL $REPOLOCATION $CACHE_PATH
fi
tar -C $tmpdir -xzf $CACHE_PATH
else
echo "Fetching $REPONAME tarball from $REPOLOCATION"
curl $REPOLOCATION | tar -C $tmpdir -xzf -
fi
sudo mkdir -p $REPO_DEST
# A REPOREF of '.' will select the entire contents of the tarball,
# while '*' will select only the contents of its subdirectories.
sudo rsync -a --remove-source-files $tmpdir/$REPOREF/. $REPO_DEST
rm -rf $tmpdir
;;
file)
sudo mkdir -p $REPO_SUB_DIRECTORY
if [ -n "$CACHE_PATH" ] ; then
echo "Caching $REPONAME file from $REPOLOCATION in $CACHE_PATH"
if [ ! -f "$CACHE_PATH" -o -z "$DIB_OFFLINE" ] ; then
$CACHE_URL $REPOLOCATION $CACHE_PATH
fi
sudo cp $CACHE_PATH $REPO_DEST
else
echo "Fetching $REPONAME file from $REPOLOCATION"
sudo curl $REPOLOCATION -o $REPO_DEST
fi
;;
*)
echo "Unsupported repository type: $REPOTYPE"
return 1
;;
esac
# Capture the in-instance repository path for later review / other
# elements (like a pypi dependency cache).
echo "$REPOPATH" | sudo dd of=$TMP_MOUNT_PATH/etc/dib-source-repositories oflag=append conv=notrunc
else
echo "Couldn't parse '$line' as a source repository"
return 1
fi
done < $REPO_SOURCES
}
CACHE_BASE=$DIB_IMAGE_CACHE/source-repositories
OLD_CACHE_BASE=$DIB_IMAGE_CACHE/repository-sources
make_new_cache $OLD_CACHE_BASE $CACHE_BASE
mkdir -p $CACHE_BASE
# Use the IMAGE_NAME from the calling script, and make it unique with the temporary path
GIT_MANIFEST_NAME=dib-manifest-git-$(basename ${IMAGE_NAME})
GIT_MANIFEST_CACHE_NAME=${GIT_MANIFEST_NAME}_$(dirname ${TMP_MOUNT_PATH##*.})
GIT_MANIFEST=$CACHE_BASE/${GIT_MANIFEST_CACHE_NAME}
rm -f $GIT_MANIFEST
# Get source repositories for the target
for _SOURCEREPO in $(find $TMP_HOOKS_PATH -maxdepth 1 -name "source-repository-*" -not -name '*~'); do
repo_flock=$CACHE_BASE/repositories_flock
(
echo "Getting $repo_flock: $(date)"
if ! flock -w 600 9 ; then
echo "Did not get $repo_flock: $(date)"
exit 1
fi
get_repos_for_element $_SOURCEREPO
) 9> $repo_flock
done
# Copy the manifest into the image if it exists (there may be no git repositories used)
if [ -e "$GIT_MANIFEST" ] ; then
sudo mv $GIT_MANIFEST $TMP_MOUNT_PATH/${DIB_MANIFEST_IMAGE_DIR}/$GIT_MANIFEST_NAME
fi