ddb06b6657
A recent(-ish) change in git [1] has exposed a bug in caching that
appears in one very specific circumstance -- updating the
openstack/openstack super-repo [2].
This repo gets a submodule update every time something is pushed. By
using "--git-dir" while the cwd is one level above the actual repo, we
confuse the change in [1], which then fails to find the submodule
directories and gives us the error:
  Could not access submodule 'foo'
for every submodule that has been updated since the last time we
updated the cache. [3]
The git manual does warn about this:
  "If you just want to run git as if it was started in <path> then use
  git -C <path>."
Indeed, that is what we want to do in this path. Modify the calls to
use -C.
[1] 505a276596
[2] https://opendev.org/openstack/openstack/
[3] The result for opendev production is that image builds fail every
time an openstack/* project is checked in; we then race to retry
the build before another commit lands and updates the submodules
again.
Change-Id: Iadb23454e29d8869e11407e1592007b0f0963e17
272 lines
12 KiB
Bash
Executable file
272 lines
12 KiB
Bash
Executable file
#!/bin/bash
# dib-lint: disable=safe_sudo

# Command tracing is only enabled when DIB_DEBUG_TRACE is greater than 1
# (unset counts as 0).
if [ ${DIB_DEBUG_TRACE:-0} -gt 1 ]; then
    set -x
fi
# Strict mode: abort on errors and on unset variables, and let a failure
# anywhere in a pipeline fail the whole pipeline.
set -eu
set -o pipefail

# Don't provide any source repositories in environments where they are not allowed
if [ -n "${NO_SOURCE_REPOSITORIES:-}" ]; then
    exit 0
fi

# If the old cache exists, move it to the new name.
#
# Arguments:
#   $1 - old cache path
#   $2 - new cache path
# Outputs:
#   Prints a notice to stdout when both paths exist (nothing is moved).
# Returns:
#   0 when there is nothing to do or the new path already exists;
#   otherwise the exit status of mv.
function make_new_cache(){
    local OLD_CACHE_BASE=$1
    local CACHE_BASE=$2

    # Nothing to migrate.
    if [[ ! -e "$OLD_CACHE_BASE" ]]; then
        return 0
    fi

    # Never clobber a cache that already lives at the new location.
    if [[ -e "$CACHE_BASE" ]]; then
        echo "Not replacing new cache location with old cache"
        return 0
    fi

    # Quoted so that paths containing whitespace or glob characters are
    # moved as a single argument; -n still refuses to overwrite.
    mv -n -- "$OLD_CACHE_BASE" "$CACHE_BASE"
}

# Gets repositories or individual files listed in a repository file
# and places them in the specified destination path.
# The format of the repository file is one or more lines matching
#   <name> <type> <destination> <location> [<ref>]
#
# Globals read:
#   TMP_HOOKS_PATH, TMP_MOUNT_PATH, CACHE_BASE, GIT_MANIFEST,
#   DIB_DEFAULT_INSTALLTYPE, DIB_OFFLINE, DIB_GITREPOBASE and the
#   per-repository overrides DIB_REPOTYPE_<name>, DIB_REPOLOCATION_<name>,
#   DIB_REPOREF_<name>, DIB_INSTALLTYPE_<name> (where <name> is the
#   repository name with every non-alphanumeric character replaced by '_').
# Arguments:
#   $1 - path of the repository file to process
# Returns:
#   1 if a line cannot be parsed or names an unsupported repository type.
#   Calls "exit 1" if a git ref cannot be found or an existing checkout
#   at the destination conflicts with the request.
function get_repos_for_element(){
    local REPO_SOURCES=$1
    local CACHE_URL=$TMP_HOOKS_PATH/bin/cache-url

    local REGEX="^([^ ]+) (git|tar|file|package) ?(/[^ ]+)? ?([^ ]+)? ?([^ ]*)$"

    # Working variables; kept local so nothing leaks into the caller.
    local line CACHE_NAME OLD_CACHE_PATH CACHE_PATH HAS_REF DESIRED
    local NEW_REPOLOCATION git_sha tmpdir

    # this can be a rather long list (e.g. caching ~1000 openstack
    # repos), so we make a rough count for progress display.
    # Declaration and assignment are deliberately combined: grep exits
    # non-zero when it selects no lines, and with pipefail + errexit a
    # bare assignment would abort on a file holding only comments.
    local COUNT=1
    local REPO_SOURCES_COUNT=$(grep -v '^$\|^\s*\#' "$REPO_SOURCES" | wc -l)

    # NOTE: read is used without -r, so backslashes in the repository
    # file are interpreted by read before the eval below sees them.
    while read line; do

        # temporarily turn off globbing '*' (since it may be used as
        # the REPOREF for tarballs) and expand variables
        set -f; line=$(eval echo $line); set +f

        # ignore blank lines and lines beginning in '#'
        if [[ -z "$line" || "$line" == \#* ]]; then
            continue
        fi

        # rough progress counter
        printf "(%04d / %04d)\n" "${COUNT}" "${REPO_SOURCES_COUNT}"
        COUNT=$(( COUNT + 1 ))

        if [[ ! "$line" =~ $REGEX ]]; then
            echo "Couldn't parse '$line' as a source repository"
            return 1
        fi

        local REPONAME=${BASH_REMATCH[1]}
        local REPOTYPE=${BASH_REMATCH[2]}
        local REPOPATH=${BASH_REMATCH[3]}
        local REPOLOCATION=${BASH_REMATCH[4]}

        # Repository name reduced to characters valid in a variable name;
        # used to build the DIB_*_<name> override lookups and cache name.
        local SAFE_NAME=${REPONAME//[^A-Za-z0-9]/_}

        # The warning concerns the repository *type*; the name is arbitrary.
        if [[ "$REPOTYPE" == "tar" && -z "${BASH_REMATCH[5]:-}" ]]; then
            echo "Warning: Default tarball REPOREF of '*' is deprecated; do not rely on it."
        fi

        # Default of '*' for tar repositories is deprecated; do not rely on it.
        # These two are looked up indirectly through REPOREF_LOOKUP_DEFAULT.
        local REPOREF_DEFAULT_TAR='*'
        local REPOREF_DEFAULT_GIT=master
        local REPOREF_LOOKUP_DEFAULT=REPOREF_DEFAULT_${REPOTYPE^^}

        local REPOREF=${BASH_REMATCH[5]:-${!REPOREF_LOOKUP_DEFAULT:-}}

        local REPO_DEST=$TMP_MOUNT_PATH$REPOPATH
        local REPO_SUB_DIRECTORY=$(dirname "$REPO_DEST")

        # REPOTYPE can be overridden with DIB_REPOTYPE_{name}
        local REPOTYPE_OVERRIDE=DIB_REPOTYPE_${SAFE_NAME}
        REPOTYPE=${!REPOTYPE_OVERRIDE:-$REPOTYPE}

        # REPOLOCATION can be overridden with DIB_REPOLOCATION_{name}
        local REPOLOCATION_OVERRIDE=DIB_REPOLOCATION_${SAFE_NAME}
        REPOLOCATION=${!REPOLOCATION_OVERRIDE:-$REPOLOCATION}

        # REPOREF can be overridden with DIB_REPOREF_{name}
        local REPOREF_OVERRIDE=DIB_REPOREF_${SAFE_NAME}
        REPOREF=${!REPOREF_OVERRIDE:-$REPOREF}

        # Determine a unique cache path for this repo
        CACHE_NAME=$(echo "${REPOTYPE}_${REPOLOCATION}" | sha1sum | awk '{ print $1 }' )
        OLD_CACHE_PATH=${CACHE_BASE}/${CACHE_NAME}
        # Add the repo name to the sha1sum for readability
        CACHE_NAME=${SAFE_NAME}_${CACHE_NAME}
        CACHE_PATH=${CACHE_BASE}/$CACHE_NAME
        make_new_cache "$OLD_CACHE_PATH" "$CACHE_PATH"

        # Skip this entry if install type is not source
        local INSTALL_TYPE_VAR=DIB_INSTALLTYPE_${SAFE_NAME}
        local INSTALL_TYPE=${!INSTALL_TYPE_VAR:-$DIB_DEFAULT_INSTALLTYPE}
        if [[ "$INSTALL_TYPE" != "source" ]]; then
            echo "$REPONAME install type not set to source"
            continue
        fi

        case $REPOTYPE in
            git)
                if [[ -z "${!REPOLOCATION_OVERRIDE:-""}" && -n "${DIB_GITREPOBASE:-""}" ]]; then
                    # Transform the current repo base to the new one
                    NEW_REPOLOCATION=$(echo $REPOLOCATION |\
                        sed "s,^[^:]\+://[^/]\+/\(~[^/]\+\)\?\(.*\)$,${DIB_GITREPOBASE}\2,g")
                    echo "Transformed ${REPOLOCATION} to ${NEW_REPOLOCATION}"
                    REPOLOCATION=$NEW_REPOLOCATION
                    # Also update the cache location
                    CACHE_NAME=$(echo "${REPOTYPE}_${REPOLOCATION}" | sha1sum | awk '{ print $1 }' )
                    # NOTE(review): this path is hard-coded instead of being
                    # derived from CACHE_BASE, and it uses the
                    # "repository-sources" directory name - confirm whether
                    # that is intentional.
                    CACHE_PATH=~/.cache/image-create/repository-sources/$CACHE_NAME
                fi
                sudo mkdir -p "$REPO_SUB_DIRECTORY"

                if [[ ! -e "$CACHE_PATH" ]]; then
                    echo "Caching $REPONAME from $REPOLOCATION in $CACHE_PATH"
                    # Clone to a temporary name and rename afterwards so
                    # CACHE_PATH only ever appears once the clone completed.
                    git clone -q "$REPOLOCATION" "${CACHE_PATH}.tmp"
                    mv "${CACHE_PATH}.tmp" "${CACHE_PATH}"
                fi

                # Reset for every repository so a value left over from a
                # previous loop iteration cannot suppress the update below.
                HAS_REF=""
                if [[ "$REPOREF" != "*" ]]; then
                    HAS_REF=$(git -C "${CACHE_PATH}" name-rev "$REPOREF" 2>/dev/null || true)
                fi
                # Update when not offline, or when the cache lacks the ref.
                if [[ -z "${DIB_OFFLINE:-}" || -z "$HAS_REF" ]]; then
                    echo "Updating cache of $REPOLOCATION in $CACHE_PATH with ref $REPOREF"
                    # Copy named refs (which might be outside the usual heads
                    # pattern) - e.g. gerrit
                    if [[ "$REPOREF" == "*" ]] || ! git -C "${CACHE_PATH}" fetch -q --prune --update-head-ok "$REPOLOCATION" \
                            "+${REPOREF}:${REPOREF}" ; then
                        # Copy all heads from the remote repository - this permits
                        # using a SHA1 object reference so long as the object
                        # reference is reachable from one of the heads. git does
                        # not permit arbitrary sha fetching from remote servers.
                        # This is a separate fetch to the prior one as the prior
                        # one will fail when REPOREF is a SHA1.
                        # The refspecs are quoted so the shell never globs them.
                        git -C "${CACHE_PATH}" fetch -q --prune --update-head-ok "$REPOLOCATION" \
                            '+refs/heads/*:refs/heads/*' '+refs/tags/*:refs/tags/*'
                    fi
                    # Ensure that we have a reference to the revision.
                    if [[ "$REPOREF" != "*" ]]; then
                        if ! git -C "${CACHE_PATH}" rev-parse -q --verify "${REPOREF}^{commit}" > /dev/null; then
                            echo "Failed to find reference to $REPOREF"
                            exit 1
                        fi
                    fi
                fi

                echo "Cloning from $REPONAME cache and applying ref $REPOREF"
                # If the local dir is already used, see if the pertinent details differ
                if [[ -d "$REPO_DEST" ]]; then
                    DESIRED="$(sudo git -C "${REPO_DEST}" config remote.origin.url)"
                    if [[ "$CACHE_PATH" != "$DESIRED" ]]; then
                        echo "REPOLOCATIONS don't match ($CACHE_PATH != $DESIRED)" >&2
                        exit 1
                    elif [[ "$REPOREF" != "*" ]]; then
                        pushd "$REPO_DEST" > /dev/null
                        # When we first clone we create a branch naming what we fetched
                        # that must match, or we are asking for two different references from the
                        # same repo, which is an error
                        if ! git rev-parse "fetch_$REPOREF"; then
                            echo "REPOREFS don't match - failed to get sha1 of fetch_$REPOREF" >&2
                            exit 1
                        fi
                        popd > /dev/null
                    fi
                else
                    sudo git clone -q "$CACHE_PATH" "$REPO_DEST"
                    pushd "$REPO_DEST" > /dev/null
                    if [[ "$REPOREF" == "*" ]]; then
                        sudo git fetch -q --prune --update-head-ok "$CACHE_PATH" \
                            '+refs/heads/*:refs/heads/*' '+refs/tags/*:refs/tags/*'
                        git_sha=$(git rev-parse HEAD)
                    else
                        sudo git fetch -q "$CACHE_PATH" "$REPOREF:fetch_$REPOREF"
                        sudo git reset --hard FETCH_HEAD
                        # Get the sha in use
                        git_sha=$(git rev-parse FETCH_HEAD)
                    fi
                    popd > /dev/null

                    # Write the sha being used into the source-repositories manifest
                    echo "$REPONAME git $REPOPATH $REPOLOCATION $git_sha" >> "$GIT_MANIFEST"
                fi
                ;;
            tar)
                # The top level directory of the tarball mightn't have a fixed name i.e.
                # it could contain version numbers etc... so we write it to a tmpdir
                # for inspection before transferring the contents into the target directory.
                # Plain assignment (not "local x=$(...)") so a mktemp failure
                # aborts here instead of leaving tmpdir empty.
                tmpdir=$(mktemp --tmpdir="$TMP_MOUNT_PATH/tmp" -d)
                if [[ -n "$CACHE_PATH" ]]; then
                    echo "Caching $REPONAME tarball from $REPOLOCATION in $CACHE_PATH"
                    if [[ ! -f "$CACHE_PATH" || -z "${DIB_OFFLINE:-}" ]]; then
                        "$CACHE_URL" "$REPOLOCATION" "$CACHE_PATH"
                    fi
                    tar -C "$tmpdir" -xzf "$CACHE_PATH"
                else
                    echo "Fetching $REPONAME tarball from $REPOLOCATION"
                    curl -f "$REPOLOCATION" | tar -C "$tmpdir" -xzf -
                fi

                sudo mkdir -p "$REPO_DEST"

                # A REPOREF of '.' will select the entire contents of the tarball,
                # while '*' will select only the contents of its subdirectories.
                # $REPOREF is intentionally left unquoted so that '*' globs;
                # ${tmpdir:?} aborts rather than operating relative to '/'.
                sudo rsync -a --remove-source-files "${tmpdir:?}"/$REPOREF/. "$REPO_DEST"

                rm -rf "${tmpdir:?}"

                ;;
            file)
                sudo mkdir -p "$REPO_SUB_DIRECTORY"
                if [[ -n "$CACHE_PATH" ]]; then
                    echo "Caching $REPONAME file from $REPOLOCATION in $CACHE_PATH"
                    if [[ ! -f "$CACHE_PATH" || -z "${DIB_OFFLINE:-}" ]]; then
                        "$CACHE_URL" "$REPOLOCATION" "$CACHE_PATH"
                    fi
                    sudo cp "$CACHE_PATH" "$REPO_DEST"
                else
                    echo "Fetching $REPONAME file from $REPOLOCATION"
                    sudo curl -f "$REPOLOCATION" -o "$REPO_DEST"
                fi
                ;;
            *)
                echo "Unsupported repository type: $REPOTYPE"
                return 1
                ;;
        esac

        # Capture the in-instance repository path for later review / other
        # elements (like a pypi dependency cache).
        echo "$REPOPATH" | sudo tee -a "$TMP_MOUNT_PATH/etc/dib-source-repositories" > /dev/null

    done < "$REPO_SOURCES"
}

# Cache locations: migrate a cache stored under the old directory name to
# the new one, then make sure the new directory exists.
CACHE_BASE=$DIB_IMAGE_CACHE/source-repositories
OLD_CACHE_BASE=$DIB_IMAGE_CACHE/repository-sources
make_new_cache "$OLD_CACHE_BASE" "$CACHE_BASE"
mkdir -p "$CACHE_BASE"
# Use the IMAGE_NAME from the calling script, and make it unique with the temporary path
GIT_MANIFEST_NAME=dib-manifest-git-$(basename "${IMAGE_NAME}")
GIT_MANIFEST_CACHE_NAME=${GIT_MANIFEST_NAME}_$(dirname "${TMP_MOUNT_PATH##*.}")
GIT_MANIFEST=$CACHE_BASE/${GIT_MANIFEST_CACHE_NAME}
# Start from a clean manifest.
rm -f "$GIT_MANIFEST"

# Get source repositories for the target.
# The file list is read NUL-delimited so that paths are never word-split,
# and on fd 3 so that stdin inside the loop body is left untouched.
repo_flock=$CACHE_BASE/repositories_flock
while IFS= read -r -d '' -u 3 _SOURCEREPO; do
    (
        # The lock is taken on fd 9, opened on the lock file for the
        # duration of this subshell; wait up to 600 seconds for it.
        echo "Getting $repo_flock: $(date) for $_SOURCEREPO"
        if ! flock -w 600 9 ; then
            echo "Did not get $repo_flock: $(date)"
            exit 1
        fi
        get_repos_for_element "$_SOURCEREPO"
    ) 9> "$repo_flock"
done 3< <(find "$TMP_HOOKS_PATH" -maxdepth 1 -name "source-repository-*" -not -name '*~' -print0)

# Copy the manifest into the image if it exists (there may be no git repositories used)
if [[ -e "$GIT_MANIFEST" ]]; then
    sudo mv "$GIT_MANIFEST" "$TMP_MOUNT_PATH/${DIB_MANIFEST_IMAGE_DIR}/$GIT_MANIFEST_NAME"
fi