#!/usr/bin/env bash
# NOTE: if the output of this backend has to change (e.g. we change what gets
# included in the archive (e.g. LFS), or we change the format of the archive
# (e.g. tar options, compression ratio or method)), we MUST update the format
# version in the variable BR_FMT_VERSION_git, in package/pkg-download.mk.
# We want to catch any unexpected failure
set -e
# Download helper for git, to be called from the download wrapper script
#
# Options:
# -q Be quiet.
# -l Download large files stored with git-lfs.
# -r Clone and archive sub-modules.
# -o FILE Generate archive in FILE.
# -u URI Clone from repository at URI.
# -c CSET Use changeset CSET.
# -d DIR Use download directory DIR (the git cache lives in DIR/git).
# -n NAME Use basename NAME.
#
# Environment:
# GIT : the git command to call
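#
# Illustrative invocation (all values hypothetical; the download wrapper
# normally constructs this command line):
# GIT=git ./git -q -r -o /path/to/dl/foo-1.0.tar.gz \
#     -u https://example.com/foo.git -c v1.0 -d /path/to/dl -n foo-1.0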
# shellcheck disable=SC1090 # Only provides mk_tar_gz()
. "${0%/*}/helpers"
# Save our path and options in case we need to call ourselves again
myname="${0}"
declare -a OPTS=("${@}")
# This function is called when an error occurs. Its job is to attempt a
# clone from scratch (only once!) in case the git tree is borked, or in
# case an unexpected and unsupported situation arises with submodules
# or uncommitted stuff (e.g. if the user manually mucked around in the
# git cache).
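# Note that the "only once" behaviour relies on BR_GIT_BACKEND_FIRST_FAULT
# being exported before we re-exec ourselves, so the second run inherits it
# through the environment and bails out instead of retrying forever.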
_on_error() {
local ret=${?}
printf "Detected a corrupted git cache.\n" >&2
if ${BR_GIT_BACKEND_FIRST_FAULT:-false}; then
printf "This is the second time in a row; bailing out\n" >&2
exit ${ret}
fi
export BR_GIT_BACKEND_FIRST_FAULT=true
printf "Removing it and starting afresh.\n" >&2
popd >/dev/null
rm -rf "${git_cache}"
exec "${myname}" "${OPTS[@]}" || exit ${ret}
}
quiet=
large_file=0
recurse=0
while getopts "${BR_BACKEND_DL_GETOPTS}" OPT; do
case "${OPT}" in
q) quiet=-q; exec >/dev/null;;
l) large_file=1;;
r) recurse=1;;
o) output="${OPTARG}";;
u) uri="${OPTARG}";;
c) cset="${OPTARG}";;
d) dl_dir="${OPTARG}";;
n) basename="${OPTARG}";;
:) printf "option '%s' expects a mandatory argument\n" "${OPTARG}"; exit 1;;
\?) printf "unknown option '%s'\n" "${OPTARG}" >&2; exit 1;;
esac
done
shift $((OPTIND-1)) # Get rid of our options
# Create and cd into the directory that will contain the local git cache
git_cache="${dl_dir}/git"
mkdir -p "${git_cache}"
pushd "${git_cache}" >/dev/null
# Any error now should try to recover
trap _on_error ERR
set -E
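# Note: set -E (errtrace) makes the ERR trap above inherited by shell
# functions and subshells, so failures inside the helpers below are
# caught as well.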
# Caller needs to single-quote its arguments to prevent them from
# being expanded a second time (in case there are spaces in them)
_git() {
if [ -z "${quiet}" ]; then
printf '%s ' GIT_DIR="${git_cache}/.git" "${GIT}" "${@}"; printf '\n'
fi
_plain_git "$@"
}
# Note: please keep command below aligned with what is printed above
_plain_git() {
# shellcheck disable=SC2086 # We want word-splitting for GIT
eval GIT_DIR="${git_cache}/.git" ${GIT} "${@}"
}
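# For example (illustrative cset value), a call like:
#   _git checkout -f -q "'v1.0 beta'"
# goes through eval as:
#   GIT_DIR=${git_cache}/.git git checkout -f -q 'v1.0 beta'
# so the embedded single quotes keep the value a single argument despite
# the second round of expansion performed by eval.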
# Create a warning file telling the user not to use the git cache.
# It's ours. Our precious.
cat <<-_EOF_ >"${dl_dir}/git.readme"
IMPORTANT NOTE!
The git tree located in this directory is for the exclusive use
by Buildroot, which uses it as a local cache to reduce bandwidth
usage.
Buildroot *will* trash any changes in that tree whenever it needs
to use it. Buildroot may even remove it in case it detects the
repository may have been damaged or corrupted.
Do *not* work in that directory; your changes will eventually get
lost. Do *not* even use it as a remote, or as the source for new
worktrees; your commits will eventually get lost.
_EOF_
# Initialise a repository in the git cache. If the repository already
# existed, this is a noop, unless the repository was broken, in which
# case this magically restores it to working conditions. In the latter
# case, we might be missing blobs, but that's not a problem: we'll
# fetch what we need later anyway.
#
# We can still go through the wrapper, because 'init' does not use the
# path pointed to by GIT_DIR, but really uses the directory passed as
# argument.
_git init .
# Ensure the repo has an origin (in case a previous run was killed).
if ! _plain_git remote |grep -q -E '^origin$'; then
_git remote add origin "'${uri}'"
fi
_git remote set-url origin "'${uri}'"
printf "Fetching all references\n"
_git fetch origin
_git fetch origin -t -f
# Try to get the special refs exposed by some forges (pull-requests for
# github, changes for gerrit...). There is no easy way to know whether
# the cset the user passed us is such a special ref or a tag or a sha1
# or whatever else. We'll eventually fail at checking out that cset,
# below, if there is an issue anyway. Since most of the csets we're gonna
# have to clone are not such special refs, consign the output to oblivion
# so as not to alarm unsuspecting users, but still trace it as a warning.
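# For illustration, such special refs typically look like
# "refs/pull/1234/head" on GitHub, or "refs/changes/34/1234/5" on Gerrit.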
if ! _git fetch origin "'${cset}:${cset}'" >/dev/null 2>&1; then
printf "Could not fetch special ref '%s'; assuming it is not special.\n" "${cset}"
fi
# Check that the changeset does exist. If it does not, re-cloning from
# scratch won't help, so we don't want to trash the repository for a
# missing commit. We just exit without going through the ERR trap.
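# Note: the "^{commit}" suffix peels annotated tags, so a tag name is
# accepted here as long as it ultimately points to a commit.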
if ! _git rev-parse --quiet --verify "'${cset}^{commit}'" >/dev/null 2>&1; then
printf "Commit '%s' does not exist in this repository.\n" "${cset}"
exit 1
fi
# The new cset we want to checkout might have different submodules, or
# have sub-dirs converted to/from a submodule. So we would need to
# deregister _current_ submodules before we checkout.
#
# Using "git submodule deinit --all" would remove all the files for
# all submodules, including the corresponding .git files or directories.
# However, it was only introduced with git-1.8.3, which is too recent
# for some enterprise-grade distros.
#
# So, we fall back to just removing all submodule directories. We do
# not need to be recursive, as removing a submodule will de-facto remove
# its own submodules.
#
# For recent git versions, the repository for submodules is stored
# inside the repository of the super repository, so the following will
# only remove the working copies of submodules, effectively caching the
# submodules.
#
# For older versions however, the repository is stored in the .git/ of
# the submodule directory, so the following will effectively remove the
# working copy as well as the repository, which means submodules
# will not be cached for older versions.
#
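# For example (hypothetical submodule path): for a submodule registered at
# "libs/foo", the command below runs inside "libs/foo", prints the message,
# steps up into "libs" and removes the "foo" working directory
# ("${path##*/}" being the last component of the submodule path).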
# shellcheck disable=SC2016 # Will be expanded by git-foreach
cmd='printf "Deregistering submodule \"%s\"\n" "${path}" && cd .. && rm -rf "${path##*/}"'
_git submodule --quiet foreach "'${cmd}'"
# Checkout the required changeset, so that we can update the required
# submodules.
_git checkout -f -q "'${cset}'"
# Get rid of now-untracked directories (in case a git operation was
# interrupted in a previous run, or to get rid of empty directories
# that were parents of submodules removed above).
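# -d also removes untracked directories, -x additionally removes ignored
# files, and the doubled -f is required for git to delete untracked
# directories that contain a .git themselves.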
_git clean -ffdx
# Get date of commit to generate a reproducible archive.
# %ci is ISO 8601, so it's fully qualified, with TZ and all.
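# (e.g. something like "2021-03-04 12:34:56 +0100")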
date="$( _plain_git log -1 --pretty=format:%ci )"
# There might be submodules, so fetch them.
if [ ${recurse} -eq 1 ]; then
_git submodule update --init --recursive
# Older versions of git will store the absolute path of the git tree
# in the .git of submodules, while newer versions just use relative
# paths. Detect and fix the older variants to use relative paths, so
# that the archives are reproducible across a wider range of git
# versions. However, we can't do that if git is too old and uses
# full repositories for submodules.
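# For example (hypothetical paths): the .git file of a submodule checked
# out at "libs/foo" may contain "gitdir: <git cache>/.git/modules/libs/foo",
# which the sed expression below rewrites to
# "gitdir: ../../.git/modules/libs/foo".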
# shellcheck disable=SC2016 # Will be expanded by git-foreach
cmd='printf "%s\n" "${path}/"'
for module_dir in $( _plain_git submodule --quiet foreach "'${cmd}'" ); do
[ -f "${module_dir}/.git" ] || continue
relative_dir="$( sed -r -e 's,/+,/,g; s,[^/]+/,../,g' <<<"${module_dir}" )"
sed -r -i -e "s:^gitdir\: $(pwd)/:gitdir\: ${relative_dir}:" "${module_dir}/.git"
done
fi
# If there are large files then fetch them.
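# ("git lfs install --local" only enables the lfs filters in this
# repository's configuration, "git lfs fetch" downloads the large objects
# for the checked-out cset, and "git lfs checkout" replaces the pointer
# files in the working copy with their actual content.)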
if [ ${large_file} -eq 1 ]; then
_git lfs install --local
_git lfs fetch
_git lfs checkout
# If there are also submodules, recurse into them.
# shellcheck disable=SC2086 # We want word-splitting for GIT
if [ ${recurse} -eq 1 ]; then
_git submodule foreach --recursive ${GIT} lfs install --local
_git submodule foreach --recursive ${GIT} lfs fetch
_git submodule foreach --recursive ${GIT} lfs checkout
fi
fi
# Find files that are affected by the export-subst git-attribute.
# There might be a .gitattributes file at the root of the repository, as well
# as in any arbitrary sub-directory, whether from the master repository
# or a submodule.
# "git check-attr -z" outputs results using \0 as separator for everything,
# so there is no difference between fields and records (but there is a
# trailing \0):
# path_1\0attr_name\0attr_state\0path_2\0attr_name\0attr_state\0....
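# A file is selected when its .gitattributes entry looks something like:
#   VERSION export-subst
# in which case check-attr reports its attr_state as "set".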
mapfile -d "" files < <(
set -o pipefail # Constrained to this sub-shell
find . -print0 \
|_plain_git check-attr --stdin -z export-subst \
|(i=0
while read -r -d "" val; do
case "$((i++%3))" in
(0) path="${val}";;
(1) ;; # Attribute name, always "export-subst", as requested
(2)
if [ "${val}" = "set" ]; then
printf "%s\0" "${path}"
fi;;
esac
done
)
)
# Replace format hints in those files. Always use the master repository
# as the source of the git metadata, even for files found in submodules
# as this is the most practical: there is no way to chdir() in (g)awk,
# and recomputing GIT_DIR for each submodule would really be tedious...
# There might be any arbitrary number of hints on each line, so iterate
# over those one by one.
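# For example, a hint like "$Format:%H$" in one of those files is replaced
# below by the full hash of the commit being archived.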
for f in "${files[@]}"; do
TZ=UTC \
LC_ALL=C \
GIT_DIR="${git_cache}/.git" \
awk -v GIT="${GIT}" '
{
l = $(0);
while( (i = match(l, /\$Format:[^\$]+\$/)) > 0 ) {
len = RLENGTH;
printf("%s", substr(l, 1, i-1) );
fmt = substr(l, i, RLENGTH);
pretty = substr(fmt, 9, length(fmt)-9);
cmd = GIT " -c core.abbrev=40 log -s -n1 --pretty=format:'\''" pretty "'\''";
while ( (cmd | getline replace) > 0) {
printf("%s", replace);
}
ret = close(cmd);
if (ret != 0) {
printf("%s:%d: error while executing command \"%s\"\n", FILENAME, NR, cmd) > "/dev/stderr";
exit 1;
}
l = substr(l, i+len);
}
printf("%s\n", l);
}
' "${f}" >"${f}.br-temp"
mv -f "${f}.br-temp" "${f}"
done
popd >/dev/null
# Generate the archive.
# We do not want the .git dir; we keep other .git files, in case they are the
# only files in their directory.
# The .git dir would generate non-reproducible tarballs as it depends on
# the state of the remote server. It would also generate large tarballs
# (gigabytes for some linux trees) when a full clone took place.
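# mk_tar_gz comes from the sourced helpers file; as called here, its
# arguments are the directory to pack, the basename used as top-level
# directory in the archive, the timestamp applied for reproducibility,
# the output file, and a glob of paths to exclude.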
mk_tar_gz "${git_cache}" "${basename}" "${date}" "${output}" ".git/*"