support/download/git - buildroot - Git at Google

 #!/usr/bin/env bash

 # We want to catch any unexpected failure, and exit immediately
 set -E

 # Download helper for git, to be called from the download wrapper script
 #
 # Options:
 #   -q          Be quiet.
 #   -r          Clone and archive sub-modules.
 #   -o FILE     Generate archive in FILE.
 #   -u URI      Clone from repository at URI.
 #   -c CSET     Use changeset CSET.
 #   -n NAME     Use basename NAME.
 #
 # Environment:
 #   GIT      : the git command to call

 # Save our path and options in case we need to call ourselves again
 myname="${0}"
 declare -a OPTS=("${@}")

 # This function is called when an error occurs. Its job is to attempt a
 # clone from scratch (only once!) in case the git tree is borked, or in
 # case an unexpected and unsupported situation arises with submodules
 # or uncommitted stuff (e.g. if the user manually mucked around in the
 # git cache).
 _on_error() {
     local ret=${?}

     printf "Detected a corrupted git cache.\n" >&2
     if ${BR_GIT_BACKEND_FIRST_FAULT:-false}; then
         printf "This is the second time in a row; bailing out\n" >&2
         exit ${ret}
     fi
     export BR_GIT_BACKEND_FIRST_FAULT=true

     printf "Removing it and starting afresh.\n" >&2

     popd >/dev/null
     rm -rf "${git_cache}"

     exec "${myname}" "${OPTS[@]}" || exit ${ret}
 }

 verbose=
 recurse=0
 while getopts "${BR_BACKEND_DL_GETOPTS}" OPT; do
     case "${OPT}" in
     q)  verbose=-q; exec >/dev/null;;
     r)  recurse=1;;
     o)  output="${OPTARG}";;
     u)  uri="${OPTARG}";;
     c)  cset="${OPTARG}";;
     d)  dl_dir="${OPTARG}";;
     n)  basename="${OPTARG}";;
     :)  printf "option '%s' expects a mandatory argument\n" "${OPTARG}"; exit 1;;
     \?) printf "unknown option '%s'\n" "${OPTARG}" >&2; exit 1;;
     esac
 done

 shift $((OPTIND-1)) # Get rid of our options

 # Create and cd into the directory that will contain the local git cache
 git_cache="${dl_dir}/git"
 mkdir -p "${git_cache}"
 pushd "${git_cache}" >/dev/null

 # Any error now should try to recover
 trap _on_error ERR

 # Caller needs to single-quote its arguments to prevent them from
 # being expanded a second time (in case there are spaces in them)
 _git() {
     eval GIT_DIR="${git_cache}/.git" ${GIT} "${@}"
 }

 # Create a warning file, that the user should not use the git cache.
 # It's ours. Our precious.
 cat <<-_EOF_ >"${dl_dir}/git.readme"
 	IMPORTANT NOTE!

 	The git tree located in this directory is for the exclusive use
 	by Buildroot, which uses it as a local cache to reduce bandwidth
 	usage.

 	Buildroot *will* trash any changes in that tree whenever it needs
 	to use it. Buildroot may even remove it in case it detects the
 	repository may have been damaged or corrupted.

 	Do *not* work in that directory; your changes will eventually get
 	lost. Do *not* even use it as a remote, or as the source for new
 	worktrees; your commits will eventually get lost.
 _EOF_

 # Initialise a repository in the git cache. If the repository already
 # existed, this is a noop, unless the repository was broken, in which
 # case this magically restores it to working conditions. In the latter
 # case, we might be missing blobs, but that's not a problem: we'll
 # fetch what we need later anyway.
 #
 # We can still go through the wrapper, because 'init' does not use the
 # path pointed to by GIT_DIR, but really uses the directory passed as
 # argument.
 _git init .

 # Ensure the repo has an origin (in case a previous run was killed).
 if ! _git remote |grep -q -E '^origin$'; then
     _git remote add origin "'${uri}'"
 fi

 _git remote set-url origin "'${uri}'"

 printf "Fetching all references\n"
 _git fetch origin
 _git fetch origin -t

 # Try to get the special refs exposed by some forges (pull-requests for
 # github, changes for gerrit...). There is no easy way to know whether
 # the cset the user passed us is such a special ref or a tag or a sha1
 # or whatever else. We'll eventually fail at checking out that cset,
 # below, if there is an issue anyway. Since most of the cset we're gonna
 # have to clone are not such special refs, consign the output to oblivion
 # so as not to alarm unsuspecting users, but still trace it as a warning.
 if ! _git fetch origin "'${cset}:${cset}'" >/dev/null 2>&1; then
     printf "Could not fetch special ref '%s'; assuming it is not special.\n" "${cset}"
 fi

 # Check that the changeset does exist. If it does not, re-cloning from
 # scratch won't help, so we don't want to trash the repository for a
 # missing commit. We just exit without going through the ERR trap.
 if ! _git rev-parse --quiet --verify "'${cset}^{commit}'" >/dev/null 2>&1; then
     printf "Commit '%s' does not exist in this repository.\n" "${cset}"
     exit 1
 fi

 # The new cset we want to checkout might have different submodules, or
 # have sub-dirs converted to/from a submodule. So we would need to
 # deregister _current_ submodules before we checkout.
 #
 # Using "git submodule deinit --all" would remove all the files for
 # all submodules, including the corresponding .git files or directories.
 # However, it  was only introduced with git-1.8.3, which is too recent
 # for some enterprise-grade distros.
 #
 # So, we fall-back to just removing all submodules directories. We do
 # not need to be recursive, as removing a submodule will de-facto remove
 # its own submodules.
 #
 # For recent git versions, the repository for submodules is stored
 # inside the repository of the super repository, so the following will
 # only remove the working copies of submodules, effectively caching the
 # submodules.
 #
 # For older versions however, the repository is stored in the .git/ of
 # the submodule directory, so the following will effectively remove the
 # the working copy as well as the repository, which means submodules
 # will not be cached for older versions.
 #
 cmd='printf "Deregistering submodule \"%s\"\n" "${path}" && cd .. && rm -rf "${path##*/}"'
 _git submodule --quiet foreach "'${cmd}'"

 # Checkout the required changeset, so that we can update the required
 # submodules.
 _git checkout -f -q "'${cset}'"

 # Get rid of now-untracked directories (in case a git operation was
 # interrupted in a previous run, or to get rid of empty directories
 # that were parents of submodules removed above).
 _git clean -ffdx

 # Get date of commit to generate a reproducible archive.
 # %cD is RFC2822, so it's fully qualified, with TZ and all.
 date="$( _git log -1 --pretty=format:%cD )"

 # There might be submodules, so fetch them.
 if [ ${recurse} -eq 1 ]; then
     _git submodule update --init --recursive

     # Older versions of git will store the absolute path of the git tree
     # in the .git of submodules, while newer versions just use relative
     # paths. Detect and fix the older variants to use relative paths, so
     # that the archives are reproducible across a wider range of git
     # versions. However, we can't do that if git is too old and uses
     # full repositories for submodules.
     cmd='printf "%s\n" "${path}/"'
     for module_dir in $( _git submodule --quiet foreach "'${cmd}'" ); do
         [ -f "${module_dir}/.git" ] || continue
         relative_dir="$( sed -r -e 's,/+,/,g; s,[^/]+/,../,g' <<<"${module_dir}" )"
         sed -r -i -e "s:^gitdir\: $(pwd)/:gitdir\: "${relative_dir}":" "${module_dir}/.git"
     done
 fi

 # Generate the archive, sort with the C locale so that it is reproducible.
 # We do not want the .git dir; we keep other .git files, in case they are the
 # only files in their directory.
 # The .git dir would generate non reproducible tarballs as it depends on
 # the state of the remote server. It also would generate large tarballs
 # (gigabytes for some linux trees) when a full clone took place.
 find . -not -type d \
        -and -not -path "./.git/*" >"${output}.list"
 LC_ALL=C sort <"${output}.list" >"${output}.list.sorted"

 # Create GNU-format tarballs, since that's the format of the tarballs on
 # sources.buildroot.org and used in the *.hash files
 tar cf - --transform="s#^\./#${basename}/#" \
          --numeric-owner --owner=0 --group=0 --mtime="${date}" --format=gnu \
          -T "${output}.list.sorted" >"${output}.tar"
 gzip -6 -n <"${output}.tar" >"${output}"

 rm -f "${output}.list"
 rm -f "${output}.list.sorted"

 popd >/dev/null
	#!/usr/bin/env bash

	# We want to catch any unexpected failure, and exit immediately
	set -E

	# Download helper for git, to be called from the download wrapper script
	#
	# Options:
	# -q Be quiet.
	# -r Clone and archive sub-modules.
	# -o FILE Generate archive in FILE.
	# -u URI Clone from repository at URI.
	# -c CSET Use changeset CSET.
	# -n NAME Use basename NAME.
	#
	# Environment:
	# GIT : the git command to call

	# Save our path and options in case we need to call ourselves again
	myname="${0}"
	declare -a OPTS=("${@}")

	# This function is called when an error occurs. Its job is to attempt a
	# clone from scratch (only once!) in case the git tree is borked, or in
	# case an unexpected and unsupported situation arises with submodules
	# or uncommitted stuff (e.g. if the user manually mucked around in the
	# git cache).
	_on_error() {
	local ret=${?}

	printf "Detected a corrupted git cache.\n" >&2
	if ${BR_GIT_BACKEND_FIRST_FAULT:-false}; then
	printf "This is the second time in a row; bailing out\n" >&2
	exit ${ret}
	fi
	export BR_GIT_BACKEND_FIRST_FAULT=true

	printf "Removing it and starting afresh.\n" >&2

	popd >/dev/null
	rm -rf "${git_cache}"

	exec "${myname}" "${OPTS[@]}" \|\| exit ${ret}
	}

	verbose=
	recurse=0
	while getopts "${BR_BACKEND_DL_GETOPTS}" OPT; do
	case "${OPT}" in
	q) verbose=-q; exec >/dev/null;;
	r) recurse=1;;
	o) output="${OPTARG}";;
	u) uri="${OPTARG}";;
	c) cset="${OPTARG}";;
	d) dl_dir="${OPTARG}";;
	n) basename="${OPTARG}";;
	:) printf "option '%s' expects a mandatory argument\n" "${OPTARG}"; exit 1;;
	\?) printf "unknown option '%s'\n" "${OPTARG}" >&2; exit 1;;
	esac
	done

	shift $((OPTIND-1)) # Get rid of our options

	# Create and cd into the directory that will contain the local git cache
	git_cache="${dl_dir}/git"
	mkdir -p "${git_cache}"
	pushd "${git_cache}" >/dev/null

	# Any error now should try to recover
	trap _on_error ERR

	# Caller needs to single-quote its arguments to prevent them from
	# being expanded a second time (in case there are spaces in them)
	_git() {
	eval GIT_DIR="${git_cache}/.git" ${GIT} "${@}"
	}

	# Create a warning file, that the user should not use the git cache.
	# It's ours. Our precious.
	cat <<-_EOF_ >"${dl_dir}/git.readme"
	IMPORTANT NOTE!

	The git tree located in this directory is for the exclusive use
	by Buildroot, which uses it as a local cache to reduce bandwidth
	usage.

	Buildroot will trash any changes in that tree whenever it needs
	to use it. Buildroot may even remove it in case it detects the
	repository may have been damaged or corrupted.

	Do not work in that directory; your changes will eventually get
	lost. Do not even use it as a remote, or as the source for new
	worktrees; your commits will eventually get lost.
	_EOF_

	# Initialise a repository in the git cache. If the repository already
	# existed, this is a noop, unless the repository was broken, in which
	# case this magically restores it to working conditions. In the latter
	# case, we might be missing blobs, but that's not a problem: we'll
	# fetch what we need later anyway.
	#
	# We can still go through the wrapper, because 'init' does not use the
	# path pointed to by GIT_DIR, but really uses the directory passed as
	# argument.
	_git init .

	# Ensure the repo has an origin (in case a previous run was killed).
	if ! _git remote \|grep -q -E '^origin$'; then
	_git remote add origin "'${uri}'"
	fi

	_git remote set-url origin "'${uri}'"

	printf "Fetching all references\n"
	_git fetch origin
	_git fetch origin -t

	# Try to get the special refs exposed by some forges (pull-requests for
	# github, changes for gerrit...). There is no easy way to know whether
	# the cset the user passed us is such a special ref or a tag or a sha1
	# or whatever else. We'll eventually fail at checking out that cset,
	# below, if there is an issue anyway. Since most of the cset we're gonna
	# have to clone are not such special refs, consign the output to oblivion
	# so as not to alarm unsuspecting users, but still trace it as a warning.
	if ! _git fetch origin "'${cset}:${cset}'" >/dev/null 2>&1; then
	printf "Could not fetch special ref '%s'; assuming it is not special.\n" "${cset}"
	fi

	# Check that the changeset does exist. If it does not, re-cloning from
	# scratch won't help, so we don't want to trash the repository for a
	# missing commit. We just exit without going through the ERR trap.
	if ! _git rev-parse --quiet --verify "'${cset}^{commit}'" >/dev/null 2>&1; then
	printf "Commit '%s' does not exist in this repository.\n" "${cset}"
	exit 1
	fi

	# The new cset we want to checkout might have different submodules, or
	# have sub-dirs converted to/from a submodule. So we would need to
	# deregister _current_ submodules before we checkout.
	#
	# Using "git submodule deinit --all" would remove all the files for
	# all submodules, including the corresponding .git files or directories.
	# However, it was only introduced with git-1.8.3, which is too recent
	# for some enterprise-grade distros.
	#
	# So, we fall-back to just removing all submodules directories. We do
	# not need to be recursive, as removing a submodule will de-facto remove
	# its own submodules.
	#
	# For recent git versions, the repository for submodules is stored
	# inside the repository of the super repository, so the following will
	# only remove the working copies of submodules, effectively caching the
	# submodules.
	#
	# For older versions however, the repository is stored in the .git/ of
	# the submodule directory, so the following will effectively remove the
	# the working copy as well as the repository, which means submodules
	# will not be cached for older versions.
	#
	cmd='printf "Deregistering submodule \"%s\"\n" "${path}" && cd .. && rm -rf "${path##*/}"'
	_git submodule --quiet foreach "'${cmd}'"

	# Checkout the required changeset, so that we can update the required
	# submodules.
	_git checkout -f -q "'${cset}'"

	# Get rid of now-untracked directories (in case a git operation was
	# interrupted in a previous run, or to get rid of empty directories
	# that were parents of submodules removed above).
	_git clean -ffdx

	# Get date of commit to generate a reproducible archive.
	# %cD is RFC2822, so it's fully qualified, with TZ and all.
	date="$( _git log -1 --pretty=format:%cD )"

	# There might be submodules, so fetch them.
	if [ ${recurse} -eq 1 ]; then
	_git submodule update --init --recursive

	# Older versions of git will store the absolute path of the git tree
	# in the .git of submodules, while newer versions just use relative
	# paths. Detect and fix the older variants to use relative paths, so
	# that the archives are reproducible across a wider range of git
	# versions. However, we can't do that if git is too old and uses
	# full repositories for submodules.
	cmd='printf "%s\n" "${path}/"'
	for module_dir in $( _git submodule --quiet foreach "'${cmd}'" ); do
	[ -f "${module_dir}/.git" ] \|\| continue
	relative_dir="$( sed -r -e 's,/+,/,g; s,[^/]+/,../,g' <<<"${module_dir}" )"
	sed -r -i -e "s:^gitdir\: $(pwd)/:gitdir\: "${relative_dir}":" "${module_dir}/.git"
	done
	fi

	# Generate the archive, sort with the C locale so that it is reproducible.
	# We do not want the .git dir; we keep other .git files, in case they are the
	# only files in their directory.
	# The .git dir would generate non reproducible tarballs as it depends on
	# the state of the remote server. It also would generate large tarballs
	# (gigabytes for some linux trees) when a full clone took place.
	find . -not -type d \
	-and -not -path "./.git/*" >"${output}.list"
	LC_ALL=C sort <"${output}.list" >"${output}.list.sorted"

	# Create GNU-format tarballs, since that's the format of the tarballs on
	# sources.buildroot.org and used in the *.hash files
	tar cf - --transform="s#^\./#${basename}/#" \
	--numeric-owner --owner=0 --group=0 --mtime="${date}" --format=gnu \
	-T "${output}.list.sorted" >"${output}.tar"
	gzip -6 -n <"${output}.tar" >"${output}"

	rm -f "${output}.list"
	rm -f "${output}.list.sorted"

	popd >/dev/null