#!/usr/bin/env bash

# NOTE: if the output of this backend has to change (e.g. we change what gets
# included in the archive (e.g. LFS), or we change the format of the archive
# (e.g. tar options, compression ratio or method)), we MUST update the format
# version in the variable BR_FMT_VERSION_git, in package/pkg-download.mk.

# We want to catch any unexpected failure
set -e

# Download helper for git, to be called from the download wrapper script
#
# Options:
#   -q          Be quiet.
#   -l          Fetch and check out git-lfs large files.
#   -r          Clone and archive sub-modules.
#   -o FILE     Generate archive in FILE.
#   -u URI      Clone from repository at URI.
#   -c CSET     Use changeset CSET.
#   -d DIR      Keep the local git cache in DIR/git.
#   -n NAME     Use basename NAME.
#
# Environment:
#   GIT                     : the git command to call
#   BR_BACKEND_DL_GETOPTS   : the option string handed to getopts

# shellcheck disable=SC1090 # Only provides mk_tar_gz()
. "${0%/*}/helpers"

# Save our path and options in case we need to call ourselves again
myname="${0}"
declare -a OPTS=("${@}")
| |
# This function is called when an error occurs. Its job is to attempt a
# clone from scratch (only once!) in case the git tree is borked, or in
# case an unexpected and unsupported situation arises with submodules
# or uncommitted stuff (e.g. if the user manually mucked around in the
# git cache).
#
# Globals read: myname, OPTS, git_cache, BR_GIT_BACKEND_FIRST_FAULT
# Globals set:  BR_GIT_BACKEND_FIRST_FAULT (exported, so that the re-executed
#               script knows one recovery attempt has already been made)
# Does not return: either exits with the status of the failed command, or
# replaces the current process with a fresh run of this script.
_on_error() {
    # Status of the command that triggered the trap; must be read first.
    local ret=${?}

    printf "Detected a corrupted git cache.\n" >&2
    # Compare the flag as a string rather than executing its value.
    if [ "${BR_GIT_BACKEND_FIRST_FAULT:-false}" = "true" ]; then
        printf "This is the second time in a row; bailing out\n" >&2
        exit "${ret}"
    fi
    export BR_GIT_BACKEND_FIRST_FAULT=true

    printf "Removing it and starting afresh.\n" >&2

    # Leave the cache directory before deleting it.
    popd >/dev/null
    # ':?' aborts instead of expanding to an empty path.
    rm -rf -- "${git_cache:?}"

    exec "${myname}" "${OPTS[@]}" || exit "${ret}"
}
| |
# Parse the command line. The accepted option string is supplied by the
# caller through BR_BACKEND_DL_GETOPTS.
quiet=
large_file=0
recurse=0
while getopts "${BR_BACKEND_DL_GETOPTS}" OPT; do
    case "${OPT}" in
    # Being quiet also silences our own stdout from here on.
    q)  quiet=-q; exec >/dev/null;;
    l)  large_file=1;;
    r)  recurse=1;;
    o)  output="${OPTARG}";;
    u)  uri="${OPTARG}";;
    c)  cset="${OPTARG}";;
    d)  dl_dir="${OPTARG}";;
    n)  basename="${OPTARG}";;
    # Diagnostics go to stderr so they survive the '-q' redirection above.
    :)  printf "option '%s' expects a mandatory argument\n" "${OPTARG}" >&2; exit 1;;
    \?) printf "unknown option '%s'\n" "${OPTARG}" >&2; exit 1;;
    esac
done

shift $((OPTIND-1)) # Get rid of our options
| |
# Create and cd into the directory that will contain the local git cache.
# Refuse to go on without a download directory: git_cache would otherwise
# become "/git", which the error handler may later remove recursively.
git_cache="${dl_dir:?download directory not set (missing -d option)}/git"
mkdir -p "${git_cache}"
pushd "${git_cache}" >/dev/null

# Any error now should try to recover
trap _on_error ERR
# Make functions, command substitutions and sub-shells inherit the ERR trap.
set -E
| |
# Run a git command through _plain_git, printing the command line first
# unless we were asked to be quiet.
#
# Caller needs to single-quote its arguments to prevent them from
# being expanded a second time (in case there are spaces in them)
_git() {
    if [ -z "${quiet}" ]; then
        printf '%s ' GIT_DIR="${git_cache}/.git" "${GIT}" "${@}"; printf '\n'
    fi
    _plain_git "$@"
}
# Run ${GIT} with GIT_DIR pointing at the cache repository, without
# printing anything. The arguments go through 'eval', so they are expanded
# a second time: callers single-quote anything that must stay one word.
# Note: please keep command below aligned with what is printed above
_plain_git() {
    local git_dir
    # Quote the path for the second parse done by eval, so that a cache
    # directory containing whitespace is still passed as a single word.
    printf -v git_dir '%q' "${git_cache}/.git"
    # shellcheck disable=SC2086 # We want word-splitting for GIT
    eval GIT_DIR="${git_dir}" ${GIT} "${@}"
}
| |
# Create a warning file, that the user should not use the git cache.
# It's ours. Our precious.
# The file is written next to the cache directory, as ${dl_dir}/git.readme.
cat <<-_EOF_ >"${dl_dir}/git.readme"
IMPORTANT NOTE!

The git tree located in this directory is for the exclusive use
by Buildroot, which uses it as a local cache to reduce bandwidth
usage.

Buildroot *will* trash any changes in that tree whenever it needs
to use it. Buildroot may even remove it in case it detects the
repository may have been damaged or corrupted.

Do *not* work in that directory; your changes will eventually get
lost. Do *not* even use it as a remote, or as the source for new
worktrees; your commits will eventually get lost.
_EOF_
| |
# Initialise a repository in the git cache. If the repository already
# existed, this is a noop, unless the repository was broken, in which
# case this magically restores it to working conditions. In the latter
# case, we might be missing blobs, but that's not a problem: we'll
# fetch what we need later anyway.
#
# We can still go through the wrapper, because 'init' does not use the
# path pointed to by GIT_DIR, but really uses the directory passed as
# argument.
_git init .

# Ensure the repo has an origin (in case a previous run was killed).
if ! _plain_git remote |grep -q -E '^origin$'; then
    _git remote add origin "'${uri}'"
fi

# Always point origin at the requested URI, whatever it was set to before.
_git remote set-url origin "'${uri}'"

printf "Fetching all references\n"
_git fetch origin
_git fetch origin -t -f

# Try to get the special refs exposed by some forges (pull-requests for
# github, changes for gerrit...). There is no easy way to know whether
# the cset the user passed us is such a special ref or a tag or a sha1
# or whatever else. We'll eventually fail at checking out that cset,
# below, if there is an issue anyway. Since most of the cset we're gonna
# have to clone are not such special refs, consign the output to oblivion
# so as not to alarm unsuspecting users, but still trace it as a warning.
if ! _git fetch origin "'${cset}:${cset}'" >/dev/null 2>&1; then
    printf "Could not fetch special ref '%s'; assuming it is not special.\n" "${cset}"
fi
| |
# Check that the changeset does exist. If it does not, re-cloning from
# scratch won't help, so we don't want to trash the repository for a
# missing commit. We just exit without going through the ERR trap.
if ! _git rev-parse --quiet --verify "'${cset}^{commit}'" >/dev/null 2>&1; then
    # This is fatal, so report it on stderr: stdout is discarded with '-q'.
    printf "Commit '%s' does not exist in this repository.\n" "${cset}" >&2
    exit 1
fi
| |
# The new cset we want to checkout might have different submodules, or
# have sub-dirs converted to/from a submodule. So we would need to
# deregister _current_ submodules before we checkout.
#
# Using "git submodule deinit --all" would remove all the files for
# all submodules, including the corresponding .git files or directories.
# However, it was only introduced with git-1.8.3, which is too recent
# for some enterprise-grade distros.
#
# So, we fall-back to just removing all submodules directories. We do
# not need to be recursive, as removing a submodule will de-facto remove
# its own submodules.
#
# For recent git versions, the repository for submodules is stored
# inside the repository of the super repository, so the following will
# only remove the working copies of submodules, effectively caching the
# submodules.
#
# For older versions however, the repository is stored in the .git/ of
# the submodule directory, so the following will effectively remove the
# working copy as well as the repository, which means submodules will
# not be cached for older versions.
#
# shellcheck disable=SC2016 # Will be expanded by git-foreach
cmd='printf "Deregistering submodule \"%s\"\n" "${path}" && cd .. && rm -rf "${path##*/}"'
_git submodule --quiet foreach "'${cmd}'"
| |
# Checkout the required changeset, so that we can update the required
# submodules.
_git checkout -f -q "'${cset}'"

# Get rid of now-untracked directories (in case a git operation was
# interrupted in a previous run, or to get rid of empty directories
# that were parents of submodules removed above).
_git clean -ffdx

# Get date of commit to generate a reproducible archive.
# %ci is ISO 8601, so it's fully qualified, with TZ and all.
date="$( _plain_git log -1 --pretty=format:%ci )"
| |
# There might be submodules, so fetch them.
if [ "${recurse}" -eq 1 ]; then
    _git submodule update --init --recursive

    # Older versions of git will store the absolute path of the git tree
    # in the .git of submodules, while newer versions just use relative
    # paths. Detect and fix the older variants to use relative paths, so
    # that the archives are reproducible across a wider range of git
    # versions. However, we can't do that if git is too old and uses
    # full repositories for submodules.
    # shellcheck disable=SC2016 # Will be expanded by git-foreach
    cmd='printf "%s\n" "${path}/"'
    # Read one submodule path per line, so that paths containing blanks
    # are not split into several words. The whole list is collected before
    # any .git file gets rewritten.
    mapfile -t module_dirs < <( _plain_git submodule --quiet foreach "'${cmd}'" )
    # Absolute prefix to strip; it is the same for every submodule.
    top_dir="$(pwd)"
    for module_dir in "${module_dirs[@]}"; do
        # A .git directory means a full repository: nothing we can fix.
        [ -f "${module_dir}/.git" ] || continue
        # Turn each path component into '../' to climb back to the top.
        relative_dir="$( sed -r -e 's,/+,/,g; s,[^/]+/,../,g' <<<"${module_dir}" )"
        sed -r -i -e "s:^gitdir\: ${top_dir}/:gitdir\: ${relative_dir}:" "${module_dir}/.git"
    done
fi
| |
# If there are large files then fetch them.
if [ ${large_file} -eq 1 ]; then
    _git lfs install --local
    _git lfs fetch
    _git lfs checkout
    # If there are also submodules, recurse into them,
    # shellcheck disable=SC2086 # We want word-splitting for GIT
    if [ ${recurse} -eq 1 ]; then
        _git submodule foreach --recursive ${GIT} lfs install --local
        _git submodule foreach --recursive ${GIT} lfs fetch
        _git submodule foreach --recursive ${GIT} lfs checkout
    fi
fi
| |
# Find files that are affected by the export-subst git-attribute.
# There might be a .gitattributes at the root of the repository, as well
# as in any arbitrary sub-directory, whether from the master repository
# or a submodule.
# "git check-attr -z" outputs results using \0 as separator for everything,
# so there is no difference between field or records (but there is a
# trailing \0):
#   path_1\0attr_name\0attr_state\0path_2\0attr_name\0attr_state\0....
mapfile -d "" files < <(
    set -o pipefail  # Constrained to this sub-shell
    find . -print0 \
    |_plain_git check-attr --stdin -z export-subst \
    |(i=0
      # IFS is emptied so that leading or trailing blanks in a path are
      # kept as they are.
      while IFS= read -r -d "" val; do
        # Fields come in groups of three: path, attribute name, state.
        case "$((i++%3))" in
        (0) path="${val}";;
        (1) ;; # Attribute name, always "export-subst", as requested
        (2)
            if [ "${val}" = "set" ]; then
                printf "%s\0" "${path}"
            fi;;
        esac
      done
    )
)
# Replace format hints in those files. Always use the master repository
# as the source of the git metadata, even for files found in submodules
# as this is the most practical: there is no way to chdir() in (g)awk,
# and recomputing GIT_DIR for each submodule would really be tedious...
# There might be any arbitrary number of hints on each line, so iterate
# over those one by one.
# Each file is rewritten through a "<file>.br-temp" sibling that then
# replaces the original.
for f in "${files[@]}"; do
    TZ=UTC \
    LC_ALL=C \
    GIT_DIR="${git_cache}/.git" \
    awk -v GIT="${GIT}" '
    {
        l = $(0);
        while( (i = match(l, /\$Format:[^\$]+\$/)) > 0 ) {
            len = RLENGTH;
            printf("%s", substr(l, 1, i-1) );
            fmt = substr(l, i, RLENGTH);
            pretty = substr(fmt, 9, length(fmt)-9);
            cmd = GIT " -c core.abbrev=40 log -s -n1 --pretty=format:'\''" pretty "'\''";
            while ( (cmd | getline replace) > 0) {
                printf("%s", replace);
            }
            ret = close(cmd);
            if (ret != 0) {
                printf("%s:%d: error while executing command \"%s\"\n", FILENAME, NR, cmd) > "/dev/stderr";
                exit 1;
            }
            l = substr(l, i+len);
        }
        printf("%s\n", l);
    }
    ' "${f}" >"${f}.br-temp"
    mv -f "${f}.br-temp" "${f}"
done
| |
# Leave the git cache directory entered at the start of the script.
popd >/dev/null

# Generate the archive.
# We do not want the .git dir; we keep other .git files, in case they are the
# only files in their directory.
# The .git dir would generate non reproducible tarballs as it depends on
# the state of the remote server. It also would generate large tarballs
# (gigabytes for some linux trees) when a full clone took place.
mk_tar_gz "${git_cache}" "${basename}" "${date}" "${output}" ".git/*"