| // SPDX-License-Identifier: GPL-2.0-only |
| // |
| // Traverse the source tree, parsing all .gitignore files, and print file paths |
| // that are ignored by git. |
| // The output is suitable to the --exclude-from option of tar. |
| // This is useful until the --exclude-vcs-ignores option gets working correctly. |
| // |
| // Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org> |
| // (a lot of code imported from GIT) |
| |
| #include <assert.h> |
| #include <dirent.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <getopt.h> |
| #include <stdarg.h> |
| #include <stdbool.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| // Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT |
| // |
| //---------------------------(IMPORT FROM GIT BEGIN)--------------------------- |
| |
| // Copied from environment.c |
| |
// When true, patterns and paths are compared case-insensitively:
// wildmatch() adds WM_CASEFOLD and fspathncmp() uses strncasecmp().
// Set by the -i/--ignore-case option in main().
static bool ignore_case;
| |
| // Copied from git-compat-util.h |
| |
/* Sane ctype - no locale, and works with signed chars */
/*
 * Any <ctype.h> definitions of these names are dropped first; the names are
 * then redefined below on top of the sane_ctype[] / hexval_table[] lookup
 * tables, so every later use in this file gets the table-driven version.
 */
#undef isascii
#undef isspace
#undef isdigit
#undef isalpha
#undef isalnum
#undef isprint
#undef islower
#undef isupper
#undef tolower
#undef toupper
#undef iscntrl
#undef ispunct
#undef isxdigit

/* Forward declaration; the table contents are defined further down. */
static const unsigned char sane_ctype[256];
/* Class bits stored in each sane_ctype[] entry. */
#define GIT_SPACE 0x01
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_GLOB_SPECIAL 0x08
#define GIT_REGEX_SPECIAL 0x10
#define GIT_PATHSPEC_MAGIC 0x20
#define GIT_CNTRL 0x40
#define GIT_PUNCT 0x80
/* True if character x has any of the class bits in mask set. */
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isascii(x) (((x) & ~0x7f) == 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
#define islower(x) sane_iscase(x, 1)
#define isupper(x) sane_iscase(x, 0)
/* Characters that are special in a glob pattern (see the 'G' table entries). */
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
/* hexval_table[] holds -1 for every byte that is not a hex digit. */
#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
/* Case conversion: bit 0x20 is forced on (lower) or off (upper) for letters. */
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
| |
| static inline int sane_case(int x, int high) |
| { |
| if (sane_istest(x, GIT_ALPHA)) |
| x = (x & ~0x20) | high; |
| return x; |
| } |
| |
| static inline int sane_iscase(int x, int is_lower) |
| { |
| if (!sane_istest(x, GIT_ALPHA)) |
| return 0; |
| |
| if (is_lower) |
| return (x & 0x20) != 0; |
| else |
| return (x & 0x20) == 0; |
| } |
| |
| // Copied from ctype.c |
| |
/*
 * One-letter aliases for the GIT_* class bits, used only to keep the
 * sane_ctype[] initializer below compact.
 */
enum {
	S = GIT_SPACE,
	A = GIT_ALPHA,
	D = GIT_DIGIT,
	G = GIT_GLOB_SPECIAL,   /* *, ?, [, \\ */
	R = GIT_REGEX_SPECIAL,  /* $, (, ), +, ., ^, {, | */
	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
	X = GIT_CNTRL,
	U = GIT_PUNCT,
	Z = GIT_CNTRL | GIT_SPACE
};
| |
/*
 * Character class table, indexed by the character value as unsigned char.
 * Only the first 128 entries are initialized explicitly; the remaining
 * entries are implicitly zero, i.e. those bytes belong to no class.
 */
static const unsigned char sane_ctype[256] = {
	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
	A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,		/*  80.. 95 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
	A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,		/* 112..127 */
	/* Nothing in the 128.. range */
};
| |
| // Copied from hex.c |
| |
/*
 * Maps a byte to the value of the hexadecimal digit it represents (0..15),
 * or -1 if the byte is not a hex digit. Both 'A'-'F' and 'a'-'f' map to
 * 10..15. In this file it only backs the isxdigit() macro.
 */
static const signed char hexval_table[256] = {
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 00-07 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 08-0f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 10-17 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 18-1f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 20-27 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 28-2f */
	  0,  1,  2,  3,  4,  5,  6,  7,		/* 30-37 */
	  8,  9, -1, -1, -1, -1, -1, -1,		/* 38-3f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 40-47 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 48-4f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 50-57 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 58-5f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 60-67 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 68-6f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 70-77 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 78-7f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 80-87 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 88-8f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 90-97 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 98-9f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a0-a7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a8-af */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b0-b7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b8-bf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c0-c7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c8-cf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d0-d7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d8-df */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e0-e7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e8-ef */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f0-f7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f8-ff */
};
| |
| // Copied from wildmatch.h |
| |
/* Flag bits accepted by wildmatch() / dowild(). */
#define WM_CASEFOLD 1	/* fold upper case to lower case before comparing */
#define WM_PATHNAME 2	/* '?', '*' and bracket expressions do not match '/' */

/*
 * Result codes of dowild(). Only WM_MATCH (0) means "matched"; callers in
 * this file test the result with '!', so every non-zero code is a non-match.
 */
#define WM_NOMATCH 1
#define WM_MATCH 0
/* Text ended before the pattern did, or the pattern is malformed. */
#define WM_ABORT_ALL -1
/*
 * A '*' that may not cross '/' failed at a '/'; an enclosing loop that may
 * cross slashes keeps trying instead of giving up.
 */
#define WM_ABORT_TO_STARSTAR -2
| |
| // Copied from wildmatch.c |
| |
/* Pattern and text are walked as unsigned bytes. */
typedef unsigned char uchar;

// local modification: remove NEGATE_CLASS(2)

/*
 * True if the 'len' bytes starting at 'class' spell exactly the string
 * literal 'litmatch' (used for POSIX class names such as "alnum").
 */
#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
				    && *(class) == *(litmatch) \
				    && strncmp((char*)class, litmatch, len) == 0)

// local modification: simplify macros
/* Character-class predicates used by dowild() for [:class:] expressions. */
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#define ISGRAPH(c) (isprint(c) && !isspace(c))
#define ISPRINT(c) isprint(c)
#define ISDIGIT(c) isdigit(c)
#define ISALNUM(c) isalnum(c)
#define ISALPHA(c) isalpha(c)
#define ISCNTRL(c) iscntrl(c)
#define ISLOWER(c) islower(c)
#define ISPUNCT(c) ispunct(c)
#define ISSPACE(c) isspace(c)
#define ISUPPER(c) isupper(c)
#define ISXDIGIT(c) isxdigit(c)
| |
/*
 * Match pattern "p" against "text"
 *
 * Recursive glob matcher. 'flags' is a mask of WM_CASEFOLD and WM_PATHNAME.
 *
 * Returns WM_MATCH when the whole text matches the whole pattern,
 * WM_NOMATCH when it does not, WM_ABORT_ALL when the text ran out before the
 * pattern or the pattern is malformed (unterminated bracket expression,
 * unknown [:class:]), and WM_ABORT_TO_STARSTAR when a '*' that may not cross
 * '/' stopped at a '/'.
 */
static int dowild(const uchar *p, const uchar *text, unsigned int flags)
{
	uchar p_ch;
	/* Start of the pattern, kept to detect a "**" at the very beginning. */
	const uchar *pattern = p;

	for ( ; (p_ch = *p) != '\0'; text++, p++) {
		int matched, match_slash, negated;
		uchar t_ch, prev_ch;
		/* Only '*' can still match once the text is exhausted. */
		if ((t_ch = *text) == '\0' && p_ch != '*')
			return WM_ABORT_ALL;
		/* With WM_CASEFOLD both sides are compared in lower case. */
		if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
			t_ch = tolower(t_ch);
		if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
			p_ch = tolower(p_ch);
		switch (p_ch) {
		case '\\':
			/* Literal match with following character. Note that the test
			 * in "default" handles the p[1] == '\0' failure case. */
			p_ch = *++p;
			/* FALLTHROUGH */
		default:
			if (t_ch != p_ch)
				return WM_NOMATCH;
			continue;
		case '?':
			/* Match anything but '/'. */
			if ((flags & WM_PATHNAME) && t_ch == '/')
				return WM_NOMATCH;
			continue;
		case '*':
			/* Decide whether this run of asterisks may match '/'. */
			if (*++p == '*') {
				/*
				 * Character in front of the first '*'. It is only
				 * dereferenced after checking it lies inside the
				 * pattern.
				 */
				const uchar *prev_p = p - 2;
				while (*++p == '*') {}
				if (!(flags & WM_PATHNAME))
					/* without WM_PATHNAME, '*' == '**' */
					match_slash = 1;
				else if ((prev_p < pattern || *prev_p == '/') &&
				    (*p == '\0' || *p == '/' ||
				     (p[0] == '\\' && p[1] == '/'))) {
					/*
					 * Assuming we already match 'foo/' and are at
					 * <star star slash>, just assume it matches
					 * nothing and go ahead match the rest of the
					 * pattern with the remaining string. This
					 * helps make foo/<*><*>/bar (<> because
					 * otherwise it breaks C comment syntax) match
					 * both foo/bar and foo/a/bar.
					 */
					if (p[0] == '/' &&
					    dowild(p + 1, text, flags) == WM_MATCH)
						return WM_MATCH;
					match_slash = 1;
				} else /* WM_PATHNAME is set */
					match_slash = 0;
			} else
				/* without WM_PATHNAME, '*' == '**' */
				match_slash = flags & WM_PATHNAME ? 0 : 1;
			if (*p == '\0') {
				/* Trailing "**" matches everything. Trailing "*" matches
				 * only if there are no more slash characters. */
				if (!match_slash) {
					if (strchr((char *)text, '/'))
						return WM_NOMATCH;
				}
				return WM_MATCH;
			} else if (!match_slash && *p == '/') {
				/*
				 * _one_ asterisk followed by a slash
				 * with WM_PATHNAME matches the next
				 * directory
				 */
				const char *slash = strchr((char*)text, '/');
				if (!slash)
					return WM_NOMATCH;
				text = (const uchar*)slash;
				/* the slash is consumed by the top-level for loop */
				break;
			}
			/*
			 * General case: try the rest of the pattern at each
			 * successive position of the text.
			 */
			while (1) {
				if (t_ch == '\0')
					break;
				/*
				 * Try to advance faster when an asterisk is
				 * followed by a literal. We know in this case
				 * that the string before the literal
				 * must belong to "*".
				 * If match_slash is false, do not look past
				 * the first slash as it cannot belong to '*'.
				 */
				if (!is_glob_special(*p)) {
					p_ch = *p;
					if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
						p_ch = tolower(p_ch);
					while ((t_ch = *text) != '\0' &&
					       (match_slash || t_ch != '/')) {
						if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
							t_ch = tolower(t_ch);
						if (t_ch == p_ch)
							break;
						text++;
					}
					if (t_ch != p_ch)
						return WM_NOMATCH;
				}
				if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
					/*
					 * A "**" loop swallows WM_ABORT_TO_STARSTAR and
					 * keeps scanning; everything else is final.
					 */
					if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
						return matched;
				} else if (!match_slash && t_ch == '/')
					return WM_ABORT_TO_STARSTAR;
				t_ch = *++text;
			}
			return WM_ABORT_ALL;
		case '[':
			p_ch = *++p;
			/* '^' is accepted as a synonym of '!' for negation. */
			if (p_ch == '^')
				p_ch = '!';
			/* Assign literal 1/0 because of "matched" comparison. */
			negated = p_ch == '!' ? 1 : 0;
			if (negated) {
				/* Inverted character class. */
				p_ch = *++p;
			}
			/* prev_ch is the possible low end of a "lo-hi" range. */
			prev_ch = 0;
			matched = 0;
			do {
				/* Pattern ended before the closing ']'. */
				if (!p_ch)
					return WM_ABORT_ALL;
				if (p_ch == '\\') {
					p_ch = *++p;
					if (!p_ch)
						return WM_ABORT_ALL;
					if (t_ch == p_ch)
						matched = 1;
				} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
					/* Range "prev_ch-p_ch". */
					p_ch = *++p;
					if (p_ch == '\\') {
						p_ch = *++p;
						if (!p_ch)
							return WM_ABORT_ALL;
					}
					if (t_ch <= p_ch && t_ch >= prev_ch)
						matched = 1;
					else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
						/*
						 * t_ch was folded to lower case above; also
						 * try its upper-case form against the range.
						 */
						uchar t_ch_upper = toupper(t_ch);
						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
							matched = 1;
					}
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (p_ch == '[' && p[1] == ':') {
					/* Possible POSIX class such as [:alpha:]. */
					const uchar *s;
					int i;
					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
					if (!p_ch)
						return WM_ABORT_ALL;
					/* i is the length of the class name between the colons. */
					i = p - s - 1;
					if (i < 0 || p[-1] != ':') {
						/* Didn't find ":]", so treat like a normal set. */
						p = s - 2;
						p_ch = '[';
						if (t_ch == p_ch)
							matched = 1;
						continue;
					}
					if (CC_EQ(s,i, "alnum")) {
						if (ISALNUM(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "alpha")) {
						if (ISALPHA(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "blank")) {
						if (ISBLANK(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "cntrl")) {
						if (ISCNTRL(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "digit")) {
						if (ISDIGIT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "graph")) {
						if (ISGRAPH(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "lower")) {
						if (ISLOWER(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "print")) {
						if (ISPRINT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "punct")) {
						if (ISPUNCT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "space")) {
						if (ISSPACE(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "upper")) {
						if (ISUPPER(t_ch))
							matched = 1;
						else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "xdigit")) {
						if (ISXDIGIT(t_ch))
							matched = 1;
					} else /* malformed [:class:] string */
						return WM_ABORT_ALL;
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (t_ch == p_ch)
					matched = 1;
			} while (prev_ch = p_ch, (p_ch = *++p) != ']');
			/* With WM_PATHNAME a bracket expression never matches '/'. */
			if (matched == negated ||
			    ((flags & WM_PATHNAME) && t_ch == '/'))
				return WM_NOMATCH;
			continue;
		}
	}

	/* Pattern exhausted: it is a match only if the text is exhausted too. */
	return *text ? WM_NOMATCH : WM_MATCH;
}
| |
| /* Match the "pattern" against the "text" string. */ |
| static int wildmatch(const char *pattern, const char *text, unsigned int flags) |
| { |
| // local modification: move WM_CASEFOLD here |
| if (ignore_case) |
| flags |= WM_CASEFOLD; |
| |
| return dowild((const uchar*)pattern, (const uchar*)text, flags); |
| } |
| |
| // Copied from dir.h |
| |
/* Per-pattern flag bits computed by parse_path_pattern(). */
#define PATTERN_FLAG_NODIR 1		/* pattern contains no '/': match the basename only */
#define PATTERN_FLAG_ENDSWITH 4		/* "*literal": a '*' followed by no further wildcard */
#define PATTERN_FLAG_MUSTBEDIR 8	/* pattern had a trailing '/': match directories only */
#define PATTERN_FLAG_NEGATIVE 16	/* pattern had a leading '!': un-ignore on match */
| |
| // Copied from dir.c |
| |
| static int fspathncmp(const char *a, const char *b, size_t count) |
| { |
| return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count); |
| } |
| |
/*
 * Length of the leading part of 'match' that contains no glob-special
 * character, i.e. the offset of the first such character or of the
 * terminating NUL.
 */
static int simple_length(const char *match)
{
	int len = 0;

	while (match[len] != '\0' && !is_glob_special(match[len]))
		len++;

	return len;
}
| |
/* Returns non-zero if 'string' contains no glob-special character at all. */
static int no_wildcard(const char *string)
{
	const char *stop = string + simple_length(string);

	return *stop == '\0';
}
| |
| static void parse_path_pattern(const char **pattern, |
| int *patternlen, |
| unsigned *flags, |
| int *nowildcardlen) |
| { |
| const char *p = *pattern; |
| size_t i, len; |
| |
| *flags = 0; |
| if (*p == '!') { |
| *flags |= PATTERN_FLAG_NEGATIVE; |
| p++; |
| } |
| len = strlen(p); |
| if (len && p[len - 1] == '/') { |
| len--; |
| *flags |= PATTERN_FLAG_MUSTBEDIR; |
| } |
| for (i = 0; i < len; i++) { |
| if (p[i] == '/') |
| break; |
| } |
| if (i == len) |
| *flags |= PATTERN_FLAG_NODIR; |
| *nowildcardlen = simple_length(p); |
| /* |
| * we should have excluded the trailing slash from 'p' too, |
| * but that's one more allocation. Instead just make sure |
| * nowildcardlen does not exceed real patternlen |
| */ |
| if (*nowildcardlen > len) |
| *nowildcardlen = len; |
| if (*p == '*' && no_wildcard(p + 1)) |
| *flags |= PATTERN_FLAG_ENDSWITH; |
| *pattern = p; |
| *patternlen = len; |
| } |
| |
/*
 * Strip unescaped trailing spaces from 'buf' in place.
 *
 * A character preceded by a backslash is never treated as a trailing space.
 * If the string ends with a lone backslash, nothing is trimmed.
 */
static void trim_trailing_spaces(char *buf)
{
	/* Start of the current run of spaces, or NULL if not inside one. */
	char *run_start = NULL;
	char *cur = buf;

	while (*cur) {
		if (*cur == ' ') {
			if (!run_start)
				run_start = cur;
		} else {
			if (*cur == '\\') {
				/* Skip the escaped character as well. */
				cur++;
				if (!*cur)
					return;
			}
			run_start = NULL;
		}
		cur++;
	}

	if (run_start)
		*run_start = '\0';
}
| |
| static int match_basename(const char *basename, int basenamelen, |
| const char *pattern, int prefix, int patternlen, |
| unsigned flags) |
| { |
| if (prefix == patternlen) { |
| if (patternlen == basenamelen && |
| !fspathncmp(pattern, basename, basenamelen)) |
| return 1; |
| } else if (flags & PATTERN_FLAG_ENDSWITH) { |
| /* "*literal" matching against "fooliteral" */ |
| if (patternlen - 1 <= basenamelen && |
| !fspathncmp(pattern + 1, |
| basename + basenamelen - (patternlen - 1), |
| patternlen - 1)) |
| return 1; |
| } else { |
| // local modification: call wildmatch() directly |
| if (!wildmatch(pattern, basename, flags)) |
| return 1; |
| } |
| return 0; |
| } |
| |
| static int match_pathname(const char *pathname, int pathlen, |
| const char *base, int baselen, |
| const char *pattern, int prefix, int patternlen) |
| { |
| // local modification: remove local variables |
| |
| /* |
| * match with FNM_PATHNAME; the pattern has base implicitly |
| * in front of it. |
| */ |
| if (*pattern == '/') { |
| pattern++; |
| patternlen--; |
| prefix--; |
| } |
| |
| /* |
| * baselen does not count the trailing slash. base[] may or |
| * may not end with a trailing slash though. |
| */ |
| if (pathlen < baselen + 1 || |
| (baselen && pathname[baselen] != '/') || |
| fspathncmp(pathname, base, baselen)) |
| return 0; |
| |
| // local modification: simplified because always baselen > 0 |
| pathname += baselen + 1; |
| pathlen -= baselen + 1; |
| |
| if (prefix) { |
| /* |
| * if the non-wildcard part is longer than the |
| * remaining pathname, surely it cannot match. |
| */ |
| if (prefix > pathlen) |
| return 0; |
| |
| if (fspathncmp(pattern, pathname, prefix)) |
| return 0; |
| pattern += prefix; |
| patternlen -= prefix; |
| pathname += prefix; |
| pathlen -= prefix; |
| |
| /* |
| * If the whole pattern did not have a wildcard, |
| * then our prefix match is all we need; we |
| * do not need to call fnmatch at all. |
| */ |
| if (!patternlen && !pathlen) |
| return 1; |
| } |
| |
| // local modification: call wildmatch() directly |
| return !wildmatch(pattern, pathname, WM_PATHNAME); |
| } |
| |
| // Copied from git/utf8.c |
| |
/* The three-byte UTF-8 byte order mark (EF BB BF), skipped at the start of a .gitignore. */
static const char utf8_bom[] = "\357\273\277";
| |
| //----------------------------(IMPORT FROM GIT END)---------------------------- |
| |
/*
 * One parsed ignore pattern, allocated by add_pattern().
 */
struct pattern {
	unsigned int flags;	/* PATTERN_FLAG_* bits */
	int nowildcardlen;	/* length of the leading wildcard-free part */
	int patternlen;		/* length of the pattern string */
	int dirlen;		/* length of the directory string */
	/*
	 * Two NUL-terminated strings stored back to back: the pattern
	 * itself, then the directory of the .gitignore it came from
	 * (at offset patternlen + 1).
	 */
	char pattern[];
};
| |
// Stack of all currently active patterns. nr_patterns entries are in use,
// alloced_patterns is the allocated capacity of the array.
static struct pattern **pattern_list;
static int nr_patterns, alloced_patterns;

// Remember the number of patterns at each directory level
static int *nr_patterns_at;
// Track the current/max directory level;
// max_depth is the allocated length of nr_patterns_at.
static int depth, max_depth;
// Enables the output of debug() (-d option)
static bool debug_on;
// out_fp receives the ignored paths; stat_fp, if non-NULL, receives the
// stat lines of non-ignored files.
static FILE *out_fp, *stat_fp;
// String printed in front of each ignored path (-p option)
static char *prefix = "";
// Basename of argv[0], used in messages
static char *progname;
| |
// Print "s: <description of errno>" to stderr and terminate with failure.
static void __attribute__((noreturn)) perror_exit(const char *s)
{
	perror(s);
	exit(EXIT_FAILURE);
}
| |
| static void __attribute__((noreturn)) error_exit(const char *fmt, ...) |
| { |
| va_list args; |
| |
| fprintf(stderr, "%s: error: ", progname); |
| |
| va_start(args, fmt); |
| vfprintf(stderr, fmt, args); |
| va_end(args); |
| |
| exit(EXIT_FAILURE); |
| } |
| |
| static void debug(const char *fmt, ...) |
| { |
| va_list args; |
| int i; |
| |
| if (!debug_on) |
| return; |
| |
| fprintf(stderr, "[DEBUG] "); |
| |
| for (i = 0; i < depth * 2; i++) |
| fputc(' ', stderr); |
| |
| va_start(args, fmt); |
| vfprintf(stderr, fmt, args); |
| va_end(args); |
| } |
| |
| static void *xrealloc(void *ptr, size_t size) |
| { |
| ptr = realloc(ptr, size); |
| if (!ptr) |
| perror_exit(progname); |
| |
| return ptr; |
| } |
| |
// Allocate 'size' bytes through xrealloc(), which exits on failure.
static void *xmalloc(size_t size)
{
	void *mem = xrealloc(NULL, size);

	return mem;
}
| |
| // similar to last_matching_pattern_from_list() in GIT |
| static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir) |
| { |
| int i; |
| |
| // Search in the reverse order because the last matching pattern wins. |
| for (i = nr_patterns - 1; i >= 0; i--) { |
| struct pattern *p = pattern_list[i]; |
| unsigned int flags = p->flags; |
| const char *gitignore_dir = p->pattern + p->patternlen + 1; |
| bool ignored; |
| |
| if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir) |
| continue; |
| |
| if (flags & PATTERN_FLAG_NODIR) { |
| if (!match_basename(path + dirlen + 1, |
| pathlen - dirlen - 1, |
| p->pattern, |
| p->nowildcardlen, |
| p->patternlen, |
| p->flags)) |
| continue; |
| } else { |
| if (!match_pathname(path, pathlen, |
| gitignore_dir, p->dirlen, |
| p->pattern, |
| p->nowildcardlen, |
| p->patternlen)) |
| continue; |
| } |
| |
| debug("%s: matches %s%s%s (%s/.gitignore)\n", path, |
| flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern, |
| flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "", |
| gitignore_dir); |
| |
| ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0; |
| if (ignored) |
| debug("Ignore: %s\n", path); |
| |
| return ignored; |
| } |
| |
| debug("%s: no match\n", path); |
| |
| return false; |
| } |
| |
| static void add_pattern(const char *string, const char *dir, int dirlen) |
| { |
| struct pattern *p; |
| int patternlen, nowildcardlen; |
| unsigned int flags; |
| |
| parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen); |
| |
| if (patternlen == 0) |
| return; |
| |
| p = xmalloc(sizeof(*p) + patternlen + dirlen + 2); |
| |
| memcpy(p->pattern, string, patternlen); |
| p->pattern[patternlen] = 0; |
| memcpy(p->pattern + patternlen + 1, dir, dirlen); |
| p->pattern[patternlen + 1 + dirlen] = 0; |
| |
| p->patternlen = patternlen; |
| p->nowildcardlen = nowildcardlen; |
| p->dirlen = dirlen; |
| p->flags = flags; |
| |
| debug("Add pattern: %s%s%s\n", |
| flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern, |
| flags & PATTERN_FLAG_MUSTBEDIR ? "/" : ""); |
| |
| if (nr_patterns >= alloced_patterns) { |
| alloced_patterns += 128; |
| pattern_list = xrealloc(pattern_list, |
| sizeof(*pattern_list) * alloced_patterns); |
| } |
| |
| pattern_list[nr_patterns++] = p; |
| } |
| |
| // similar to add_patterns_from_buffer() in GIT |
| static void add_patterns_from_gitignore(const char *dir, int dirlen) |
| { |
| struct stat st; |
| char path[PATH_MAX], *buf, *entry; |
| size_t size; |
| int fd, pathlen, i; |
| |
| pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir); |
| if (pathlen >= sizeof(path)) |
| error_exit("%s: too long path was truncated\n", path); |
| |
| fd = open(path, O_RDONLY | O_NOFOLLOW); |
| if (fd < 0) { |
| if (errno != ENOENT) |
| return perror_exit(path); |
| return; |
| } |
| |
| if (fstat(fd, &st) < 0) |
| perror_exit(path); |
| |
| size = st.st_size; |
| |
| buf = xmalloc(size + 1); |
| if (read(fd, buf, st.st_size) != st.st_size) |
| perror_exit(path); |
| |
| buf[st.st_size] = '\n'; |
| if (close(fd)) |
| perror_exit(path); |
| |
| debug("Parse %s\n", path); |
| |
| entry = buf; |
| |
| // skip utf8 bom |
| if (!strncmp(entry, utf8_bom, strlen(utf8_bom))) |
| entry += strlen(utf8_bom); |
| |
| for (i = entry - buf; i < size; i++) { |
| if (buf[i] == '\n') { |
| if (entry != buf + i && entry[0] != '#') { |
| buf[i - (i && buf[i-1] == '\r')] = 0; |
| trim_trailing_spaces(entry); |
| add_pattern(entry, dir, dirlen); |
| } |
| entry = buf + i + 1; |
| } |
| } |
| |
| free(buf); |
| } |
| |
| // Save the current number of patterns and increment the depth |
| static void increment_depth(void) |
| { |
| if (depth >= max_depth) { |
| max_depth += 1; |
| nr_patterns_at = xrealloc(nr_patterns_at, |
| sizeof(*nr_patterns_at) * max_depth); |
| } |
| |
| nr_patterns_at[depth] = nr_patterns; |
| depth++; |
| } |
| |
| // Decrement the depth, and free up the patterns of this directory level. |
| static void decrement_depth(void) |
| { |
| depth--; |
| assert(depth >= 0); |
| |
| while (nr_patterns > nr_patterns_at[depth]) |
| free(pattern_list[--nr_patterns]); |
| } |
| |
| static void print_path(const char *path) |
| { |
| // The path always starts with "./" |
| assert(strlen(path) >= 2); |
| |
| // Replace the root directory with a preferred prefix. |
| // This is useful for the tar command. |
| fprintf(out_fp, "%s%s\n", prefix, path + 2); |
| } |
| |
| static void print_stat(const char *path, struct stat *st) |
| { |
| if (!stat_fp) |
| return; |
| |
| if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode)) |
| return; |
| |
| assert(strlen(path) >= 2); |
| |
| fprintf(stat_fp, "%c %9ld %10ld %s\n", |
| S_ISLNK(st->st_mode) ? 'l' : '-', |
| st->st_size, st->st_mtim.tv_sec, path + 2); |
| } |
| |
| // Traverse the entire directory tree, parsing .gitignore files. |
| // Print file paths that are not tracked by git. |
| // |
| // Return true if all files under the directory are ignored, false otherwise. |
| static bool traverse_directory(const char *dir, int dirlen) |
| { |
| bool all_ignored = true; |
| DIR *dirp; |
| |
| debug("Enter[%d]: %s\n", depth, dir); |
| increment_depth(); |
| |
| add_patterns_from_gitignore(dir, dirlen); |
| |
| dirp = opendir(dir); |
| if (!dirp) |
| perror_exit(dir); |
| |
| while (1) { |
| struct dirent *d; |
| struct stat st; |
| char path[PATH_MAX]; |
| int pathlen; |
| bool ignored; |
| |
| errno = 0; |
| d = readdir(dirp); |
| if (!d) { |
| if (errno) |
| perror_exit(dir); |
| break; |
| } |
| |
| if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, ".")) |
| continue; |
| |
| pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name); |
| if (pathlen >= sizeof(path)) |
| error_exit("%s: too long path was truncated\n", path); |
| |
| if (lstat(path, &st) < 0) |
| perror_exit(path); |
| |
| if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) || |
| is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) { |
| ignored = true; |
| } else { |
| if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) |
| // If all the files in a directory are ignored, |
| // let's ignore that directory as well. This |
| // will avoid empty directories in the tarball. |
| ignored = traverse_directory(path, pathlen); |
| else |
| ignored = false; |
| } |
| |
| if (ignored) { |
| print_path(path); |
| } else { |
| print_stat(path, &st); |
| all_ignored = false; |
| } |
| } |
| |
| if (closedir(dirp)) |
| perror_exit(dir); |
| |
| decrement_depth(); |
| debug("Leave[%d]: %s\n", depth, dir); |
| |
| return all_ignored; |
| } |
| |
| static void usage(void) |
| { |
| fprintf(stderr, |
| "usage: %s [options]\n" |
| "\n" |
| "Show files that are ignored by git\n" |
| "\n" |
| "options:\n" |
| " -d, --debug print debug messages to stderr\n" |
| " -e, --exclude PATTERN add the given exclude pattern\n" |
| " -h, --help show this help message and exit\n" |
| " -i, --ignore-case Ignore case differences between the patterns and the files\n" |
| " -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n" |
| " -p, --prefix PREFIX prefix added to each path (default: empty string)\n" |
| " -r, --rootdir DIR root of the source tree (default: current working directory)\n" |
| " -s, --stat FILE output the file stat of non-ignored files to a file\n", |
| progname); |
| } |
| |
// Open an output stream for writing and store it in *fp.
// The special name "-" selects stdout. Exits on failure.
static void open_output(const char *pathname, FILE **fp)
{
	if (!strcmp(pathname, "-")) {
		*fp = stdout;
		return;
	}

	*fp = fopen(pathname, "w");
	if (*fp == NULL)
		perror_exit(pathname);
}
| |
// Flush and close an output stream, exiting with an error if any write to
// it failed or the close itself fails. 'pathname' is used for the message.
static void close_output(const char *pathname, FILE *fp)
{
	fflush(fp);

	// The error indicator also covers writes that failed earlier.
	if (ferror(fp) != 0)
		error_exit("not all data was written to the output\n");

	if (fclose(fp) != 0)
		perror_exit(pathname);
}
| |
| int main(int argc, char *argv[]) |
| { |
| const char *output = "-"; |
| const char *rootdir = "."; |
| const char *stat = NULL; |
| |
| progname = strrchr(argv[0], '/'); |
| if (progname) |
| progname++; |
| else |
| progname = argv[0]; |
| |
| while (1) { |
| static struct option long_options[] = { |
| {"debug", no_argument, NULL, 'd'}, |
| {"help", no_argument, NULL, 'h'}, |
| {"ignore-case", no_argument, NULL, 'i'}, |
| {"output", required_argument, NULL, 'o'}, |
| {"prefix", required_argument, NULL, 'p'}, |
| {"rootdir", required_argument, NULL, 'r'}, |
| {"stat", required_argument, NULL, 's'}, |
| {"exclude", required_argument, NULL, 'x'}, |
| {}, |
| }; |
| |
| int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL); |
| |
| if (c == -1) |
| break; |
| |
| switch (c) { |
| case 'd': |
| debug_on = true; |
| break; |
| case 'h': |
| usage(); |
| exit(0); |
| case 'i': |
| ignore_case = true; |
| break; |
| case 'o': |
| output = optarg; |
| break; |
| case 'p': |
| prefix = optarg; |
| break; |
| case 'r': |
| rootdir = optarg; |
| break; |
| case 's': |
| stat = optarg; |
| break; |
| case 'x': |
| add_pattern(optarg, ".", strlen(".")); |
| break; |
| case '?': |
| usage(); |
| /* fallthrough */ |
| default: |
| exit(EXIT_FAILURE); |
| } |
| } |
| |
| open_output(output, &out_fp); |
| if (stat && stat[0]) |
| open_output(stat, &stat_fp); |
| |
| if (chdir(rootdir)) |
| perror_exit(rootdir); |
| |
| add_pattern(".git/", ".", strlen(".")); |
| |
| if (traverse_directory(".", strlen("."))) |
| print_path("./"); |
| |
| assert(depth == 0); |
| |
| while (nr_patterns > 0) |
| free(pattern_list[--nr_patterns]); |
| free(pattern_list); |
| free(nr_patterns_at); |
| |
| close_output(output, out_fp); |
| if (stat_fp) |
| close_output(stat, stat_fp); |
| |
| return 0; |
| } |