diff options
| author | Shulhan <ms@kilabit.info> | 2026-01-02 00:29:23 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2026-01-06 14:56:07 +0700 |
| commit | 988ff4596d6fb600d0c94aeed41b5b17f1677032 (patch) | |
| tree | 5025b84a976cdef093e39c67ecdca7394127e029 | |
| parent | 2d18ac7fd9f8ddc92ddfb7de772adea8468ac3ea (diff) | |
| download | pakakeh.go-988ff4596d6fb600d0c94aeed41b5b17f1677032.tar.xz | |
lib/git: implement Gitignore
Gitignore is a type that represents a ".gitignore" file.
There are two ways to populate a Gitignore: by using the [LoadGitignore]
function, or by using the [Gitignore.Parse] method.
After the Gitignore is created, one can check whether a path, relative to
the Gitignore directory, is ignored by using the [Gitignore.IsIgnored] method.
| -rw-r--r-- | lib/git/gitignore.go | 106 | ||||
| -rw-r--r-- | lib/git/gitignore_example_test.go | 41 | ||||
| -rw-r--r-- | lib/git/ignore_pattern.go | 161 | ||||
| -rw-r--r-- | lib/git/ignore_pattern_test.go | 314 | ||||
| -rw-r--r-- | lib/git/testdata/IsIgnored/foo/vendor | 2 |
5 files changed, 624 insertions, 0 deletions
diff --git a/lib/git/gitignore.go b/lib/git/gitignore.go new file mode 100644 index 00000000..e9313cd0 --- /dev/null +++ b/lib/git/gitignore.go @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package git + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "strings" +) + +// Gitignore is a type that represent ".gitignore" file. +// The content of Gitignore can be populated from [LoadGitignore] function or +// [Gitignore.Parse] method. +type Gitignore struct { + // dir path to directory that contains ".gitignore" file. + dir string + + // path to the ".gitignore" file. + path string + + // excludePatterns contains list of excluded pattern from + // ".gitignore" file. + excludePatterns []ignorePattern + + // includePatterns contains list of include pattern, the one that + // start with "!". + includePatterns []ignorePattern +} + +// LoadGitignore load the gitignore file inside directory `dir`. +// It will return nil without error if the ".gitignore" file is not exists. +// +// Any invalid pattern will be ignored. +func LoadGitignore(dir string) (ign *Gitignore, err error) { + var logp = `LoadGitignore` + var content []byte + + ign = &Gitignore{ + path: filepath.Join(dir, `.gitignore`), + } + content, err = os.ReadFile(ign.path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + ign.Parse(dir, content) + return ign, nil +} + +// Parse the raw content of ".gitignore" file that located inside the `dir` +// directory. +// This is an alternative to populate Gitignore content beside +// [LoadGitignore]. +// Any invalid pattern inside the `content` will be ignored. 
+func (ign *Gitignore) Parse(dir string, content []byte) { + ign.dir = dir + var lines = bytes.Split(content, []byte{'\n'}) + var line []byte + for _, line = range lines { + var pat ignorePattern + pat = parsePattern(line) + if pat.pattern == nil { + // Skip invalid pattern. + continue + } + if pat.isNegate { + ign.includePatterns = append(ign.includePatterns, pat) + } else { + ign.excludePatterns = append(ign.excludePatterns, pat) + } + } +} + +// IsIgnored return true if the `path` is ignored by this Gitignore content. +// The `path` is relative to Gitignore directory. +func (ign *Gitignore) IsIgnored(path string) bool { + path = strings.TrimSpace(path) + if path == `` { + return true + } + var fullpath = filepath.Join(ign.dir, path) + var fi os.FileInfo + fi, _ = os.Stat(fullpath) + if fi != nil { + if fi.IsDir() { + path += "/" + } + } + var pat ignorePattern + for _, pat = range ign.includePatterns { + if pat.isMatch(path) { + return false + } + } + for _, pat = range ign.excludePatterns { + if pat.isMatch(path) { + return true + } + } + return false +} diff --git a/lib/git/gitignore_example_test.go b/lib/git/gitignore_example_test.go new file mode 100644 index 00000000..992a3525 --- /dev/null +++ b/lib/git/gitignore_example_test.go @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package git_test + +import ( + "fmt" + + "git.sr.ht/~shulhan/pakakeh.go/lib/git" +) + +func ExampleGitignore_IsIgnored() { + var ign = git.Gitignore{} + + ign.Parse(`testdata/IsIgnored/`, []byte(`# comment + # comment + vendor/ # Ignore vendor directory, but not vendor file. +/hello.* # Ignore hello at root, but not foo/hello.go. 
+!hello.go`)) + + var listPath = []string{ + ``, + `vendor`, + `vendor/dummy`, + `hello.html`, + `hello.go`, + `foo/hello.go`, + `foo/vendor`, + } + for _, path := range listPath { + fmt.Printf("%q: %t\n", path, ign.IsIgnored(path)) + } + // Output: + // "": true + // "vendor": true + // "vendor/dummy": true + // "hello.html": true + // "hello.go": false + // "foo/hello.go": false + // "foo/vendor": false +} diff --git a/lib/git/ignore_pattern.go b/lib/git/ignore_pattern.go new file mode 100644 index 00000000..db4e2256 --- /dev/null +++ b/lib/git/ignore_pattern.go @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package git + +import ( + "bytes" + "path/filepath" + "regexp" +) + +type ignorePattern struct { + pattern *regexp.Regexp + isDir bool // True if pattern end with '/'. + isNegate bool // True if pattern start with '!'. +} + +// parsePattern parse the line from gitignore. +// At this point, the line must be not empty and not a comment. +// If the pattern is invalid it return with nil [Gitignore.pattern]. +func parsePattern(line []byte) (ign ignorePattern) { + line = bytes.TrimSpace(line) + if len(line) == 0 { + // Skip empty line. + return ign + } + if line[0] == '#' { + // Skip comment. + return ign + } + if line[0] == '!' { + ign.isNegate = true + line = line[1:] + } + line = removeComment(line) + + if line[len(line)-1] == '/' { + ign.isDir = true + line = line[:len(line)-1] + } + + // The "**/foo" pattern is equal to "foo", so we can remove the "**/". + for bytes.HasPrefix(line, []byte("**/")) { + line = line[3:] + } + if len(line) == 0 { + return ign + } + for bytes.HasPrefix(line, []byte("**")) { + line = line[1:] + } + if len(line) == 0 || len(line) == 1 && line[0] == '*' { + // Ignore consecutive '*' pattern, since its mean match + // anything. + return ign + } + + // Get the index of directory separator, before we replace it some + // special characters with regex. 
+ var sepIdx = bytes.LastIndexByte(line, '/') + + var RE_EVERYTHING_INSIDE = []byte(`/(.*)`) + var RE_FILE_OR_DIR = []byte(`/?$`) + var RE_NO_DIR_BEFORE = []byte(`^/?`) + var RE_ONE_CHAR_EXCEPT_SEP = []byte(`[^/]`) + var RE_ZERO_OR_MORE_CHAR_EXCEPT_SEP = []byte(`[^/]*`) + var RE_ZERO_OR_MORE_DIR = []byte(`(/.*)?/`) + var RE_ZERO_OR_MORE_DIR_BEFORE = []byte(`^(.*/|/)?`) + + // First replacement, + // - Replace single '*' with RE_ZERO_OR_MORE_CHAR_EXCEPT_SEP + // - Replace '?' with RE_ONE_CHAR_EXCEPT_SEP + // - Escape regex metacharacters '.', '+', '|', '(', and ')' + var newline = make([]byte, 0, len(line)) + var lastidx = len(line) - 1 + var x = 0 + var c byte + for x < len(line) { + c = line[x] + switch c { + case '*': + if x != lastidx && line[x+1] == '*' { + // The '**' is for regex. + newline = append(newline, c) + newline = append(newline, c) + x++ + } else { + newline = append(newline, RE_ZERO_OR_MORE_CHAR_EXCEPT_SEP...) + } + case '?': + newline = append(newline, RE_ONE_CHAR_EXCEPT_SEP...) + case '.', '+', '|', '(', ')': + newline = append(newline, '\\', c) + default: + newline = append(newline, c) + } + x++ + } + line = newline + + line = bytes.ReplaceAll(line, []byte("/**/"), RE_ZERO_OR_MORE_DIR) + line = bytes.ReplaceAll(line, []byte("/**"), RE_EVERYTHING_INSIDE) + line = bytes.ReplaceAll(line, []byte("**"), RE_ZERO_OR_MORE_CHAR_EXCEPT_SEP) + + switch sepIdx { + case -1: + // "foo" single string without directory separator should match only + // if its end with it. + // "foo" match with "/foo" or "a/foo" but not "afoo" or + // "a/foo/bar". + line = append(RE_ZERO_OR_MORE_DIR_BEFORE, line...) + case 0: + // "/foo" match with "foo" or "/foo" but not "a/foo" nor + // "a/foo/bar". + line = append(RE_NO_DIR_BEFORE, line[1:]...) + default: + // "foo/bar" should match with "/foo/bar" but not "a/foo/bar". + if line[0] == '/' { + line = line[1:] + } + line = append(RE_NO_DIR_BEFORE, line...) 
+ } + if ign.isDir { + line = append(line, '/', '$') + } else { + line = append(line, RE_FILE_OR_DIR...) + } + ign.pattern, _ = regexp.Compile(string(line)) + return ign +} + +func removeComment(line []byte) []byte { + var x = bytes.LastIndexByte(line, '#') + if x == -1 { + return line + } + for line[x-1] == '\\' { + x = bytes.LastIndexByte(line[:x-1], '#') + if x == -1 { + return line + } + } + return bytes.TrimSpace(line[:x]) +} + +func (pat *ignorePattern) isMatch(path string) bool { + if pat.pattern.MatchString(path) { + return true + } + if !pat.isDir { + return false + } + path = filepath.Dir(path) + for path != `.` { + if pat.pattern.MatchString(path + "/") { + return true + } + path = filepath.Dir(path) + } + return false +} diff --git a/lib/git/ignore_pattern_test.go b/lib/git/ignore_pattern_test.go new file mode 100644 index 00000000..544cb0d8 --- /dev/null +++ b/lib/git/ignore_pattern_test.go @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2025 M. 
Shulhan <ms@kilabit.info> + +package git + +import ( + "regexp" + "testing" + + "git.sr.ht/~shulhan/pakakeh.go/lib/test" +) + +func TestParsePattern(t *testing.T) { + type testCase struct { + pattern string + exp ignorePattern + } + var listCase = []testCase{{ + pattern: `#`, + exp: ignorePattern{ + pattern: nil, + }, + }, { + pattern: `a #`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^(.*/|/)?a/?$`), + }, + }, { + pattern: `a \#`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^(.*/|/)?a \#/?$`), + }, + }, { + pattern: `?`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^(.*/|/)?[^/]/?$`), + }, + }, { + pattern: `!a`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^(.*/|/)?a/?$`), + isNegate: true, + }, + }, { + pattern: `*`, + exp: ignorePattern{ + pattern: nil, + }, + }, { + pattern: `*/`, + exp: ignorePattern{ + pattern: nil, + isDir: true, + }, + }, { + pattern: `**`, + exp: ignorePattern{ + pattern: nil, + }, + }, { + pattern: `***`, + exp: ignorePattern{ + pattern: nil, + }, + }, { + pattern: `**/**`, + exp: ignorePattern{ + pattern: nil, + }, + }, { + pattern: `**/**/`, + exp: ignorePattern{ + pattern: nil, + isDir: true, + }, + }, { + pattern: `**/**foo`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^(.*/|/)?[^/]*foo/?$`), + }, + }, { + pattern: `foo`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^(.*/|/)?foo/?$`), + }, + }, { + pattern: `foo/`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^(.*/|/)?foo/$`), + isDir: true, + }, + }, { + pattern: `/foo`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^/?foo/?$`), + }, + }, { + pattern: `foo/**/bar`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^/?foo(/.*)?/bar/?$`), + }, + }, { + pattern: `a+b|c`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^(.*/|/)?a\+b\|c/?$`), + }, + }, { + pattern: `(a|b)`, + exp: ignorePattern{ + pattern: regexp.MustCompile(`^(.*/|/)?\(a\|b\)/?$`), + }, + }} + for _, tc := range listCase { + var got = 
parsePattern([]byte(tc.pattern)) + test.Assert(t, tc.pattern, tc.exp, got) + } +} + +func TestIgnorePattern_isMatch(t *testing.T) { + type testCase struct { + listCase map[string]bool + pattern string + } + var listCase = []testCase{{ + pattern: ` foo # comment`, + listCase: map[string]bool{ + `foo`: true, + `foo/`: true, + `a/foo`: true, + `a/b/foo`: true, + `afoo`: false, + `a/foo/bar`: false, + }, + }, { + pattern: `foo/`, + listCase: map[string]bool{ + `foo/bar`: true, + `a/foo/bar`: true, + `foo`: false, + `afoo`: false, + `a/foo`: false, + }, + }, { + pattern: `/foo`, + listCase: map[string]bool{ + `foo`: true, + `foo/bar`: false, + `a/foo`: false, + `a/foo/bar`: false, + `afoo`: false, + }, + }, { + pattern: `/foo/bar`, + listCase: map[string]bool{ + `foo/bar`: true, + `foo/bar/`: true, + `foo/bar/z`: false, + `afoo/bar`: false, + `a/foo/bar`: false, + `a/foo/bar/z`: false, + }, + }, { + pattern: `foo/bar/`, + listCase: map[string]bool{ + `foo/bar/`: true, + `foo/bar/z`: true, + `foo/bar`: false, + `afoo/bar`: false, + `a/foo/bar`: false, + `a/foo/bar/z`: false, + }, + }, { + pattern: `/foo/bar/`, + listCase: map[string]bool{ + `/foo/bar/`: true, + `foo/bar/`: true, + `foo/bar/z`: true, + `foo/bar`: false, + `afoo/bar`: false, + `a/foo/bar`: false, + `a/foo/bar/z`: false, + }, + }, { + pattern: `foo*`, + listCase: map[string]bool{ + `foo`: true, + `foobar`: true, + `a/foo`: true, + `a/foobar`: true, + `a/foo/bar`: false, + }, + }, { + pattern: `foo.*`, + listCase: map[string]bool{ + `foo.`: true, + `foo.bar`: true, + `a/foo.bar`: true, + `a/foo./bar`: false, + `a/foobar`: false, + }, + }, { + pattern: `*foo`, + listCase: map[string]bool{ + `foo`: true, + `afoo`: true, + `a/foo`: true, + `a/bfoo`: true, + `foobar`: false, + `a/foo/bar`: false, + `a/foobar`: false, + }, + }, { + pattern: `foo?`, + listCase: map[string]bool{ + `food`: true, + `a/food`: true, + `foo`: false, + `foobar`: false, + `afoo`: false, + `a/foo`: false, + `a/foobar`: false, + 
`a/foo/bar`: false, + }, + }, { + pattern: `?foo`, + listCase: map[string]bool{ + `afoo`: true, + `a/afoo`: true, + `foo`: false, + `a/foo`: false, + `a/foobar`: false, + }, + }, { + pattern: `foo/*`, + listCase: map[string]bool{ + `foo`: false, + `foo/bar`: true, + `foo/bar/z`: false, + }, + }, { + pattern: `**/foo`, + listCase: map[string]bool{ + `foo`: true, + `/foo`: true, + `a/foo`: true, + `a/b/foo`: true, + `a/b/foo/bar`: false, + }, + }, { + pattern: `foo/**`, + listCase: map[string]bool{ + `foo/bar`: true, + `foo/bar/foo`: true, + `foo`: false, + `a/foo/bar`: false, + }, + }, { + pattern: `foo/**/bar`, + listCase: map[string]bool{ + `foo/bar`: true, + `foo/a/bar`: true, + `foo/a/b/bar`: true, + `foo/bar/foo`: false, + `bar`: false, + `a/foo/bar`: false, + `a/foo/b/bar`: false, + }, + }, { + pattern: `a+b|c`, + listCase: map[string]bool{ + `a+b|c`: true, + `aab|c`: false, + `aab`: false, + }, + }, { + pattern: `(a|b)`, + listCase: map[string]bool{ + `(a|b)`: true, + `a`: false, + `b`: false, + }, + }} + for _, tc := range listCase { + var pat = parsePattern([]byte(tc.pattern)) + for name, exp := range tc.listCase { + var got = pat.isMatch(name) + if exp != got { + t.Fatalf("%q: on %q want %t, got %t", + tc.pattern, name, exp, got) + } + } + } +} + +func TestRemoveComment(t *testing.T) { + type testCase struct { + pattern string + exp string + } + var listCase = []testCase{{ + pattern: `a#`, + exp: `a`, + }, { + pattern: `a\#`, + exp: `a\#`, + }, { + pattern: `a\##`, + exp: `a\#`, + }} + for _, tc := range listCase { + got := removeComment([]byte(tc.pattern)) + test.Assert(t, tc.pattern, tc.exp, string(got)) + } +} diff --git a/lib/git/testdata/IsIgnored/foo/vendor b/lib/git/testdata/IsIgnored/foo/vendor new file mode 100644 index 00000000..62f13a9e --- /dev/null +++ b/lib/git/testdata/IsIgnored/foo/vendor @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> |
