aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2026-01-02 00:29:23 +0700
committerShulhan <ms@kilabit.info>2026-01-06 14:56:07 +0700
commit988ff4596d6fb600d0c94aeed41b5b17f1677032 (patch)
tree5025b84a976cdef093e39c67ecdca7394127e029
parent2d18ac7fd9f8ddc92ddfb7de772adea8468ac3ea (diff)
downloadpakakeh.go-988ff4596d6fb600d0c94aeed41b5b17f1677032.tar.xz
lib/git: implement Gitignore
Gitignore is a type that represents a ".gitignore" file. There are two ways to populate a Gitignore: by using the [LoadGitignore] function, or by using the [Gitignore.Parse] method. After the Gitignore is created, one can check whether a path is ignored by using the [Gitignore.IsIgnored] method, with the path relative to the Gitignore directory.
-rw-r--r--lib/git/gitignore.go106
-rw-r--r--lib/git/gitignore_example_test.go41
-rw-r--r--lib/git/ignore_pattern.go161
-rw-r--r--lib/git/ignore_pattern_test.go314
-rw-r--r--lib/git/testdata/IsIgnored/foo/vendor2
5 files changed, 624 insertions, 0 deletions
diff --git a/lib/git/gitignore.go b/lib/git/gitignore.go
new file mode 100644
index 00000000..e9313cd0
--- /dev/null
+++ b/lib/git/gitignore.go
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package git
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+)
+
+// Gitignore is a type that represent ".gitignore" file.
+// The content of Gitignore can be populated from [LoadGitignore] function or
+// [Gitignore.Parse] method.
+type Gitignore struct {
+ // dir path to directory that contains ".gitignore" file.
+ dir string
+
+ // path to the ".gitignore" file.
+ path string
+
+ // excludePatterns contains list of excluded pattern from
+ // ".gitignore" file.
+ excludePatterns []ignorePattern
+
+ // includePatterns contains list of include pattern, the one that
+ // start with "!".
+ includePatterns []ignorePattern
+}
+
+// LoadGitignore load the gitignore file inside directory `dir`.
+// It will return nil without error if the ".gitignore" file is not exists.
+//
+// Any invalid pattern will be ignored.
+func LoadGitignore(dir string) (ign *Gitignore, err error) {
+ var logp = `LoadGitignore`
+ var content []byte
+
+ ign = &Gitignore{
+ path: filepath.Join(dir, `.gitignore`),
+ }
+ content, err = os.ReadFile(ign.path)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil, nil
+ }
+ return nil, fmt.Errorf(`%s: %w`, logp, err)
+ }
+ ign.Parse(dir, content)
+ return ign, nil
+}
+
+// Parse the raw content of ".gitignore" file that located inside the `dir`
+// directory.
+// This is an alternative to populate Gitignore content beside
+// [LoadGitignore].
+// Any invalid pattern inside the `content` will be ignored.
+func (ign *Gitignore) Parse(dir string, content []byte) {
+ ign.dir = dir
+ var lines = bytes.Split(content, []byte{'\n'})
+ var line []byte
+ for _, line = range lines {
+ var pat ignorePattern
+ pat = parsePattern(line)
+ if pat.pattern == nil {
+ // Skip invalid pattern.
+ continue
+ }
+ if pat.isNegate {
+ ign.includePatterns = append(ign.includePatterns, pat)
+ } else {
+ ign.excludePatterns = append(ign.excludePatterns, pat)
+ }
+ }
+}
+
+// IsIgnored return true if the `path` is ignored by this Gitignore content.
+// The `path` is relative to Gitignore directory.
+func (ign *Gitignore) IsIgnored(path string) bool {
+ path = strings.TrimSpace(path)
+ if path == `` {
+ return true
+ }
+ var fullpath = filepath.Join(ign.dir, path)
+ var fi os.FileInfo
+ fi, _ = os.Stat(fullpath)
+ if fi != nil {
+ if fi.IsDir() {
+ path += "/"
+ }
+ }
+ var pat ignorePattern
+ for _, pat = range ign.includePatterns {
+ if pat.isMatch(path) {
+ return false
+ }
+ }
+ for _, pat = range ign.excludePatterns {
+ if pat.isMatch(path) {
+ return true
+ }
+ }
+ return false
+}
diff --git a/lib/git/gitignore_example_test.go b/lib/git/gitignore_example_test.go
new file mode 100644
index 00000000..992a3525
--- /dev/null
+++ b/lib/git/gitignore_example_test.go
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package git_test
+
+import (
+ "fmt"
+
+ "git.sr.ht/~shulhan/pakakeh.go/lib/git"
+)
+
+func ExampleGitignore_IsIgnored() {
+ var ign = git.Gitignore{}
+
+ ign.Parse(`testdata/IsIgnored/`, []byte(`# comment
+ # comment
+ vendor/ # Ignore vendor directory, but not vendor file.
+/hello.* # Ignore hello at root, but not foo/hello.go.
+!hello.go`))
+
+ var listPath = []string{
+ ``,
+ `vendor`,
+ `vendor/dummy`,
+ `hello.html`,
+ `hello.go`,
+ `foo/hello.go`,
+ `foo/vendor`,
+ }
+ for _, path := range listPath {
+ fmt.Printf("%q: %t\n", path, ign.IsIgnored(path))
+ }
+ // Output:
+ // "": true
+ // "vendor": true
+ // "vendor/dummy": true
+ // "hello.html": true
+ // "hello.go": false
+ // "foo/hello.go": false
+ // "foo/vendor": false
+}
diff --git a/lib/git/ignore_pattern.go b/lib/git/ignore_pattern.go
new file mode 100644
index 00000000..db4e2256
--- /dev/null
+++ b/lib/git/ignore_pattern.go
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package git
+
+import (
+ "bytes"
+ "path/filepath"
+ "regexp"
+)
+
// ignorePattern is a single line of a ".gitignore" file after it has been
// translated into a regular expression.
type ignorePattern struct {
	// pattern is the compiled regular expression for the line.
	// It is nil when the line is not a valid pattern.
	pattern *regexp.Regexp

	// isDir is true if the pattern ends with '/'.
	isDir bool

	// isNegate is true if the pattern starts with '!'.
	isNegate bool
}
+
+// parsePattern parse the line from gitignore.
+// At this point, the line must be not empty and not a comment.
+// If the pattern is invalid it return with nil [Gitignore.pattern].
+func parsePattern(line []byte) (ign ignorePattern) {
+ line = bytes.TrimSpace(line)
+ if len(line) == 0 {
+ // Skip empty line.
+ return ign
+ }
+ if line[0] == '#' {
+ // Skip comment.
+ return ign
+ }
+ if line[0] == '!' {
+ ign.isNegate = true
+ line = line[1:]
+ }
+ line = removeComment(line)
+
+ if line[len(line)-1] == '/' {
+ ign.isDir = true
+ line = line[:len(line)-1]
+ }
+
+ // The "**/foo" pattern is equal to "foo", so we can remove the "**/".
+ for bytes.HasPrefix(line, []byte("**/")) {
+ line = line[3:]
+ }
+ if len(line) == 0 {
+ return ign
+ }
+ for bytes.HasPrefix(line, []byte("**")) {
+ line = line[1:]
+ }
+ if len(line) == 0 || len(line) == 1 && line[0] == '*' {
+ // Ignore consecutive '*' pattern, since its mean match
+ // anything.
+ return ign
+ }
+
+ // Get the index of directory separator, before we replace it some
+ // special characters with regex.
+ var sepIdx = bytes.LastIndexByte(line, '/')
+
+ var RE_EVERYTHING_INSIDE = []byte(`/(.*)`)
+ var RE_FILE_OR_DIR = []byte(`/?$`)
+ var RE_NO_DIR_BEFORE = []byte(`^/?`)
+ var RE_ONE_CHAR_EXCEPT_SEP = []byte(`[^/]`)
+ var RE_ZERO_OR_MORE_CHAR_EXCEPT_SEP = []byte(`[^/]*`)
+ var RE_ZERO_OR_MORE_DIR = []byte(`(/.*)?/`)
+ var RE_ZERO_OR_MORE_DIR_BEFORE = []byte(`^(.*/|/)?`)
+
+ // First replacement,
+ // - Replace single '*' with RE_ZERO_OR_MORE_CHAR_EXCEPT_SEP
+ // - Replace '?' with RE_ONE_CHAR_EXCEPT_SEP
+ // - Escape regex metacharacters '.', '+', '|', '(', and ')'
+ var newline = make([]byte, 0, len(line))
+ var lastidx = len(line) - 1
+ var x = 0
+ var c byte
+ for x < len(line) {
+ c = line[x]
+ switch c {
+ case '*':
+ if x != lastidx && line[x+1] == '*' {
+ // The '**' is for regex.
+ newline = append(newline, c)
+ newline = append(newline, c)
+ x++
+ } else {
+ newline = append(newline, RE_ZERO_OR_MORE_CHAR_EXCEPT_SEP...)
+ }
+ case '?':
+ newline = append(newline, RE_ONE_CHAR_EXCEPT_SEP...)
+ case '.', '+', '|', '(', ')':
+ newline = append(newline, '\\', c)
+ default:
+ newline = append(newline, c)
+ }
+ x++
+ }
+ line = newline
+
+ line = bytes.ReplaceAll(line, []byte("/**/"), RE_ZERO_OR_MORE_DIR)
+ line = bytes.ReplaceAll(line, []byte("/**"), RE_EVERYTHING_INSIDE)
+ line = bytes.ReplaceAll(line, []byte("**"), RE_ZERO_OR_MORE_CHAR_EXCEPT_SEP)
+
+ switch sepIdx {
+ case -1:
+ // "foo" single string without directory separator should match only
+ // if its end with it.
+ // "foo" match with "/foo" or "a/foo" but not "afoo" or
+ // "a/foo/bar".
+ line = append(RE_ZERO_OR_MORE_DIR_BEFORE, line...)
+ case 0:
+ // "/foo" match with "foo" or "/foo" but not "a/foo" nor
+ // "a/foo/bar".
+ line = append(RE_NO_DIR_BEFORE, line[1:]...)
+ default:
+ // "foo/bar" should match with "/foo/bar" but not "a/foo/bar".
+ if line[0] == '/' {
+ line = line[1:]
+ }
+ line = append(RE_NO_DIR_BEFORE, line...)
+ }
+ if ign.isDir {
+ line = append(line, '/', '$')
+ } else {
+ line = append(line, RE_FILE_OR_DIR...)
+ }
+ ign.pattern, _ = regexp.Compile(string(line))
+ return ign
+}
+
// removeComment strips the trailing comment from line.
// The comment starts at the last '#' that is not preceded by a backslash;
// a '#' escaped as "\#" is part of the pattern and is skipped when
// searching for that position.
//
// If no unescaped '#' is found the line is returned as is.
// Otherwise the text before the '#' is returned with surrounding white
// spaces trimmed, which is empty when the '#' is the first character.
func removeComment(line []byte) []byte {
	var x = bytes.LastIndexByte(line, '#')

	// Skip every escaped '#', searching backward.
	// A '#' at index 0 has no preceding character, so it can never be
	// escaped; the "x > 0" guard keeps line[x-1] in range.
	for x > 0 && line[x-1] == '\\' {
		x = bytes.LastIndexByte(line[:x-1], '#')
	}
	if x == -1 {
		return line
	}
	return bytes.TrimSpace(line[:x])
}
+
+func (pat *ignorePattern) isMatch(path string) bool {
+ if pat.pattern.MatchString(path) {
+ return true
+ }
+ if !pat.isDir {
+ return false
+ }
+ path = filepath.Dir(path)
+ for path != `.` {
+ if pat.pattern.MatchString(path + "/") {
+ return true
+ }
+ path = filepath.Dir(path)
+ }
+ return false
+}
diff --git a/lib/git/ignore_pattern_test.go b/lib/git/ignore_pattern_test.go
new file mode 100644
index 00000000..544cb0d8
--- /dev/null
+++ b/lib/git/ignore_pattern_test.go
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package git
+
+import (
+ "regexp"
+ "testing"
+
+ "git.sr.ht/~shulhan/pakakeh.go/lib/test"
+)
+
+func TestParsePattern(t *testing.T) {
+ type testCase struct {
+ pattern string
+ exp ignorePattern
+ }
+ var listCase = []testCase{{
+ pattern: `#`,
+ exp: ignorePattern{
+ pattern: nil,
+ },
+ }, {
+ pattern: `a #`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^(.*/|/)?a/?$`),
+ },
+ }, {
+ pattern: `a \#`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^(.*/|/)?a \#/?$`),
+ },
+ }, {
+ pattern: `?`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^(.*/|/)?[^/]/?$`),
+ },
+ }, {
+ pattern: `!a`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^(.*/|/)?a/?$`),
+ isNegate: true,
+ },
+ }, {
+ pattern: `*`,
+ exp: ignorePattern{
+ pattern: nil,
+ },
+ }, {
+ pattern: `*/`,
+ exp: ignorePattern{
+ pattern: nil,
+ isDir: true,
+ },
+ }, {
+ pattern: `**`,
+ exp: ignorePattern{
+ pattern: nil,
+ },
+ }, {
+ pattern: `***`,
+ exp: ignorePattern{
+ pattern: nil,
+ },
+ }, {
+ pattern: `**/**`,
+ exp: ignorePattern{
+ pattern: nil,
+ },
+ }, {
+ pattern: `**/**/`,
+ exp: ignorePattern{
+ pattern: nil,
+ isDir: true,
+ },
+ }, {
+ pattern: `**/**foo`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^(.*/|/)?[^/]*foo/?$`),
+ },
+ }, {
+ pattern: `foo`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^(.*/|/)?foo/?$`),
+ },
+ }, {
+ pattern: `foo/`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^(.*/|/)?foo/$`),
+ isDir: true,
+ },
+ }, {
+ pattern: `/foo`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^/?foo/?$`),
+ },
+ }, {
+ pattern: `foo/**/bar`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^/?foo(/.*)?/bar/?$`),
+ },
+ }, {
+ pattern: `a+b|c`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^(.*/|/)?a\+b\|c/?$`),
+ },
+ }, {
+ pattern: `(a|b)`,
+ exp: ignorePattern{
+ pattern: regexp.MustCompile(`^(.*/|/)?\(a\|b\)/?$`),
+ },
+ }}
+ for _, tc := range listCase {
+ var got = parsePattern([]byte(tc.pattern))
+ test.Assert(t, tc.pattern, tc.exp, got)
+ }
+}
+
+func TestIgnorePattern_isMatch(t *testing.T) {
+ type testCase struct {
+ listCase map[string]bool
+ pattern string
+ }
+ var listCase = []testCase{{
+ pattern: ` foo # comment`,
+ listCase: map[string]bool{
+ `foo`: true,
+ `foo/`: true,
+ `a/foo`: true,
+ `a/b/foo`: true,
+ `afoo`: false,
+ `a/foo/bar`: false,
+ },
+ }, {
+ pattern: `foo/`,
+ listCase: map[string]bool{
+ `foo/bar`: true,
+ `a/foo/bar`: true,
+ `foo`: false,
+ `afoo`: false,
+ `a/foo`: false,
+ },
+ }, {
+ pattern: `/foo`,
+ listCase: map[string]bool{
+ `foo`: true,
+ `foo/bar`: false,
+ `a/foo`: false,
+ `a/foo/bar`: false,
+ `afoo`: false,
+ },
+ }, {
+ pattern: `/foo/bar`,
+ listCase: map[string]bool{
+ `foo/bar`: true,
+ `foo/bar/`: true,
+ `foo/bar/z`: false,
+ `afoo/bar`: false,
+ `a/foo/bar`: false,
+ `a/foo/bar/z`: false,
+ },
+ }, {
+ pattern: `foo/bar/`,
+ listCase: map[string]bool{
+ `foo/bar/`: true,
+ `foo/bar/z`: true,
+ `foo/bar`: false,
+ `afoo/bar`: false,
+ `a/foo/bar`: false,
+ `a/foo/bar/z`: false,
+ },
+ }, {
+ pattern: `/foo/bar/`,
+ listCase: map[string]bool{
+ `/foo/bar/`: true,
+ `foo/bar/`: true,
+ `foo/bar/z`: true,
+ `foo/bar`: false,
+ `afoo/bar`: false,
+ `a/foo/bar`: false,
+ `a/foo/bar/z`: false,
+ },
+ }, {
+ pattern: `foo*`,
+ listCase: map[string]bool{
+ `foo`: true,
+ `foobar`: true,
+ `a/foo`: true,
+ `a/foobar`: true,
+ `a/foo/bar`: false,
+ },
+ }, {
+ pattern: `foo.*`,
+ listCase: map[string]bool{
+ `foo.`: true,
+ `foo.bar`: true,
+ `a/foo.bar`: true,
+ `a/foo./bar`: false,
+ `a/foobar`: false,
+ },
+ }, {
+ pattern: `*foo`,
+ listCase: map[string]bool{
+ `foo`: true,
+ `afoo`: true,
+ `a/foo`: true,
+ `a/bfoo`: true,
+ `foobar`: false,
+ `a/foo/bar`: false,
+ `a/foobar`: false,
+ },
+ }, {
+ pattern: `foo?`,
+ listCase: map[string]bool{
+ `food`: true,
+ `a/food`: true,
+ `foo`: false,
+ `foobar`: false,
+ `afoo`: false,
+ `a/foo`: false,
+ `a/foobar`: false,
+ `a/foo/bar`: false,
+ },
+ }, {
+ pattern: `?foo`,
+ listCase: map[string]bool{
+ `afoo`: true,
+ `a/afoo`: true,
+ `foo`: false,
+ `a/foo`: false,
+ `a/foobar`: false,
+ },
+ }, {
+ pattern: `foo/*`,
+ listCase: map[string]bool{
+ `foo`: false,
+ `foo/bar`: true,
+ `foo/bar/z`: false,
+ },
+ }, {
+ pattern: `**/foo`,
+ listCase: map[string]bool{
+ `foo`: true,
+ `/foo`: true,
+ `a/foo`: true,
+ `a/b/foo`: true,
+ `a/b/foo/bar`: false,
+ },
+ }, {
+ pattern: `foo/**`,
+ listCase: map[string]bool{
+ `foo/bar`: true,
+ `foo/bar/foo`: true,
+ `foo`: false,
+ `a/foo/bar`: false,
+ },
+ }, {
+ pattern: `foo/**/bar`,
+ listCase: map[string]bool{
+ `foo/bar`: true,
+ `foo/a/bar`: true,
+ `foo/a/b/bar`: true,
+ `foo/bar/foo`: false,
+ `bar`: false,
+ `a/foo/bar`: false,
+ `a/foo/b/bar`: false,
+ },
+ }, {
+ pattern: `a+b|c`,
+ listCase: map[string]bool{
+ `a+b|c`: true,
+ `aab|c`: false,
+ `aab`: false,
+ },
+ }, {
+ pattern: `(a|b)`,
+ listCase: map[string]bool{
+ `(a|b)`: true,
+ `a`: false,
+ `b`: false,
+ },
+ }}
+ for _, tc := range listCase {
+ var pat = parsePattern([]byte(tc.pattern))
+ for name, exp := range tc.listCase {
+ var got = pat.isMatch(name)
+ if exp != got {
+ t.Fatalf("%q: on %q want %t, got %t",
+ tc.pattern, name, exp, got)
+ }
+ }
+ }
+}
+
+func TestRemoveComment(t *testing.T) {
+ type testCase struct {
+ pattern string
+ exp string
+ }
+ var listCase = []testCase{{
+ pattern: `a#`,
+ exp: `a`,
+ }, {
+ pattern: `a\#`,
+ exp: `a\#`,
+ }, {
+ pattern: `a\##`,
+ exp: `a\#`,
+ }}
+ for _, tc := range listCase {
+ got := removeComment([]byte(tc.pattern))
+ test.Assert(t, tc.pattern, tc.exp, string(got))
+ }
+}
diff --git a/lib/git/testdata/IsIgnored/foo/vendor b/lib/git/testdata/IsIgnored/foo/vendor
new file mode 100644
index 00000000..62f13a9e
--- /dev/null
+++ b/lib/git/testdata/IsIgnored/foo/vendor
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>