diff options
| author | Shulhan <ms@kilabit.info> | 2025-06-01 01:05:21 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2025-06-01 01:08:03 +0700 |
| commit | 3ed69f535b6081bfa88b83f6fbf8c94527afe350 (patch) | |
| tree | d92cc08f2586d9e75c9345f0609522e17bd4fb5c | |
| parent | 0e4126ad99a216a08896156d31aafe3ab5611ba2 (diff) | |
| download | jarink-3ed69f535b6081bfa88b83f6fbf8c94527afe350.tar.xz | |
all: rename the program and repository into jarink
Jarink is a program to help web administrators maintain their websites.
Currently it provides a command to scan for broken links.
| -rw-r--r-- | brokenlinks.go | 68 | ||||
| -rw-r--r-- | brokenlinks_test.go (renamed from deadlinks_test.go) | 44 | ||||
| -rw-r--r-- | brokenlinks_worker.go (renamed from worker.go) | 32 | ||||
| -rw-r--r-- | cmd/jarink/main.go (renamed from cmd/deadlinks/main.go) | 41 | ||||
| -rw-r--r-- | deadlinks.go | 32 | ||||
| -rw-r--r-- | go.mod | 4 | ||||
| -rw-r--r-- | link_queue.go | 2 | ||||
| -rw-r--r-- | result.go | 36 | ||||
| -rw-r--r-- | scan_options.go | 10 | ||||
| -rw-r--r-- | url_test.go | 2 |
10 files changed, 134 insertions, 137 deletions
diff --git a/brokenlinks.go b/brokenlinks.go new file mode 100644 index 0000000..768216d --- /dev/null +++ b/brokenlinks.go @@ -0,0 +1,68 @@ +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> +// SPDX-License-Identifier: GPL-3.0-only + +package jarink + +import ( + "fmt" + "slices" + "strings" +) + +const Version = `0.1.0` + +// StatusBadLink status for link that is not parseable by [url.Parse] or not +// reachable during GET or HEAD, either timeout or IP or domain not exist. +const StatusBadLink = 700 + +// BrokenlinksOptions define the options for scanning broken links. +type BrokenlinksOptions struct { + Url string + IsVerbose bool +} + +// Broken store the broken link, HTTP status code, and the error message that +// cause it. +type Broken struct { + Link string + Error string `json:"omitempty"` + Code int +} + +// BrokenlinksResult store the result of scanning for broken links. +type BrokenlinksResult struct { + // PageLinks store the page and its broken links. + PageLinks map[string][]Broken +} + +func newBrokenlinksResult() *BrokenlinksResult { + return &BrokenlinksResult{ + PageLinks: map[string][]Broken{}, + } +} + +func (result *BrokenlinksResult) sort() { + for _, listBroken := range result.PageLinks { + slices.SortFunc(listBroken, func(a, b Broken) int { + return strings.Compare(a.Link, b.Link) + }) + } +} + +// Brokenlinks scan the URL for broken links. +func Brokenlinks(opts BrokenlinksOptions) (result *BrokenlinksResult, err error) { + var logp = `brokenlinks` + var wrk *brokenlinksWorker + + wrk, err = newWorker(opts) + if err != nil { + return nil, fmt.Errorf(`%s: %s`, logp, err) + } + + result, err = wrk.run() + if err != nil { + return nil, fmt.Errorf(`%s: %s`, logp, err) + } + + return result, nil +} diff --git a/deadlinks_test.go b/brokenlinks_test.go index c219aa0..c1a607f 100644 --- a/deadlinks_test.go +++ b/brokenlinks_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2025 M. 
Shulhan <ms@kilabit.info> // SPDX-License-Identifier: GPL-3.0-only -package deadlinks_test +package jarink_test import ( "log" @@ -10,7 +10,7 @@ import ( "testing" "time" - "git.sr.ht/~shulhan/deadlinks" + "git.sr.ht/~shulhan/jarink" libnet "git.sr.ht/~shulhan/pakakeh.go/lib/net" "git.sr.ht/~shulhan/pakakeh.go/lib/test" ) @@ -71,25 +71,25 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } -func TestDeadLinks_Scan(t *testing.T) { +func TestBrokenlinks(t *testing.T) { var testUrl = `http://` + testAddress type testCase struct { - exp map[string][]deadlinks.Broken + exp map[string][]jarink.Broken scanUrl string expError string } listCase := []testCase{{ scanUrl: `127.0.0.1:14594`, - expError: `Scan: invalid URL "127.0.0.1:14594"`, + expError: `brokenlinks: invalid URL "127.0.0.1:14594"`, }, { scanUrl: `http://127.0.0.1:14594`, - expError: `Scan: Get "http://127.0.0.1:14594": dial tcp 127.0.0.1:14594: connect: connection refused`, + expError: `brokenlinks: Get "http://127.0.0.1:14594": dial tcp 127.0.0.1:14594: connect: connection refused`, }, { scanUrl: testUrl, - exp: map[string][]deadlinks.Broken{ - testUrl: []deadlinks.Broken{ + exp: map[string][]jarink.Broken{ + testUrl: []jarink.Broken{ { Link: testUrl + `/broken.png`, Code: http.StatusNotFound, @@ -99,20 +99,20 @@ func TestDeadLinks_Scan(t *testing.T) { }, { Link: `http://127.0.0.1:abc`, Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`, - Code: deadlinks.StatusBadLink, + Code: jarink.StatusBadLink, }, { Link: `http:/127.0.0.1:11836`, Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`, - Code: deadlinks.StatusBadLink, + Code: jarink.StatusBadLink, }, }, - testUrl + `/broken.html`: []deadlinks.Broken{ + testUrl + `/broken.html`: []jarink.Broken{ { Link: testUrl + `/brokenPage`, Code: http.StatusNotFound, }, }, - testUrl + `/page2`: []deadlinks.Broken{ + testUrl + `/page2`: []jarink.Broken{ { Link: testUrl + `/broken.png`, Code: http.StatusNotFound, @@ -127,8 +127,8 @@ func 
TestDeadLinks_Scan(t *testing.T) { }, }, { scanUrl: testUrl + `/page2`, - exp: map[string][]deadlinks.Broken{ - testUrl: []deadlinks.Broken{ + exp: map[string][]jarink.Broken{ + testUrl: []jarink.Broken{ { Link: testUrl + `/broken.png`, Code: http.StatusNotFound, @@ -138,20 +138,20 @@ func TestDeadLinks_Scan(t *testing.T) { }, { Link: `http://127.0.0.1:abc`, Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`, - Code: deadlinks.StatusBadLink, + Code: jarink.StatusBadLink, }, { Link: `http:/127.0.0.1:11836`, Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`, - Code: deadlinks.StatusBadLink, + Code: jarink.StatusBadLink, }, }, - testUrl + `/broken.html`: []deadlinks.Broken{ + testUrl + `/broken.html`: []jarink.Broken{ { Link: testUrl + `/brokenPage`, Code: http.StatusNotFound, }, }, - testUrl + `/page2`: []deadlinks.Broken{ + testUrl + `/page2`: []jarink.Broken{ { Link: testUrl + `/broken.png`, Code: http.StatusNotFound, @@ -167,15 +167,15 @@ func TestDeadLinks_Scan(t *testing.T) { }} var ( - result *deadlinks.Result + result *jarink.BrokenlinksResult err error ) for _, tcase := range listCase { - t.Logf(`--- Scan: %s`, tcase.scanUrl) - var scanOpts = deadlinks.ScanOptions{ + t.Logf(`--- brokenlinks: %s`, tcase.scanUrl) + var brokenlinksOpts = jarink.BrokenlinksOptions{ Url: tcase.scanUrl, } - result, err = deadlinks.Scan(scanOpts) + result, err = jarink.Brokenlinks(brokenlinksOpts) if err != nil { test.Assert(t, tcase.scanUrl+` error`, tcase.expError, err.Error()) diff --git a/worker.go b/brokenlinks_worker.go index 817ff3b..03359b7 100644 --- a/worker.go +++ b/brokenlinks_worker.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2025 M. 
Shulhan <ms@kilabit.info> // SPDX-License-Identifier: GPL-3.0-only -package deadlinks +package jarink import ( "fmt" @@ -16,7 +16,7 @@ import ( "golang.org/x/net/html/atom" ) -type worker struct { +type brokenlinksWorker struct { // seenLink store the URL being or has been scanned and its HTTP // status code. seenLink map[string]int @@ -26,7 +26,7 @@ type worker struct { // result contains the final result after all of the pages has been // scanned. - result *Result + result *BrokenlinksResult // The base URL that will be joined to relative or absolute // links or image. @@ -35,18 +35,18 @@ type worker struct { // The URL to scan. scanUrl *url.URL - opts ScanOptions + opts BrokenlinksOptions // wg sync the goroutine scanner. wg sync.WaitGroup } -func newWorker(opts ScanOptions) (wrk *worker, err error) { - wrk = &worker{ +func newWorker(opts BrokenlinksOptions) (wrk *brokenlinksWorker, err error) { + wrk = &brokenlinksWorker{ opts: opts, seenLink: map[string]int{}, resultq: make(chan map[string]linkQueue, 100), - result: newResult(), + result: newBrokenlinksResult(), } wrk.scanUrl, err = url.Parse(opts.Url) @@ -66,7 +66,7 @@ func newWorker(opts ScanOptions) (wrk *worker, err error) { return wrk, nil } -func (wrk *worker) run() (result *Result, err error) { +func (wrk *brokenlinksWorker) run() (result *BrokenlinksResult, err error) { // Scan the first URL to make sure that the server is reachable. 
var firstLinkq = linkQueue{ parentUrl: nil, @@ -89,7 +89,7 @@ func (wrk *worker) run() (result *Result, err error) { continue } if linkq.status >= http.StatusBadRequest { - wrk.markDead(linkq) + wrk.markBroken(linkq) continue } @@ -118,7 +118,7 @@ func (wrk *worker) run() (result *Result, err error) { var newList []linkQueue for _, linkq := range resultq { if linkq.status >= http.StatusBadRequest { - wrk.markDead(linkq) + wrk.markBroken(linkq) continue } if linkq.status != 0 { @@ -137,7 +137,7 @@ func (wrk *worker) run() (result *Result, err error) { } if seenStatus >= http.StatusBadRequest { linkq.status = seenStatus - wrk.markDead(linkq) + wrk.markBroken(linkq) continue } if seenStatus >= http.StatusOK { @@ -158,7 +158,7 @@ func (wrk *worker) run() (result *Result, err error) { seenStatus := wrk.seenLink[linkq.url] if seenStatus >= http.StatusBadRequest { linkq.status = seenStatus - wrk.markDead(linkq) + wrk.markBroken(linkq) continue } if seenStatus >= http.StatusOK { @@ -189,7 +189,7 @@ func (wrk *worker) run() (result *Result, err error) { return wrk.result, nil } -func (wrk *worker) markDead(linkq linkQueue) { +func (wrk *brokenlinksWorker) markBroken(linkq linkQueue) { var parentUrl = linkq.parentUrl.String() var listBroken = wrk.result.PageLinks[parentUrl] var brokenLink = Broken{ @@ -206,7 +206,7 @@ func (wrk *worker) markDead(linkq linkQueue) { } // scan fetch the HTML page or image to check if its valid. 
-func (wrk *worker) scan(linkq linkQueue) { +func (wrk *brokenlinksWorker) scan(linkq linkQueue) { defer func() { if wrk.opts.IsVerbose && linkq.errScan != nil { fmt.Printf("error: %d %s error=%v\n", linkq.status, @@ -308,7 +308,7 @@ func (wrk *worker) scan(linkq linkQueue) { go wrk.pushResult(resultq) } -func (wrk *worker) processLink(parentUrl *url.URL, val string, kind atom.Atom) ( +func (wrk *brokenlinksWorker) processLink(parentUrl *url.URL, val string, kind atom.Atom) ( linkq *linkQueue, ) { if len(val) == 0 { @@ -356,7 +356,7 @@ func (wrk *worker) processLink(parentUrl *url.URL, val string, kind atom.Atom) ( return linkq } -func (wrk *worker) pushResult(resultq map[string]linkQueue) { +func (wrk *brokenlinksWorker) pushResult(resultq map[string]linkQueue) { var tick = time.NewTicker(100 * time.Millisecond) for { select { diff --git a/cmd/deadlinks/main.go b/cmd/jarink/main.go index 16057ee..c4af038 100644 --- a/cmd/deadlinks/main.go +++ b/cmd/jarink/main.go @@ -11,7 +11,7 @@ import ( "os" "strings" - "git.sr.ht/~shulhan/deadlinks" + "git.sr.ht/~shulhan/jarink" ) func main() { @@ -28,18 +28,18 @@ func main() { } cmd = strings.ToLower(cmd) - if cmd == "scan" { - var scanOpts = deadlinks.ScanOptions{ + if cmd == "brokenlinks" { + var brokenlinksOpts = jarink.BrokenlinksOptions{ Url: flag.Arg(1), IsVerbose: optVerbose, } - if scanOpts.Url == "" { + if brokenlinksOpts.Url == "" { goto invalid_command } - var result *deadlinks.Result + var result *jarink.BrokenlinksResult var err error - result, err = deadlinks.Scan(scanOpts) + result, err = jarink.Brokenlinks(brokenlinksOpts) if err != nil { log.Fatal(err.Error()) } @@ -60,27 +60,34 @@ invalid_command: func usage() { log.Println(` -deadlinks <COMMAND> <args...> +Jarink is a program to help web administrator to maintains their website. -Deadlinks is a program to scan for invalid links inside HTML page on the live -web server. 
-Invalid links will be scanned on anchor href attribute ("<a href=...>") or on -the image src attribute ("<img src=..."). +== Synopsis + + jarink [OPTIONS] <COMMAND> <args...> + +Available commands, + + brokenlinks - scan the website for broken links (page and images). == Usage -[OPTIONS] scan URL +[OPTIONS] brokenlinks URL - Start scanning for deadlinks on the web server pointed by URL. - Once finished it will print the page and list of dead links inside + Start scanning for broken links on the web server pointed by URL. + Invalid links will be scanned on anchor href attribute + ("<a href=...>") or on the image src attribute ("<img src=..."). + + Once finished it will print the page and list of broken links inside that page in JSON format. + This command accept the following options, - -verbose : print the page that being scanned. + -verbose : print the page that being scanned. Example, - $ deadlinks scan https://kilabit.info + $ jarink scan https://kilabit.info { "https://kilabit.info/some/page": [ { @@ -107,5 +114,5 @@ the image src attribute ("<img src=..."). } -- -deadlinks v` + deadlinks.Version) +jarink v` + jarink.Version) } diff --git a/deadlinks.go b/deadlinks.go deleted file mode 100644 index 2edcd0d..0000000 --- a/deadlinks.go +++ /dev/null @@ -1,32 +0,0 @@ -// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> -// SPDX-License-Identifier: GPL-3.0-only - -package deadlinks - -import ( - "fmt" -) - -const Version = `0.1.0` - -// StatusBadLink status for link that is not parseable by [url.Parse] or not -// reachable during GET or HEAD, either timeout or IP or domain not exist. -const StatusBadLink = 700 - -// Scan the baseUrl for dead links. 
-func Scan(opts ScanOptions) (result *Result, err error) { - var logp = `Scan` - var wrk *worker - - wrk, err = newWorker(opts) - if err != nil { - return nil, fmt.Errorf(`%s: %s`, logp, err) - } - - result, err = wrk.run() - if err != nil { - return nil, fmt.Errorf(`%s: %s`, logp, err) - } - - return result, nil -} @@ -1,9 +1,9 @@ // SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> // SPDX-License-Identifier: GPL-3.0-only -module git.sr.ht/~shulhan/deadlinks +module git.sr.ht/~shulhan/jarink -go 1.25 +go 1.24 require ( git.sr.ht/~shulhan/pakakeh.go v0.60.1 diff --git a/link_queue.go b/link_queue.go index 63940cc..0b419b8 100644 --- a/link_queue.go +++ b/link_queue.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> // SPDX-License-Identifier: GPL-3.0-only -package deadlinks +package jarink import ( "net/url" diff --git a/result.go b/result.go deleted file mode 100644 index 6fdc817..0000000 --- a/result.go +++ /dev/null @@ -1,36 +0,0 @@ -// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> -// SPDX-License-Identifier: GPL-3.0-only - -package deadlinks - -import ( - "slices" - "strings" -) - -// Broken store the link with its HTTP status. -type Broken struct { - Link string - Error string `json:"omitempty"` - Code int -} - -// Result store the result of Scan. -type Result struct { - // PageLinks store the page and its broken links. - PageLinks map[string][]Broken -} - -func newResult() *Result { - return &Result{ - PageLinks: map[string][]Broken{}, - } -} - -func (result *Result) sort() { - for _, listBroken := range result.PageLinks { - slices.SortFunc(listBroken, func(a, b Broken) int { - return strings.Compare(a.Link, b.Link) - }) - } -} diff --git a/scan_options.go b/scan_options.go deleted file mode 100644 index bc5484e..0000000 --- a/scan_options.go +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-FileCopyrightText: 2025 M. 
Shulhan <ms@kilabit.info> -// SPDX-License-Identifier: GPL-3.0-only - -package deadlinks - -// ScanOptions define the options for scan command or Scan function. -type ScanOptions struct { - Url string - IsVerbose bool -} diff --git a/url_test.go b/url_test.go index 506090d..0b0bf03 100644 --- a/url_test.go +++ b/url_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> // SPDX-License-Identifier: GPL-3.0-only -package deadlinks +package jarink import ( "net/url" |
