aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--brokenlinks.go68
-rw-r--r--brokenlinks_test.go (renamed from deadlinks_test.go)44
-rw-r--r--brokenlinks_worker.go (renamed from worker.go)32
-rw-r--r--cmd/jarink/main.go (renamed from cmd/deadlinks/main.go)41
-rw-r--r--deadlinks.go32
-rw-r--r--go.mod4
-rw-r--r--link_queue.go2
-rw-r--r--result.go36
-rw-r--r--scan_options.go10
-rw-r--r--url_test.go2
10 files changed, 134 insertions, 137 deletions
diff --git a/brokenlinks.go b/brokenlinks.go
new file mode 100644
index 0000000..768216d
--- /dev/null
+++ b/brokenlinks.go
@@ -0,0 +1,68 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package jarink
+
+import (
+ "fmt"
+ "slices"
+ "strings"
+)
+
+const Version = `0.1.0` // Version is the release version, printed at the end of the jarink command usage.
+
+// StatusBadLink is the status code for a link that is not parseable by [url.Parse]
+// or not reachable during GET or HEAD, either by timeout or because the IP or domain does not exist.
+const StatusBadLink = 700
+
+// BrokenlinksOptions define the options for scanning broken links.
+type BrokenlinksOptions struct {
+ Url string // Url is the URL to scan; the worker parses it with url.Parse.
+ IsVerbose bool // IsVerbose enables extra output while scanning, for example scan errors are printed.
+}
+
+// Broken stores a broken link, its HTTP status code (or StatusBadLink), and
+// the error message that caused it; Error is omitted from JSON when empty.
+type Broken struct {
+ Link string
+ Error string `json:",omitempty"` // Leading comma keeps the key "Error" and sets the omitempty option.
+ Code int
+}
+
+// BrokenlinksResult store the result of scanning for broken links.
+type BrokenlinksResult struct {
+ // PageLinks maps the URL of a page to the list of broken links found on that page.
+ PageLinks map[string][]Broken
+}
+
+// newBrokenlinksResult returns a result with a non-nil, empty PageLinks.
+func newBrokenlinksResult() *BrokenlinksResult {
+ var pageLinks = make(map[string][]Broken)
+ return &BrokenlinksResult{PageLinks: pageLinks}
+}
+
+// sort orders the broken links of every page by Link, ascending, in place.
+func (result *BrokenlinksResult) sort() {
+ var byLink = func(a, b Broken) int { return strings.Compare(a.Link, b.Link) }
+ for page := range result.PageLinks {
+ slices.SortFunc(result.PageLinks[page], byLink)
+ }
+}
+
+// Brokenlinks scans opts.Url for broken links; errors are wrapped with the "brokenlinks" prefix.
+func Brokenlinks(opts BrokenlinksOptions) (result *BrokenlinksResult, err error) {
+ var logp = `brokenlinks`
+ var wrk *brokenlinksWorker
+
+ wrk, err = newWorker(opts)
+ if err != nil {
+ return nil, fmt.Errorf(`%s: %w`, logp, err) // %w keeps the cause reachable by errors.Is/As.
+ }
+
+ result, err = wrk.run()
+ if err != nil {
+ return nil, fmt.Errorf(`%s: %w`, logp, err)
+ }
+
+ return result, nil
+}
diff --git a/deadlinks_test.go b/brokenlinks_test.go
index c219aa0..c1a607f 100644
--- a/deadlinks_test.go
+++ b/brokenlinks_test.go
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
// SPDX-License-Identifier: GPL-3.0-only
-package deadlinks_test
+package jarink_test
import (
"log"
@@ -10,7 +10,7 @@ import (
"testing"
"time"
- "git.sr.ht/~shulhan/deadlinks"
+ "git.sr.ht/~shulhan/jarink"
libnet "git.sr.ht/~shulhan/pakakeh.go/lib/net"
"git.sr.ht/~shulhan/pakakeh.go/lib/test"
)
@@ -71,25 +71,25 @@ func TestMain(m *testing.M) {
os.Exit(m.Run())
}
-func TestDeadLinks_Scan(t *testing.T) {
+func TestBrokenlinks(t *testing.T) {
var testUrl = `http://` + testAddress
type testCase struct {
- exp map[string][]deadlinks.Broken
+ exp map[string][]jarink.Broken
scanUrl string
expError string
}
listCase := []testCase{{
scanUrl: `127.0.0.1:14594`,
- expError: `Scan: invalid URL "127.0.0.1:14594"`,
+ expError: `brokenlinks: invalid URL "127.0.0.1:14594"`,
}, {
scanUrl: `http://127.0.0.1:14594`,
- expError: `Scan: Get "http://127.0.0.1:14594": dial tcp 127.0.0.1:14594: connect: connection refused`,
+ expError: `brokenlinks: Get "http://127.0.0.1:14594": dial tcp 127.0.0.1:14594: connect: connection refused`,
}, {
scanUrl: testUrl,
- exp: map[string][]deadlinks.Broken{
- testUrl: []deadlinks.Broken{
+ exp: map[string][]jarink.Broken{
+ testUrl: []jarink.Broken{
{
Link: testUrl + `/broken.png`,
Code: http.StatusNotFound,
@@ -99,20 +99,20 @@ func TestDeadLinks_Scan(t *testing.T) {
}, {
Link: `http://127.0.0.1:abc`,
Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`,
- Code: deadlinks.StatusBadLink,
+ Code: jarink.StatusBadLink,
}, {
Link: `http:/127.0.0.1:11836`,
Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`,
- Code: deadlinks.StatusBadLink,
+ Code: jarink.StatusBadLink,
},
},
- testUrl + `/broken.html`: []deadlinks.Broken{
+ testUrl + `/broken.html`: []jarink.Broken{
{
Link: testUrl + `/brokenPage`,
Code: http.StatusNotFound,
},
},
- testUrl + `/page2`: []deadlinks.Broken{
+ testUrl + `/page2`: []jarink.Broken{
{
Link: testUrl + `/broken.png`,
Code: http.StatusNotFound,
@@ -127,8 +127,8 @@ func TestDeadLinks_Scan(t *testing.T) {
},
}, {
scanUrl: testUrl + `/page2`,
- exp: map[string][]deadlinks.Broken{
- testUrl: []deadlinks.Broken{
+ exp: map[string][]jarink.Broken{
+ testUrl: []jarink.Broken{
{
Link: testUrl + `/broken.png`,
Code: http.StatusNotFound,
@@ -138,20 +138,20 @@ func TestDeadLinks_Scan(t *testing.T) {
}, {
Link: `http://127.0.0.1:abc`,
Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`,
- Code: deadlinks.StatusBadLink,
+ Code: jarink.StatusBadLink,
}, {
Link: `http:/127.0.0.1:11836`,
Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`,
- Code: deadlinks.StatusBadLink,
+ Code: jarink.StatusBadLink,
},
},
- testUrl + `/broken.html`: []deadlinks.Broken{
+ testUrl + `/broken.html`: []jarink.Broken{
{
Link: testUrl + `/brokenPage`,
Code: http.StatusNotFound,
},
},
- testUrl + `/page2`: []deadlinks.Broken{
+ testUrl + `/page2`: []jarink.Broken{
{
Link: testUrl + `/broken.png`,
Code: http.StatusNotFound,
@@ -167,15 +167,15 @@ func TestDeadLinks_Scan(t *testing.T) {
}}
var (
- result *deadlinks.Result
+ result *jarink.BrokenlinksResult
err error
)
for _, tcase := range listCase {
- t.Logf(`--- Scan: %s`, tcase.scanUrl)
- var scanOpts = deadlinks.ScanOptions{
+ t.Logf(`--- brokenlinks: %s`, tcase.scanUrl)
+ var brokenlinksOpts = jarink.BrokenlinksOptions{
Url: tcase.scanUrl,
}
- result, err = deadlinks.Scan(scanOpts)
+ result, err = jarink.Brokenlinks(brokenlinksOpts)
if err != nil {
test.Assert(t, tcase.scanUrl+` error`,
tcase.expError, err.Error())
diff --git a/worker.go b/brokenlinks_worker.go
index 817ff3b..03359b7 100644
--- a/worker.go
+++ b/brokenlinks_worker.go
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
// SPDX-License-Identifier: GPL-3.0-only
-package deadlinks
+package jarink
import (
"fmt"
@@ -16,7 +16,7 @@ import (
"golang.org/x/net/html/atom"
)
-type worker struct {
+type brokenlinksWorker struct {
// seenLink store the URL being or has been scanned and its HTTP
// status code.
seenLink map[string]int
@@ -26,7 +26,7 @@ type worker struct {
// result contains the final result after all of the pages has been
// scanned.
- result *Result
+ result *BrokenlinksResult
// The base URL that will be joined to relative or absolute
// links or image.
@@ -35,18 +35,18 @@ type worker struct {
// The URL to scan.
scanUrl *url.URL
- opts ScanOptions
+ opts BrokenlinksOptions
// wg sync the goroutine scanner.
wg sync.WaitGroup
}
-func newWorker(opts ScanOptions) (wrk *worker, err error) {
- wrk = &worker{
+func newWorker(opts BrokenlinksOptions) (wrk *brokenlinksWorker, err error) {
+ wrk = &brokenlinksWorker{
opts: opts,
seenLink: map[string]int{},
resultq: make(chan map[string]linkQueue, 100),
- result: newResult(),
+ result: newBrokenlinksResult(),
}
wrk.scanUrl, err = url.Parse(opts.Url)
@@ -66,7 +66,7 @@ func newWorker(opts ScanOptions) (wrk *worker, err error) {
return wrk, nil
}
-func (wrk *worker) run() (result *Result, err error) {
+func (wrk *brokenlinksWorker) run() (result *BrokenlinksResult, err error) {
// Scan the first URL to make sure that the server is reachable.
var firstLinkq = linkQueue{
parentUrl: nil,
@@ -89,7 +89,7 @@ func (wrk *worker) run() (result *Result, err error) {
continue
}
if linkq.status >= http.StatusBadRequest {
- wrk.markDead(linkq)
+ wrk.markBroken(linkq)
continue
}
@@ -118,7 +118,7 @@ func (wrk *worker) run() (result *Result, err error) {
var newList []linkQueue
for _, linkq := range resultq {
if linkq.status >= http.StatusBadRequest {
- wrk.markDead(linkq)
+ wrk.markBroken(linkq)
continue
}
if linkq.status != 0 {
@@ -137,7 +137,7 @@ func (wrk *worker) run() (result *Result, err error) {
}
if seenStatus >= http.StatusBadRequest {
linkq.status = seenStatus
- wrk.markDead(linkq)
+ wrk.markBroken(linkq)
continue
}
if seenStatus >= http.StatusOK {
@@ -158,7 +158,7 @@ func (wrk *worker) run() (result *Result, err error) {
seenStatus := wrk.seenLink[linkq.url]
if seenStatus >= http.StatusBadRequest {
linkq.status = seenStatus
- wrk.markDead(linkq)
+ wrk.markBroken(linkq)
continue
}
if seenStatus >= http.StatusOK {
@@ -189,7 +189,7 @@ func (wrk *worker) run() (result *Result, err error) {
return wrk.result, nil
}
-func (wrk *worker) markDead(linkq linkQueue) {
+func (wrk *brokenlinksWorker) markBroken(linkq linkQueue) {
var parentUrl = linkq.parentUrl.String()
var listBroken = wrk.result.PageLinks[parentUrl]
var brokenLink = Broken{
@@ -206,7 +206,7 @@ func (wrk *worker) markDead(linkq linkQueue) {
}
// scan fetch the HTML page or image to check if its valid.
-func (wrk *worker) scan(linkq linkQueue) {
+func (wrk *brokenlinksWorker) scan(linkq linkQueue) {
defer func() {
if wrk.opts.IsVerbose && linkq.errScan != nil {
fmt.Printf("error: %d %s error=%v\n", linkq.status,
@@ -308,7 +308,7 @@ func (wrk *worker) scan(linkq linkQueue) {
go wrk.pushResult(resultq)
}
-func (wrk *worker) processLink(parentUrl *url.URL, val string, kind atom.Atom) (
+func (wrk *brokenlinksWorker) processLink(parentUrl *url.URL, val string, kind atom.Atom) (
linkq *linkQueue,
) {
if len(val) == 0 {
@@ -356,7 +356,7 @@ func (wrk *worker) processLink(parentUrl *url.URL, val string, kind atom.Atom) (
return linkq
}
-func (wrk *worker) pushResult(resultq map[string]linkQueue) {
+func (wrk *brokenlinksWorker) pushResult(resultq map[string]linkQueue) {
var tick = time.NewTicker(100 * time.Millisecond)
for {
select {
diff --git a/cmd/deadlinks/main.go b/cmd/jarink/main.go
index 16057ee..c4af038 100644
--- a/cmd/deadlinks/main.go
+++ b/cmd/jarink/main.go
@@ -11,7 +11,7 @@ import (
"os"
"strings"
- "git.sr.ht/~shulhan/deadlinks"
+ "git.sr.ht/~shulhan/jarink"
)
func main() {
@@ -28,18 +28,18 @@ func main() {
}
cmd = strings.ToLower(cmd)
- if cmd == "scan" {
- var scanOpts = deadlinks.ScanOptions{
+ if cmd == "brokenlinks" {
+ var brokenlinksOpts = jarink.BrokenlinksOptions{
Url: flag.Arg(1),
IsVerbose: optVerbose,
}
- if scanOpts.Url == "" {
+ if brokenlinksOpts.Url == "" {
goto invalid_command
}
- var result *deadlinks.Result
+ var result *jarink.BrokenlinksResult
var err error
- result, err = deadlinks.Scan(scanOpts)
+ result, err = jarink.Brokenlinks(brokenlinksOpts)
if err != nil {
log.Fatal(err.Error())
}
@@ -60,27 +60,34 @@ invalid_command:
func usage() {
log.Println(`
-deadlinks <COMMAND> <args...>
+Jarink is a program to help web administrator to maintains their website.
-Deadlinks is a program to scan for invalid links inside HTML page on the live
-web server.
-Invalid links will be scanned on anchor href attribute ("<a href=...>") or on
-the image src attribute ("<img src=...").
+== Synopsis
+
+ jarink [OPTIONS] <COMMAND> <args...>
+
+Available commands,
+
+ brokenlinks - scan the website for broken links (page and images).
== Usage
-[OPTIONS] scan URL
+[OPTIONS] brokenlinks URL
- Start scanning for deadlinks on the web server pointed by URL.
- Once finished it will print the page and list of dead links inside
+ Start scanning for broken links on the web server pointed by URL.
+ Invalid links will be scanned on anchor href attribute
+ ("<a href=...>") or on the image src attribute ("<img src=...").
+
+ Once finished it will print the page and list of broken links inside
that page in JSON format.
+
This command accept the following options,
- -verbose : print the page that being scanned.
+ -verbose : print the page that being scanned.
Example,
- $ deadlinks scan https://kilabit.info
+ $ jarink brokenlinks https://kilabit.info
{
"https://kilabit.info/some/page": [
{
@@ -107,5 +114,5 @@ the image src attribute ("<img src=...").
}
--
-deadlinks v` + deadlinks.Version)
+jarink v` + jarink.Version)
}
diff --git a/deadlinks.go b/deadlinks.go
deleted file mode 100644
index 2edcd0d..0000000
--- a/deadlinks.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package deadlinks
-
-import (
- "fmt"
-)
-
-const Version = `0.1.0`
-
-// StatusBadLink status for link that is not parseable by [url.Parse] or not
-// reachable during GET or HEAD, either timeout or IP or domain not exist.
-const StatusBadLink = 700
-
-// Scan the baseUrl for dead links.
-func Scan(opts ScanOptions) (result *Result, err error) {
- var logp = `Scan`
- var wrk *worker
-
- wrk, err = newWorker(opts)
- if err != nil {
- return nil, fmt.Errorf(`%s: %s`, logp, err)
- }
-
- result, err = wrk.run()
- if err != nil {
- return nil, fmt.Errorf(`%s: %s`, logp, err)
- }
-
- return result, nil
-}
diff --git a/go.mod b/go.mod
index a57a7c9..2063444 100644
--- a/go.mod
+++ b/go.mod
@@ -1,9 +1,9 @@
// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
// SPDX-License-Identifier: GPL-3.0-only
-module git.sr.ht/~shulhan/deadlinks
+module git.sr.ht/~shulhan/jarink
-go 1.25
+go 1.24
require (
git.sr.ht/~shulhan/pakakeh.go v0.60.1
diff --git a/link_queue.go b/link_queue.go
index 63940cc..0b419b8 100644
--- a/link_queue.go
+++ b/link_queue.go
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
// SPDX-License-Identifier: GPL-3.0-only
-package deadlinks
+package jarink
import (
"net/url"
diff --git a/result.go b/result.go
deleted file mode 100644
index 6fdc817..0000000
--- a/result.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package deadlinks
-
-import (
- "slices"
- "strings"
-)
-
-// Broken store the link with its HTTP status.
-type Broken struct {
- Link string
- Error string `json:"omitempty"`
- Code int
-}
-
-// Result store the result of Scan.
-type Result struct {
- // PageLinks store the page and its broken links.
- PageLinks map[string][]Broken
-}
-
-func newResult() *Result {
- return &Result{
- PageLinks: map[string][]Broken{},
- }
-}
-
-func (result *Result) sort() {
- for _, listBroken := range result.PageLinks {
- slices.SortFunc(listBroken, func(a, b Broken) int {
- return strings.Compare(a.Link, b.Link)
- })
- }
-}
diff --git a/scan_options.go b/scan_options.go
deleted file mode 100644
index bc5484e..0000000
--- a/scan_options.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package deadlinks
-
-// ScanOptions define the options for scan command or Scan function.
-type ScanOptions struct {
- Url string
- IsVerbose bool
-}
diff --git a/url_test.go b/url_test.go
index 506090d..0b0bf03 100644
--- a/url_test.go
+++ b/url_test.go
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
// SPDX-License-Identifier: GPL-3.0-only
-package deadlinks
+package jarink
import (
"net/url"