From 3ed69f535b6081bfa88b83f6fbf8c94527afe350 Mon Sep 17 00:00:00 2001
From: Shulhan <ms@kilabit.info>
Date: Sun, 1 Jun 2025 01:05:21 +0700
Subject: all: rename the program and repository into jarink

Jarink is a program to help web administrators maintain their websites.
Currently it provides a command to scan for broken links.
---
 brokenlinks.go        |  68 ++++++++++
 brokenlinks_test.go   | 188 +++++++++++++++++++++++++
 brokenlinks_worker.go | 369 ++++++++++++++++++++++++++++++++++++++++++++++++++
 cmd/deadlinks/main.go | 111 ---------------
 cmd/jarink/main.go    | 118 ++++++++++++++++
 deadlinks.go          |  32 -----
 deadlinks_test.go     | 188 -------------------------
 go.mod                |   4 +-
 link_queue.go         |   2 +-
 result.go             |  36 -----
 scan_options.go       |  10 --
 url_test.go           |   2 +-
 worker.go             | 369 --------------------------------------------------
 13 files changed, 747 insertions(+), 750 deletions(-)
 create mode 100644 brokenlinks.go
 create mode 100644 brokenlinks_test.go
 create mode 100644 brokenlinks_worker.go
 delete mode 100644 cmd/deadlinks/main.go
 create mode 100644 cmd/jarink/main.go
 delete mode 100644 deadlinks.go
 delete mode 100644 deadlinks_test.go
 delete mode 100644 result.go
 delete mode 100644 scan_options.go
 delete mode 100644 worker.go

diff --git a/brokenlinks.go b/brokenlinks.go
new file mode 100644
index 0000000..768216d
--- /dev/null
+++ b/brokenlinks.go
@@ -0,0 +1,68 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package jarink
+
+import (
+	"fmt"
+	"slices"
+	"strings"
+)
+
+const Version = `0.1.0`
+
+// StatusBadLink is the status for a link that [url.Parse] cannot parse or
+// whose GET or HEAD request failed, e.g. timeout or unknown IP or domain.
+const StatusBadLink = 700
+
+// BrokenlinksOptions define the options for scanning broken links.
+type BrokenlinksOptions struct {
+	Url       string
+	IsVerbose bool
+}
+
+// Broken stores a broken link, its status code, and the error that caused
+// it; the Error field is omitted from the JSON output when it is empty.
+type Broken struct {
+	Link  string
+	Error string `json:",omitempty"`
+	Code  int
+}
+
+// BrokenlinksResult store the result of scanning for broken links.
+type BrokenlinksResult struct {
+	// PageLinks store the page and its broken links.
+	PageLinks map[string][]Broken
+}
+
+func newBrokenlinksResult() *BrokenlinksResult {
+	return &BrokenlinksResult{
+		PageLinks: map[string][]Broken{},
+	}
+}
+
+func (result *BrokenlinksResult) sort() {
+	for _, listBroken := range result.PageLinks {
+		slices.SortFunc(listBroken, func(a, b Broken) int {
+			return strings.Compare(a.Link, b.Link)
+		})
+	}
+}
+
+// Brokenlinks scans opts.Url for broken links; errors wrap their cause.
+func Brokenlinks(opts BrokenlinksOptions) (result *BrokenlinksResult, err error) {
+	var logp = `brokenlinks`
+	var wrk *brokenlinksWorker
+
+	wrk, err = newWorker(opts)
+	if err != nil {
+		return nil, fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	result, err = wrk.run()
+	if err != nil {
+		return nil, fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	return result, nil
+}
diff --git a/brokenlinks_test.go b/brokenlinks_test.go
new file mode 100644
index 0000000..c1a607f
--- /dev/null
+++ b/brokenlinks_test.go
@@ -0,0 +1,188 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package jarink_test
+
+import (
+	"log"
+	"net/http"
+	"os"
+	"testing"
+	"time"
+
+	"git.sr.ht/~shulhan/jarink"
+	libnet "git.sr.ht/~shulhan/pakakeh.go/lib/net"
+	"git.sr.ht/~shulhan/pakakeh.go/lib/test"
+)
+
+// The test run two web servers that serve content on "testdata/web/".
+// The first web server is the one that we want to scan.
+// The second web server is external web server, where HTML pages should not
+// be parsed.
+
+const testAddress = `127.0.0.1:11836`
+const testExternalAddress = `127.0.0.1:11900`
+
+func TestMain(m *testing.M) {
+	log.SetFlags(0)
+	var httpDirWeb = http.Dir(`testdata/web`)
+	var fshandle = http.FileServer(httpDirWeb)
+
+	go func() {
+		var mux = http.NewServeMux()
+		mux.Handle(`/`, fshandle)
+		var testServer = &http.Server{
+			Addr:           testAddress,
+			Handler:        mux,
+			ReadTimeout:    10 * time.Second,
+			WriteTimeout:   10 * time.Second,
+			MaxHeaderBytes: 1 << 20,
+		}
+		var err = testServer.ListenAndServe()
+		if err != nil {
+			log.Fatal(err)
+		}
+	}()
+	go func() {
+		var mux = http.NewServeMux()
+		mux.Handle(`/`, fshandle)
+		var testServer = &http.Server{
+			Addr:           testExternalAddress,
+			Handler:        mux,
+			ReadTimeout:    10 * time.Second,
+			WriteTimeout:   10 * time.Second,
+			MaxHeaderBytes: 1 << 20,
+		}
+		var err = testServer.ListenAndServe()
+		if err != nil {
+			log.Fatal(err)
+		}
+	}()
+
+	var err = libnet.WaitAlive(`tcp`, testAddress, 5*time.Second)
+	if err != nil {
+		log.Fatal(err)
+	}
+	err = libnet.WaitAlive(`tcp`, testExternalAddress, 5*time.Second)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	os.Exit(m.Run())
+}
+
+func TestBrokenlinks(t *testing.T) {
+	var testUrl = `http://` + testAddress
+
+	type testCase struct {
+		exp      map[string][]jarink.Broken
+		scanUrl  string
+		expError string
+	}
+
+	listCase := []testCase{{
+		scanUrl:  `127.0.0.1:14594`,
+		expError: `brokenlinks: invalid URL "127.0.0.1:14594"`,
+	}, {
+		scanUrl:  `http://127.0.0.1:14594`,
+		expError: `brokenlinks: Get "http://127.0.0.1:14594": dial tcp 127.0.0.1:14594: connect: connection refused`,
+	}, {
+		scanUrl: testUrl,
+		exp: map[string][]jarink.Broken{
+			testUrl: []jarink.Broken{
+				{
+					Link: testUrl + `/broken.png`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/brokenPage`,
+					Code: http.StatusNotFound,
+				}, {
+					Link:  `http://127.0.0.1:abc`,
+					Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`,
+					Code:  jarink.StatusBadLink,
+				}, {
+					Link:  `http:/127.0.0.1:11836`,
+					Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`,
+					Code:  jarink.StatusBadLink,
+				},
+			},
+			testUrl + `/broken.html`: []jarink.Broken{
+				{
+					Link: testUrl + `/brokenPage`,
+					Code: http.StatusNotFound,
+				},
+			},
+			testUrl + `/page2`: []jarink.Broken{
+				{
+					Link: testUrl + `/broken.png`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken/relative`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken2.png`,
+					Code: http.StatusNotFound,
+				},
+			},
+		},
+	}, {
+		scanUrl: testUrl + `/page2`,
+		exp: map[string][]jarink.Broken{
+			testUrl: []jarink.Broken{
+				{
+					Link: testUrl + `/broken.png`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/brokenPage`,
+					Code: http.StatusNotFound,
+				}, {
+					Link:  `http://127.0.0.1:abc`,
+					Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`,
+					Code:  jarink.StatusBadLink,
+				}, {
+					Link:  `http:/127.0.0.1:11836`,
+					Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`,
+					Code:  jarink.StatusBadLink,
+				},
+			},
+			testUrl + `/broken.html`: []jarink.Broken{
+				{
+					Link: testUrl + `/brokenPage`,
+					Code: http.StatusNotFound,
+				},
+			},
+			testUrl + `/page2`: []jarink.Broken{
+				{
+					Link: testUrl + `/broken.png`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken/relative`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken2.png`,
+					Code: http.StatusNotFound,
+				},
+			},
+		},
+	}}
+
+	var (
+		result *jarink.BrokenlinksResult
+		err    error
+	)
+	for _, tcase := range listCase {
+		t.Logf(`--- brokenlinks: %s`, tcase.scanUrl)
+		var brokenlinksOpts = jarink.BrokenlinksOptions{
+			Url: tcase.scanUrl,
+		}
+		result, err = jarink.Brokenlinks(brokenlinksOpts)
+		if err != nil {
+			test.Assert(t, tcase.scanUrl+` error`,
+				tcase.expError, err.Error())
+			continue
+		}
+		//got, _ := json.MarshalIndent(result.PageLinks, ``, `  `)
+		//t.Logf(`got=%s`, got)
+		test.Assert(t, tcase.scanUrl, tcase.exp, result.PageLinks)
+	}
+}
diff --git a/brokenlinks_worker.go b/brokenlinks_worker.go
new file mode 100644
index 0000000..03359b7
--- /dev/null
+++ b/brokenlinks_worker.go
@@ -0,0 +1,369 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package jarink
+
+import (
+	"fmt"
+	"log"
+	"net/http"
+	"net/url"
+	"strings"
+	"sync"
+	"time"
+
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+)
+
+type brokenlinksWorker struct {
+	// seenLink store the URL being or has been scanned and its HTTP
+	// status code.
+	seenLink map[string]int
+
+	// resultq channel that collect result from scanning.
+	resultq chan map[string]linkQueue
+
+	// result contains the final result after all of the pages has been
+	// scanned.
+	result *BrokenlinksResult
+
+	// The base URL that will be joined to relative or absolute
+	// links or image.
+	baseUrl *url.URL
+
+	// The URL to scan.
+	scanUrl *url.URL
+
+	opts BrokenlinksOptions
+
+	// wg sync the goroutine scanner.
+	wg sync.WaitGroup
+}
+
+func newWorker(opts BrokenlinksOptions) (wrk *brokenlinksWorker, err error) {
+	wrk = &brokenlinksWorker{
+		opts:     opts,
+		seenLink: map[string]int{},
+		resultq:  make(chan map[string]linkQueue, 100),
+		result:   newBrokenlinksResult(),
+	}
+
+	wrk.scanUrl, err = url.Parse(opts.Url)
+	if err != nil {
+		return nil, fmt.Errorf(`invalid URL %q`, opts.Url)
+	}
+
+	wrk.scanUrl.Path = strings.TrimSuffix(wrk.scanUrl.Path, `/`)
+	wrk.scanUrl.Fragment = ""
+	wrk.scanUrl.RawFragment = ""
+
+	wrk.baseUrl = &url.URL{
+		Scheme: wrk.scanUrl.Scheme,
+		Host:   wrk.scanUrl.Host,
+	}
+
+	return wrk, nil
+}
+
+func (wrk *brokenlinksWorker) run() (result *BrokenlinksResult, err error) {
+	// Scan the first URL to make sure that the server is reachable.
+	var firstLinkq = linkQueue{
+		parentUrl: nil,
+		url:       wrk.scanUrl.String(),
+		status:    http.StatusProcessing,
+	}
+	wrk.seenLink[firstLinkq.url] = http.StatusProcessing
+
+	wrk.wg.Add(1)
+	go wrk.scan(firstLinkq)
+	wrk.wg.Wait()
+
+	var resultq = <-wrk.resultq
+	for _, linkq := range resultq {
+		if linkq.url == firstLinkq.url {
+			if linkq.errScan != nil {
+				return nil, linkq.errScan
+			}
+			wrk.seenLink[linkq.url] = linkq.status
+			continue
+		}
+		if linkq.status >= http.StatusBadRequest {
+			wrk.markBroken(linkq)
+			continue
+		}
+
+		wrk.seenLink[linkq.url] = http.StatusProcessing
+		wrk.wg.Add(1)
+		go wrk.scan(linkq)
+	}
+
+	var tick = time.NewTicker(500 * time.Millisecond)
+	var listWaitStatus []linkQueue
+	var isScanning = true
+	for isScanning {
+		select {
+		case resultq := <-wrk.resultq:
+
+			// The resultq contains the original URL being scanned
+			// and its child links.
+			// For example, scanning "http://example.tld" result
+			// in
+			//
+			//	"http://example.tld": {status=200}
+			//	"http://example.tld/page": {status=0}
+			//	"http://example.tld/image.png": {status=0}
+			//	"http://bad:domain/image.png": {status=700}
+
+			var newList []linkQueue
+			for _, linkq := range resultq {
+				if linkq.status >= http.StatusBadRequest {
+					wrk.markBroken(linkq)
+					continue
+				}
+				if linkq.status != 0 {
+					// linkq is the result of scan with
+					// non error status.
+					wrk.seenLink[linkq.url] = linkq.status
+					continue
+				}
+
+				seenStatus, seen := wrk.seenLink[linkq.url]
+				if !seen {
+					wrk.seenLink[linkq.url] = http.StatusProcessing
+					wrk.wg.Add(1)
+					go wrk.scan(linkq)
+					continue
+				}
+				if seenStatus >= http.StatusBadRequest {
+					linkq.status = seenStatus
+					wrk.markBroken(linkq)
+					continue
+				}
+				if seenStatus >= http.StatusOK {
+					// The link has been processed and its
+					// not an error.
+					continue
+				}
+				if seenStatus == http.StatusProcessing {
+					// The link being processed by other
+					// goroutine.
+					linkq.status = seenStatus
+					newList = append(newList, linkq)
+					continue
+				}
+				log.Fatalf("link=%s status=%d", linkq.url, linkq.status)
+			}
+			for _, linkq := range listWaitStatus {
+				seenStatus := wrk.seenLink[linkq.url]
+				if seenStatus >= http.StatusBadRequest {
+					linkq.status = seenStatus
+					wrk.markBroken(linkq)
+					continue
+				}
+				if seenStatus >= http.StatusOK {
+					continue
+				}
+				if seenStatus == http.StatusProcessing {
+					// Scanning still in progress.
+					newList = append(newList, linkq)
+					continue
+				}
+			}
+			listWaitStatus = newList
+
+		case <-tick.C:
+			wrk.wg.Wait()
+			if len(wrk.resultq) != 0 {
+				continue
+			}
+			if len(listWaitStatus) != 0 {
+				// There are links that still waiting for
+				// scanning to be completed.
+				continue
+			}
+			isScanning = false
+		}
+	}
+	wrk.result.sort()
+	return wrk.result, nil
+}
+
+func (wrk *brokenlinksWorker) markBroken(linkq linkQueue) {
+	var parentUrl = linkq.parentUrl.String()
+	var listBroken = wrk.result.PageLinks[parentUrl]
+	var brokenLink = Broken{
+		Link: linkq.url,
+		Code: linkq.status,
+	}
+	if linkq.errScan != nil {
+		brokenLink.Error = linkq.errScan.Error()
+	}
+	listBroken = append(listBroken, brokenLink)
+	wrk.result.PageLinks[parentUrl] = listBroken
+
+	wrk.seenLink[linkq.url] = linkq.status
+}
+
+// scan fetch the HTML page or image to check if its valid.
+func (wrk *brokenlinksWorker) scan(linkq linkQueue) {
+	defer func() {
+		if wrk.opts.IsVerbose && linkq.errScan != nil {
+			fmt.Printf("error: %d %s error=%v\n", linkq.status,
+				linkq.url, linkq.errScan)
+		}
+		wrk.wg.Done()
+	}()
+
+	var (
+		resultq  = map[string]linkQueue{}
+		httpResp *http.Response
+		err      error
+	)
+	if linkq.kind == atom.Img || linkq.isExternal {
+		if wrk.opts.IsVerbose {
+			fmt.Printf("scan: HEAD %s\n", linkq.url)
+		}
+		httpResp, err = http.Head(linkq.url)
+	} else {
+		if wrk.opts.IsVerbose {
+			fmt.Printf("scan: GET %s\n", linkq.url)
+		}
+		httpResp, err = http.Get(linkq.url)
+	}
+	if err != nil {
+		linkq.status = StatusBadLink
+		linkq.errScan = err
+		resultq[linkq.url] = linkq
+		go wrk.pushResult(resultq)
+		return
+	}
+	defer httpResp.Body.Close()
+
+	linkq.status = httpResp.StatusCode
+	resultq[linkq.url] = linkq
+
+	if httpResp.StatusCode >= http.StatusBadRequest {
+		go wrk.pushResult(resultq)
+		return
+	}
+	if linkq.kind == atom.Img || linkq.isExternal {
+		go wrk.pushResult(resultq)
+		return
+	}
+
+	var doc *html.Node
+	doc, _ = html.Parse(httpResp.Body)
+
+	// After checking the code and tests of [html.Parse], there are no
+	// actual cases where HTML content will cause it to return an error.
+	// The only possible error is when reading from body (io.Reader), and
+	// that is also almost impossible.
+	//
+	// [html.Parse]: https://go.googlesource.com/net/+/refs/tags/v0.40.0/html/parse.go#2347
+
+	var scanUrl *url.URL
+
+	scanUrl, err = url.Parse(linkq.url)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	var node *html.Node
+	for node = range doc.Descendants() {
+		if node.Type != html.ElementNode {
+			continue
+		}
+		var nodeLink *linkQueue
+		if node.DataAtom == atom.A {
+			for _, attr := range node.Attr {
+				if attr.Key != `href` {
+					continue
+				}
+				nodeLink = wrk.processLink(scanUrl, attr.Val, atom.A)
+				break
+			}
+		} else if node.DataAtom == atom.Img {
+			for _, attr := range node.Attr {
+				if attr.Key != `src` {
+					continue
+				}
+				nodeLink = wrk.processLink(scanUrl, attr.Val, atom.Img)
+				break
+			}
+		} else {
+			continue
+		}
+		if nodeLink == nil {
+			continue
+		}
+		_, seen := resultq[nodeLink.url]
+		if !seen {
+			if !strings.HasPrefix(nodeLink.url, wrk.baseUrl.String()) {
+				nodeLink.isExternal = true
+			}
+			resultq[nodeLink.url] = *nodeLink
+		}
+	}
+	go wrk.pushResult(resultq)
+}
+
+func (wrk *brokenlinksWorker) processLink(parentUrl *url.URL, val string, kind atom.Atom) (
+	linkq *linkQueue,
+) {
+	if len(val) == 0 {
+		return nil
+	}
+
+	var newUrl *url.URL
+	var err error
+	newUrl, err = url.Parse(val)
+	if err != nil {
+		return &linkQueue{
+			parentUrl: parentUrl,
+			errScan:   err,
+			url:       val,
+			kind:      kind,
+			status:    StatusBadLink,
+		}
+	}
+	newUrl.Fragment = ""
+	newUrl.RawFragment = ""
+
+	if kind == atom.A && val[0] == '#' {
+		// Ignore link to ID, like `href="#element_id"`.
+		return nil
+	}
+	if strings.HasPrefix(val, `http`) {
+		return &linkQueue{
+			parentUrl: parentUrl,
+			url:       strings.TrimSuffix(newUrl.String(), `/`),
+			kind:      kind,
+		}
+	}
+	if val[0] == '/' {
+		// val is absolute to parent URL.
+		newUrl = wrk.baseUrl.JoinPath(newUrl.Path)
+	} else {
+		// val is relative to parent URL.
+		newUrl = parentUrl.JoinPath(`/`, newUrl.Path)
+	}
+	linkq = &linkQueue{
+		parentUrl: parentUrl,
+		url:       strings.TrimSuffix(newUrl.String(), `/`),
+		kind:      kind,
+	}
+	return linkq
+}
+
+func (wrk *brokenlinksWorker) pushResult(resultq map[string]linkQueue) {
+	var tick = time.NewTicker(100 * time.Millisecond)
+	for {
+		select {
+		case wrk.resultq <- resultq:
+			tick.Stop()
+			return
+		case <-tick.C:
+		}
+	}
+}
diff --git a/cmd/deadlinks/main.go b/cmd/deadlinks/main.go
deleted file mode 100644
index 16057ee..0000000
--- a/cmd/deadlinks/main.go
+++ /dev/null
@@ -1,111 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package main
-
-import (
-	"encoding/json"
-	"flag"
-	"fmt"
-	"log"
-	"os"
-	"strings"
-
-	"git.sr.ht/~shulhan/deadlinks"
-)
-
-func main() {
-	var optVerbose bool
-
-	flag.BoolVar(&optVerbose, `verbose`, false,
-		`print additional information while running`)
-
-	flag.Parse()
-
-	var cmd = flag.Arg(0)
-	if cmd == "" {
-		goto invalid_command
-	}
-
-	cmd = strings.ToLower(cmd)
-	if cmd == "scan" {
-		var scanOpts = deadlinks.ScanOptions{
-			Url:       flag.Arg(1),
-			IsVerbose: optVerbose,
-		}
-		if scanOpts.Url == "" {
-			goto invalid_command
-		}
-
-		var result *deadlinks.Result
-		var err error
-		result, err = deadlinks.Scan(scanOpts)
-		if err != nil {
-			log.Fatal(err.Error())
-		}
-
-		var resultJson []byte
-		resultJson, err = json.MarshalIndent(result.PageLinks, ``, `  `)
-		if err != nil {
-			log.Fatal(err.Error())
-		}
-		fmt.Printf("%s\n", resultJson)
-		return
-	}
-
-invalid_command:
-	usage()
-	os.Exit(1)
-}
-
-func usage() {
-	log.Println(`
-deadlinks <COMMAND> <args...>
-
-Deadlinks is a program to scan for invalid links inside HTML page on the live
-web server.
-Invalid links will be scanned on anchor href attribute ("<a href=...>") or on
-the image src attribute ("<img src=...").
-
-== Usage
-
-[OPTIONS] scan URL
-
-	Start scanning for deadlinks on the web server pointed by URL.
-	Once finished it will print the page and list of dead links inside
-	that page in JSON format.
-	This command accept the following options,
-
-	-verbose : print the page that being scanned.
-
-	Example,
-
-	$ deadlinks scan https://kilabit.info
-	{
-	  "https://kilabit.info/some/page": [
-	    {
-	      "Link": "https://kilabit.info/some/page/image.png",
-	      "Code": 404
-	    },
-	    {
-	      "Link": "https://external.com/link",
-	      "Error": "Internal server error",
-	      "Code": 500
-	    }
-	  ],
-	  "https://kilabit.info/another/page": [
-	    {
-	      "Link": "https://kilabit.info/another/page/image.png",
-	      "Code": 404
-	    },
-	    {
-	      "Link": "https://external.org/link",
-	      "Error": "Internal server error",
-	      "Code": 500
-	    }
-	  ]
-	}
-
---
-deadlinks v` + deadlinks.Version)
-}
diff --git a/cmd/jarink/main.go b/cmd/jarink/main.go
new file mode 100644
index 0000000..c4af038
--- /dev/null
+++ b/cmd/jarink/main.go
@@ -0,0 +1,118 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package main
+
+import (
+	"encoding/json"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"strings"
+
+	"git.sr.ht/~shulhan/jarink"
+)
+
+func main() {
+	var optVerbose bool
+
+	flag.BoolVar(&optVerbose, `verbose`, false,
+		`print additional information while running`)
+
+	flag.Parse()
+
+	var cmd = flag.Arg(0)
+	if cmd == "" {
+		goto invalid_command
+	}
+
+	cmd = strings.ToLower(cmd)
+	if cmd == "brokenlinks" {
+		var brokenlinksOpts = jarink.BrokenlinksOptions{
+			Url:       flag.Arg(1),
+			IsVerbose: optVerbose,
+		}
+		if brokenlinksOpts.Url == "" {
+			goto invalid_command
+		}
+
+		var result *jarink.BrokenlinksResult
+		var err error
+		result, err = jarink.Brokenlinks(brokenlinksOpts)
+		if err != nil {
+			log.Fatal(err.Error())
+		}
+
+		var resultJson []byte
+		resultJson, err = json.MarshalIndent(result.PageLinks, ``, `  `)
+		if err != nil {
+			log.Fatal(err.Error())
+		}
+		fmt.Printf("%s\n", resultJson)
+		return
+	}
+
+invalid_command:
+	usage()
+	os.Exit(1)
+}
+
+func usage() {
+	log.Println(`
+Jarink is a program to help web administrator to maintains their website.
+
+== Synopsis
+
+	jarink [OPTIONS] <COMMAND> <args...>
+
+Available commands,
+
+	brokenlinks - scan the website for broken links (page and images).
+
+== Usage
+
+[OPTIONS] brokenlinks URL
+
+	Start scanning for broken links on the web server pointed by URL.
+	Invalid links will be scanned on anchor href attribute
+	("<a href=...>") or on the image src attribute ("<img src=...").
+
+	Once finished it will print the page and list of broken links inside
+	that page in JSON format.
+
+	This command accept the following options,
+
+		-verbose : print the page that being scanned.
+
+	Example,
+
+	$ jarink brokenlinks https://kilabit.info
+	{
+	  "https://kilabit.info/some/page": [
+	    {
+	      "Link": "https://kilabit.info/some/page/image.png",
+	      "Code": 404
+	    },
+	    {
+	      "Link": "https://external.com/link",
+	      "Error": "Internal server error",
+	      "Code": 500
+	    }
+	  ],
+	  "https://kilabit.info/another/page": [
+	    {
+	      "Link": "https://kilabit.info/another/page/image.png",
+	      "Code": 404
+	    },
+	    {
+	      "Link": "https://external.org/link",
+	      "Error": "Internal server error",
+	      "Code": 500
+	    }
+	  ]
+	}
+
+--
+jarink v` + jarink.Version)
+}
diff --git a/deadlinks.go b/deadlinks.go
deleted file mode 100644
index 2edcd0d..0000000
--- a/deadlinks.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package deadlinks
-
-import (
-	"fmt"
-)
-
-const Version = `0.1.0`
-
-// StatusBadLink status for link that is not parseable by [url.Parse] or not
-// reachable during GET or HEAD, either timeout or IP or domain not exist.
-const StatusBadLink = 700
-
-// Scan the baseUrl for dead links.
-func Scan(opts ScanOptions) (result *Result, err error) {
-	var logp = `Scan`
-	var wrk *worker
-
-	wrk, err = newWorker(opts)
-	if err != nil {
-		return nil, fmt.Errorf(`%s: %s`, logp, err)
-	}
-
-	result, err = wrk.run()
-	if err != nil {
-		return nil, fmt.Errorf(`%s: %s`, logp, err)
-	}
-
-	return result, nil
-}
diff --git a/deadlinks_test.go b/deadlinks_test.go
deleted file mode 100644
index c219aa0..0000000
--- a/deadlinks_test.go
+++ /dev/null
@@ -1,188 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package deadlinks_test
-
-import (
-	"log"
-	"net/http"
-	"os"
-	"testing"
-	"time"
-
-	"git.sr.ht/~shulhan/deadlinks"
-	libnet "git.sr.ht/~shulhan/pakakeh.go/lib/net"
-	"git.sr.ht/~shulhan/pakakeh.go/lib/test"
-)
-
-// The test run two web servers that serve content on "testdata/web/".
-// The first web server is the one that we want to scan.
-// The second web server is external web server, where HTML pages should not
-// be parsed.
-
-const testAddress = `127.0.0.1:11836`
-const testExternalAddress = `127.0.0.1:11900`
-
-func TestMain(m *testing.M) {
-	log.SetFlags(0)
-	var httpDirWeb = http.Dir(`testdata/web`)
-	var fshandle = http.FileServer(httpDirWeb)
-
-	go func() {
-		var mux = http.NewServeMux()
-		mux.Handle(`/`, fshandle)
-		var testServer = &http.Server{
-			Addr:           testAddress,
-			Handler:        mux,
-			ReadTimeout:    10 * time.Second,
-			WriteTimeout:   10 * time.Second,
-			MaxHeaderBytes: 1 << 20,
-		}
-		var err = testServer.ListenAndServe()
-		if err != nil {
-			log.Fatal(err)
-		}
-	}()
-	go func() {
-		var mux = http.NewServeMux()
-		mux.Handle(`/`, fshandle)
-		var testServer = &http.Server{
-			Addr:           testExternalAddress,
-			Handler:        mux,
-			ReadTimeout:    10 * time.Second,
-			WriteTimeout:   10 * time.Second,
-			MaxHeaderBytes: 1 << 20,
-		}
-		var err = testServer.ListenAndServe()
-		if err != nil {
-			log.Fatal(err)
-		}
-	}()
-
-	var err = libnet.WaitAlive(`tcp`, testAddress, 5*time.Second)
-	if err != nil {
-		log.Fatal(err)
-	}
-	err = libnet.WaitAlive(`tcp`, testExternalAddress, 5*time.Second)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	os.Exit(m.Run())
-}
-
-func TestDeadLinks_Scan(t *testing.T) {
-	var testUrl = `http://` + testAddress
-
-	type testCase struct {
-		exp      map[string][]deadlinks.Broken
-		scanUrl  string
-		expError string
-	}
-
-	listCase := []testCase{{
-		scanUrl:  `127.0.0.1:14594`,
-		expError: `Scan: invalid URL "127.0.0.1:14594"`,
-	}, {
-		scanUrl:  `http://127.0.0.1:14594`,
-		expError: `Scan: Get "http://127.0.0.1:14594": dial tcp 127.0.0.1:14594: connect: connection refused`,
-	}, {
-		scanUrl: testUrl,
-		exp: map[string][]deadlinks.Broken{
-			testUrl: []deadlinks.Broken{
-				{
-					Link: testUrl + `/broken.png`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/brokenPage`,
-					Code: http.StatusNotFound,
-				}, {
-					Link:  `http://127.0.0.1:abc`,
-					Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`,
-					Code:  deadlinks.StatusBadLink,
-				}, {
-					Link:  `http:/127.0.0.1:11836`,
-					Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`,
-					Code:  deadlinks.StatusBadLink,
-				},
-			},
-			testUrl + `/broken.html`: []deadlinks.Broken{
-				{
-					Link: testUrl + `/brokenPage`,
-					Code: http.StatusNotFound,
-				},
-			},
-			testUrl + `/page2`: []deadlinks.Broken{
-				{
-					Link: testUrl + `/broken.png`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken/relative`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken2.png`,
-					Code: http.StatusNotFound,
-				},
-			},
-		},
-	}, {
-		scanUrl: testUrl + `/page2`,
-		exp: map[string][]deadlinks.Broken{
-			testUrl: []deadlinks.Broken{
-				{
-					Link: testUrl + `/broken.png`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/brokenPage`,
-					Code: http.StatusNotFound,
-				}, {
-					Link:  `http://127.0.0.1:abc`,
-					Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`,
-					Code:  deadlinks.StatusBadLink,
-				}, {
-					Link:  `http:/127.0.0.1:11836`,
-					Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`,
-					Code:  deadlinks.StatusBadLink,
-				},
-			},
-			testUrl + `/broken.html`: []deadlinks.Broken{
-				{
-					Link: testUrl + `/brokenPage`,
-					Code: http.StatusNotFound,
-				},
-			},
-			testUrl + `/page2`: []deadlinks.Broken{
-				{
-					Link: testUrl + `/broken.png`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken/relative`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken2.png`,
-					Code: http.StatusNotFound,
-				},
-			},
-		},
-	}}
-
-	var (
-		result *deadlinks.Result
-		err    error
-	)
-	for _, tcase := range listCase {
-		t.Logf(`--- Scan: %s`, tcase.scanUrl)
-		var scanOpts = deadlinks.ScanOptions{
-			Url: tcase.scanUrl,
-		}
-		result, err = deadlinks.Scan(scanOpts)
-		if err != nil {
-			test.Assert(t, tcase.scanUrl+` error`,
-				tcase.expError, err.Error())
-			continue
-		}
-		//got, _ := json.MarshalIndent(result.PageLinks, ``, `  `)
-		//t.Logf(`got=%s`, got)
-		test.Assert(t, tcase.scanUrl, tcase.exp, result.PageLinks)
-	}
-}
diff --git a/go.mod b/go.mod
index a57a7c9..2063444 100644
--- a/go.mod
+++ b/go.mod
@@ -1,9 +1,9 @@
 // SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
 // SPDX-License-Identifier: GPL-3.0-only
 
-module git.sr.ht/~shulhan/deadlinks
+module git.sr.ht/~shulhan/jarink
 
-go 1.25
+go 1.24
 
 require (
 	git.sr.ht/~shulhan/pakakeh.go v0.60.1
diff --git a/link_queue.go b/link_queue.go
index 63940cc..0b419b8 100644
--- a/link_queue.go
+++ b/link_queue.go
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
 // SPDX-License-Identifier: GPL-3.0-only
 
-package deadlinks
+package jarink
 
 import (
 	"net/url"
diff --git a/result.go b/result.go
deleted file mode 100644
index 6fdc817..0000000
--- a/result.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package deadlinks
-
-import (
-	"slices"
-	"strings"
-)
-
-// Broken store the link with its HTTP status.
-type Broken struct {
-	Link  string
-	Error string `json:"omitempty"`
-	Code  int
-}
-
-// Result store the result of Scan.
-type Result struct {
-	// PageLinks store the page and its broken links.
-	PageLinks map[string][]Broken
-}
-
-func newResult() *Result {
-	return &Result{
-		PageLinks: map[string][]Broken{},
-	}
-}
-
-func (result *Result) sort() {
-	for _, listBroken := range result.PageLinks {
-		slices.SortFunc(listBroken, func(a, b Broken) int {
-			return strings.Compare(a.Link, b.Link)
-		})
-	}
-}
diff --git a/scan_options.go b/scan_options.go
deleted file mode 100644
index bc5484e..0000000
--- a/scan_options.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package deadlinks
-
-// ScanOptions define the options for scan command or Scan function.
-type ScanOptions struct {
-	Url       string
-	IsVerbose bool
-}
diff --git a/url_test.go b/url_test.go
index 506090d..0b0bf03 100644
--- a/url_test.go
+++ b/url_test.go
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
 // SPDX-License-Identifier: GPL-3.0-only
 
-package deadlinks
+package jarink
 
 import (
 	"net/url"
diff --git a/worker.go b/worker.go
deleted file mode 100644
index 817ff3b..0000000
--- a/worker.go
+++ /dev/null
@@ -1,369 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package deadlinks
-
-import (
-	"fmt"
-	"log"
-	"net/http"
-	"net/url"
-	"strings"
-	"sync"
-	"time"
-
-	"golang.org/x/net/html"
-	"golang.org/x/net/html/atom"
-)
-
-type worker struct {
-	// seenLink store the URL being or has been scanned and its HTTP
-	// status code.
-	seenLink map[string]int
-
-	// resultq channel that collect result from scanning.
-	resultq chan map[string]linkQueue
-
-	// result contains the final result after all of the pages has been
-	// scanned.
-	result *Result
-
-	// The base URL that will be joined to relative or absolute
-	// links or image.
-	baseUrl *url.URL
-
-	// The URL to scan.
-	scanUrl *url.URL
-
-	opts ScanOptions
-
-	// wg sync the goroutine scanner.
-	wg sync.WaitGroup
-}
-
-func newWorker(opts ScanOptions) (wrk *worker, err error) {
-	wrk = &worker{
-		opts:     opts,
-		seenLink: map[string]int{},
-		resultq:  make(chan map[string]linkQueue, 100),
-		result:   newResult(),
-	}
-
-	wrk.scanUrl, err = url.Parse(opts.Url)
-	if err != nil {
-		return nil, fmt.Errorf(`invalid URL %q`, opts.Url)
-	}
-
-	wrk.scanUrl.Path = strings.TrimSuffix(wrk.scanUrl.Path, `/`)
-	wrk.scanUrl.Fragment = ""
-	wrk.scanUrl.RawFragment = ""
-
-	wrk.baseUrl = &url.URL{
-		Scheme: wrk.scanUrl.Scheme,
-		Host:   wrk.scanUrl.Host,
-	}
-
-	return wrk, nil
-}
-
-func (wrk *worker) run() (result *Result, err error) {
-	// Scan the first URL to make sure that the server is reachable.
-	var firstLinkq = linkQueue{
-		parentUrl: nil,
-		url:       wrk.scanUrl.String(),
-		status:    http.StatusProcessing,
-	}
-	wrk.seenLink[firstLinkq.url] = http.StatusProcessing
-
-	wrk.wg.Add(1)
-	go wrk.scan(firstLinkq)
-	wrk.wg.Wait()
-
-	var resultq = <-wrk.resultq
-	for _, linkq := range resultq {
-		if linkq.url == firstLinkq.url {
-			if linkq.errScan != nil {
-				return nil, linkq.errScan
-			}
-			wrk.seenLink[linkq.url] = linkq.status
-			continue
-		}
-		if linkq.status >= http.StatusBadRequest {
-			wrk.markDead(linkq)
-			continue
-		}
-
-		wrk.seenLink[linkq.url] = http.StatusProcessing
-		wrk.wg.Add(1)
-		go wrk.scan(linkq)
-	}
-
-	var tick = time.NewTicker(500 * time.Millisecond)
-	var listWaitStatus []linkQueue
-	var isScanning = true
-	for isScanning {
-		select {
-		case resultq := <-wrk.resultq:
-
-			// The resultq contains the original URL being scanned
-			// and its child links.
-			// For example, scanning "http://example.tld" result
-			// in
-			//
-			//	"http://example.tld": {status=200}
-			//	"http://example.tld/page": {status=0}
-			//	"http://example.tld/image.png": {status=0}
-			//	"http://bad:domain/image.png": {status=700}
-
-			var newList []linkQueue
-			for _, linkq := range resultq {
-				if linkq.status >= http.StatusBadRequest {
-					wrk.markDead(linkq)
-					continue
-				}
-				if linkq.status != 0 {
-					// linkq is the result of scan with
-					// non error status.
-					wrk.seenLink[linkq.url] = linkq.status
-					continue
-				}
-
-				seenStatus, seen := wrk.seenLink[linkq.url]
-				if !seen {
-					wrk.seenLink[linkq.url] = http.StatusProcessing
-					wrk.wg.Add(1)
-					go wrk.scan(linkq)
-					continue
-				}
-				if seenStatus >= http.StatusBadRequest {
-					linkq.status = seenStatus
-					wrk.markDead(linkq)
-					continue
-				}
-				if seenStatus >= http.StatusOK {
-					// The link has been processed and its
-					// not an error.
-					continue
-				}
-				if seenStatus == http.StatusProcessing {
-					// The link being processed by other
-					// goroutine.
-					linkq.status = seenStatus
-					newList = append(newList, linkq)
-					continue
-				}
-				log.Fatalf("link=%s status=%d", linkq.url, linkq.status)
-			}
-			for _, linkq := range listWaitStatus {
-				seenStatus := wrk.seenLink[linkq.url]
-				if seenStatus >= http.StatusBadRequest {
-					linkq.status = seenStatus
-					wrk.markDead(linkq)
-					continue
-				}
-				if seenStatus >= http.StatusOK {
-					continue
-				}
-				if seenStatus == http.StatusProcessing {
-					// Scanning still in progress.
-					newList = append(newList, linkq)
-					continue
-				}
-			}
-			listWaitStatus = newList
-
-		case <-tick.C:
-			wrk.wg.Wait()
-			if len(wrk.resultq) != 0 {
-				continue
-			}
-			if len(listWaitStatus) != 0 {
-				// There are links that still waiting for
-				// scanning to be completed.
-				continue
-			}
-			isScanning = false
-		}
-	}
-	wrk.result.sort()
-	return wrk.result, nil
-}
-
-func (wrk *worker) markDead(linkq linkQueue) {
-	var parentUrl = linkq.parentUrl.String()
-	var listBroken = wrk.result.PageLinks[parentUrl]
-	var brokenLink = Broken{
-		Link: linkq.url,
-		Code: linkq.status,
-	}
-	if linkq.errScan != nil {
-		brokenLink.Error = linkq.errScan.Error()
-	}
-	listBroken = append(listBroken, brokenLink)
-	wrk.result.PageLinks[parentUrl] = listBroken
-
-	wrk.seenLink[linkq.url] = linkq.status
-}
-
-// scan fetch the HTML page or image to check if its valid.
-func (wrk *worker) scan(linkq linkQueue) {
-	defer func() {
-		if wrk.opts.IsVerbose && linkq.errScan != nil {
-			fmt.Printf("error: %d %s error=%v\n", linkq.status,
-				linkq.url, linkq.errScan)
-		}
-		wrk.wg.Done()
-	}()
-
-	var (
-		resultq  = map[string]linkQueue{}
-		httpResp *http.Response
-		err      error
-	)
-	if linkq.kind == atom.Img || linkq.isExternal {
-		if wrk.opts.IsVerbose {
-			fmt.Printf("scan: HEAD %s\n", linkq.url)
-		}
-		httpResp, err = http.Head(linkq.url)
-	} else {
-		if wrk.opts.IsVerbose {
-			fmt.Printf("scan: GET %s\n", linkq.url)
-		}
-		httpResp, err = http.Get(linkq.url)
-	}
-	if err != nil {
-		linkq.status = StatusBadLink
-		linkq.errScan = err
-		resultq[linkq.url] = linkq
-		go wrk.pushResult(resultq)
-		return
-	}
-	defer httpResp.Body.Close()
-
-	linkq.status = httpResp.StatusCode
-	resultq[linkq.url] = linkq
-
-	if httpResp.StatusCode >= http.StatusBadRequest {
-		go wrk.pushResult(resultq)
-		return
-	}
-	if linkq.kind == atom.Img || linkq.isExternal {
-		go wrk.pushResult(resultq)
-		return
-	}
-
-	var doc *html.Node
-	doc, _ = html.Parse(httpResp.Body)
-
-	// After we check the code and test for [html.Parse] there are
-	// no case actual cases where HTML content will return an error.
-	// The only possible error is when reading from body (io.Reader), and
-	// that is also almost impossible.
-	//
-	// [html.Parse]: https://go.googlesource.com/net/+/refs/tags/v0.40.0/html/parse.go#2347
-
-	var scanUrl *url.URL
-
-	scanUrl, err = url.Parse(linkq.url)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	var node *html.Node
-	for node = range doc.Descendants() {
-		if node.Type != html.ElementNode {
-			continue
-		}
-		var nodeLink *linkQueue
-		if node.DataAtom == atom.A {
-			for _, attr := range node.Attr {
-				if attr.Key != `href` {
-					continue
-				}
-				nodeLink = wrk.processLink(scanUrl, attr.Val, atom.A)
-				break
-			}
-		} else if node.DataAtom == atom.Img {
-			for _, attr := range node.Attr {
-				if attr.Key != `src` {
-					continue
-				}
-				nodeLink = wrk.processLink(scanUrl, attr.Val, atom.Img)
-				break
-			}
-		} else {
-			continue
-		}
-		if nodeLink == nil {
-			continue
-		}
-		_, seen := resultq[nodeLink.url]
-		if !seen {
-			if !strings.HasPrefix(nodeLink.url, wrk.baseUrl.String()) {
-				nodeLink.isExternal = true
-			}
-			resultq[nodeLink.url] = *nodeLink
-		}
-	}
-	go wrk.pushResult(resultq)
-}
-
-func (wrk *worker) processLink(parentUrl *url.URL, val string, kind atom.Atom) (
-	linkq *linkQueue,
-) {
-	if len(val) == 0 {
-		return nil
-	}
-
-	var newUrl *url.URL
-	var err error
-	newUrl, err = url.Parse(val)
-	if err != nil {
-		return &linkQueue{
-			parentUrl: parentUrl,
-			errScan:   err,
-			url:       val,
-			kind:      kind,
-			status:    StatusBadLink,
-		}
-	}
-	newUrl.Fragment = ""
-	newUrl.RawFragment = ""
-
-	if kind == atom.A && val[0] == '#' {
-		// Ignore link to ID, like `href="#element_id"`.
-		return nil
-	}
-	if strings.HasPrefix(val, `http`) {
-		return &linkQueue{
-			parentUrl: parentUrl,
-			url:       strings.TrimSuffix(newUrl.String(), `/`),
-			kind:      kind,
-		}
-	}
-	if val[0] == '/' {
-		// val is absolute to parent URL.
-		newUrl = wrk.baseUrl.JoinPath(newUrl.Path)
-	} else {
-		// val is relative to parent URL.
-		newUrl = parentUrl.JoinPath(`/`, newUrl.Path)
-	}
-	linkq = &linkQueue{
-		parentUrl: parentUrl,
-		url:       strings.TrimSuffix(newUrl.String(), `/`),
-		kind:      kind,
-	}
-	return linkq
-}
-
-func (wrk *worker) pushResult(resultq map[string]linkQueue) {
-	var tick = time.NewTicker(100 * time.Millisecond)
-	for {
-		select {
-		case wrk.resultq <- resultq:
-			tick.Stop()
-			return
-		case <-tick.C:
-		}
-	}
-}
-- 
cgit v1.3