From a02e915388723a5d8cc3b555fb3dfec477fc2a55 Mon Sep 17 00:00:00 2001
From: Shulhan <ms@kilabit.info>
Date: Thu, 12 Jun 2025 21:13:58 +0700
Subject: all: refactoring, move brokenlinks code to its own package

When two or more structs share the same prefix, that means it is time to
move them into their own package.

Also, in the future we will group each command into its own package.
---
 brokenlinks.go                                |  69 ----
 brokenlinks/brokenlinks.go                    |  39 +++
 brokenlinks/brokenlinks_test.go               | 227 +++++++++++++
 brokenlinks/link_queue.go                     |  55 +++
 brokenlinks/result.go                         |  37 ++
 brokenlinks/testdata/past_result.json         |  10 +
 brokenlinks/testdata/past_result.json.license |   2 +
 brokenlinks/testdata/web/broken.html          |   7 +
 brokenlinks/testdata/web/gopher.png           | Bin 0 -> 32775 bytes
 brokenlinks/testdata/web/index.html           |  22 ++
 brokenlinks/testdata/web/page2/index.html     |  14 +
 brokenlinks/worker.go                         | 467 ++++++++++++++++++++++++++
 brokenlinks_test.go                           | 166 ---------
 brokenlinks_worker.go                         | 467 --------------------------
 cmd/jarink/main.go                            |  25 +-
 jarink_test.go                                |  70 ----
 link_queue.go                                 |  55 ---
 testdata/past_result.json                     |  10 -
 testdata/past_result.json.license             |   2 -
 testdata/web/broken.html                      |   7 -
 testdata/web/gopher.png                       | Bin 32775 -> 0 bytes
 testdata/web/index.html                       |  22 --
 testdata/web/page2/index.html                 |  14 -
 23 files changed, 897 insertions(+), 890 deletions(-)
 delete mode 100644 brokenlinks.go
 create mode 100644 brokenlinks/brokenlinks.go
 create mode 100644 brokenlinks/brokenlinks_test.go
 create mode 100644 brokenlinks/link_queue.go
 create mode 100644 brokenlinks/result.go
 create mode 100644 brokenlinks/testdata/past_result.json
 create mode 100644 brokenlinks/testdata/past_result.json.license
 create mode 100644 brokenlinks/testdata/web/broken.html
 create mode 100644 brokenlinks/testdata/web/gopher.png
 create mode 100644 brokenlinks/testdata/web/index.html
 create mode 100644 brokenlinks/testdata/web/page2/index.html
 create mode 100644 brokenlinks/worker.go
 delete mode 100644 brokenlinks_test.go
 delete mode 100644 brokenlinks_worker.go
 delete mode 100644 jarink_test.go
 delete mode 100644 link_queue.go
 delete mode 100644 testdata/past_result.json
 delete mode 100644 testdata/past_result.json.license
 delete mode 100644 testdata/web/broken.html
 delete mode 100644 testdata/web/gopher.png
 delete mode 100644 testdata/web/index.html
 delete mode 100644 testdata/web/page2/index.html

diff --git a/brokenlinks.go b/brokenlinks.go
deleted file mode 100644
index 96580e5..0000000
--- a/brokenlinks.go
+++ /dev/null
@@ -1,69 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package jarink
-
-import (
-	"fmt"
-	"slices"
-	"strings"
-)
-
-const Version = `0.1.0`
-
-// StatusBadLink status for link that is not parseable by [url.Parse] or not
-// reachable during GET or HEAD, either timeout or IP or domain not exist.
-const StatusBadLink = 700
-
-// Broken store the broken link, HTTP status code, and the error message that
-// cause it.
-type Broken struct {
-	Link  string `json:"link"`
-	Error string `json:"error,omitempty"`
-	Code  int    `json:"code"`
-}
-
-// BrokenlinksOptions define the options for scanning broken links.
-type BrokenlinksOptions struct {
-	Url            string
-	PastResultFile string
-	IsVerbose      bool
-}
-
-// BrokenlinksResult store the result of scanning for broken links.
-type BrokenlinksResult struct {
-	// BrokenLinks store the page and its broken links.
-	BrokenLinks map[string][]Broken `json:"broken_links"`
-}
-
-func newBrokenlinksResult() *BrokenlinksResult {
-	return &BrokenlinksResult{
-		BrokenLinks: map[string][]Broken{},
-	}
-}
-
-func (result *BrokenlinksResult) sort() {
-	for _, listBroken := range result.BrokenLinks {
-		slices.SortFunc(listBroken, func(a, b Broken) int {
-			return strings.Compare(a.Link, b.Link)
-		})
-	}
-}
-
-// Brokenlinks scan the URL for broken links.
-func Brokenlinks(opts BrokenlinksOptions) (result *BrokenlinksResult, err error) {
-	var logp = `brokenlinks`
-	var wrk *brokenlinksWorker
-
-	wrk, err = newWorker(opts)
-	if err != nil {
-		return nil, fmt.Errorf(`%s: %s`, logp, err)
-	}
-
-	result, err = wrk.run()
-	if err != nil {
-		return nil, fmt.Errorf(`%s: %s`, logp, err)
-	}
-
-	return result, nil
-}
diff --git a/brokenlinks/brokenlinks.go b/brokenlinks/brokenlinks.go
new file mode 100644
index 0000000..8ac458f
--- /dev/null
+++ b/brokenlinks/brokenlinks.go
@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks
+
+import (
+	"fmt"
+)
+
+const Version = `0.1.0`
+
+// StatusBadLink is the status for a link that is not parseable by [url.Parse]
+// or not reachable during GET or HEAD: a timeout, or a non-existent IP or domain.
+const StatusBadLink = 700
+
+// Options define the options for scanning broken links.
+type Options struct {
+	Url            string
+	PastResultFile string
+	IsVerbose      bool
+}
+
+// Scan the URL for broken links.
+func Scan(opts Options) (result *Result, err error) {
+	var logp = `brokenlinks`
+	var wrk *worker
+
+	wrk, err = newWorker(opts)
+	if err != nil {
+		return nil, fmt.Errorf(`%s: %s`, logp, err)
+	}
+
+	result, err = wrk.run()
+	if err != nil {
+		return nil, fmt.Errorf(`%s: %s`, logp, err)
+	}
+
+	return result, nil
+}
diff --git a/brokenlinks/brokenlinks_test.go b/brokenlinks/brokenlinks_test.go
new file mode 100644
index 0000000..367ae6c
--- /dev/null
+++ b/brokenlinks/brokenlinks_test.go
@@ -0,0 +1,227 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks_test
+
+import (
+	"encoding/json"
+	"log"
+	"net/http"
+	"os"
+	"testing"
+	"time"
+
+	libnet "git.sr.ht/~shulhan/pakakeh.go/lib/net"
+	"git.sr.ht/~shulhan/pakakeh.go/lib/test"
+
+	"git.sr.ht/~shulhan/jarink/brokenlinks"
+)
+
+// The tests run two web servers that serve content from "testdata/web/".
+// The first web server is the one that we want to scan.
+// The second web server is an external web server, whose HTML pages should
+// not be parsed.
+
+const testAddress = `127.0.0.1:11836`
+const testExternalAddress = `127.0.0.1:11900`
+
+func TestMain(m *testing.M) {
+	log.SetFlags(0)
+	var httpDirWeb = http.Dir(`testdata/web`)
+	var fshandle = http.FileServer(httpDirWeb)
+
+	go func() {
+		var mux = http.NewServeMux()
+		mux.Handle(`/`, fshandle)
+		var testServer = &http.Server{
+			Addr:           testAddress,
+			Handler:        mux,
+			ReadTimeout:    10 * time.Second,
+			WriteTimeout:   10 * time.Second,
+			MaxHeaderBytes: 1 << 20,
+		}
+		var err = testServer.ListenAndServe()
+		if err != nil {
+			log.Fatal(err)
+		}
+	}()
+	go func() {
+		var mux = http.NewServeMux()
+		mux.Handle(`/`, fshandle)
+		var testServer = &http.Server{
+			Addr:           testExternalAddress,
+			Handler:        mux,
+			ReadTimeout:    10 * time.Second,
+			WriteTimeout:   10 * time.Second,
+			MaxHeaderBytes: 1 << 20,
+		}
+		var err = testServer.ListenAndServe()
+		if err != nil {
+			log.Fatal(err)
+		}
+	}()
+
+	var err = libnet.WaitAlive(`tcp`, testAddress, 5*time.Second)
+	if err != nil {
+		log.Fatal(err)
+	}
+	err = libnet.WaitAlive(`tcp`, testExternalAddress, 5*time.Second)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	os.Exit(m.Run())
+}
+
+func TestBrokenlinks(t *testing.T) {
+	var testUrl = `http://` + testAddress
+
+	type testCase struct {
+		exp      map[string][]brokenlinks.Broken
+		scanUrl  string
+		expError string
+	}
+
+	listCase := []testCase{{
+		scanUrl:  `127.0.0.1:14594`,
+		expError: `brokenlinks: invalid URL "127.0.0.1:14594"`,
+	}, {
+		scanUrl:  `http://127.0.0.1:14594`,
+		expError: `brokenlinks: Get "http://127.0.0.1:14594": dial tcp 127.0.0.1:14594: connect: connection refused`,
+	}, {
+		scanUrl: testUrl,
+		exp: map[string][]brokenlinks.Broken{
+			testUrl: []brokenlinks.Broken{
+				{
+					Link: testUrl + `/broken.png`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/brokenPage`,
+					Code: http.StatusNotFound,
+				}, {
+					Link:  `http://127.0.0.1:abc`,
+					Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`,
+					Code:  brokenlinks.StatusBadLink,
+				}, {
+					Link:  `http:/127.0.0.1:11836`,
+					Error: `Get "http:/127.0.0.1:11836": http: no Host in request URL`,
+					Code:  brokenlinks.StatusBadLink,
+				},
+			},
+			testUrl + `/broken.html`: []brokenlinks.Broken{
+				{
+					Link: testUrl + `/brokenPage`,
+					Code: http.StatusNotFound,
+				},
+			},
+			testUrl + `/page2`: []brokenlinks.Broken{
+				{
+					Link: testUrl + `/broken.png`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken/relative`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken2.png`,
+					Code: http.StatusNotFound,
+				},
+			},
+		},
+	}, {
+		// Scanning on "/path" should not scan "/" or any pages other
+		// than those below "/path" itself.
+		scanUrl: testUrl + `/page2`,
+		exp: map[string][]brokenlinks.Broken{
+			testUrl + `/page2`: []brokenlinks.Broken{
+				{
+					Link: testUrl + `/broken.png`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken/relative`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken2.png`,
+					Code: http.StatusNotFound,
+				},
+			},
+		},
+	}}
+
+	var (
+		result *brokenlinks.Result
+		err    error
+	)
+	for _, tcase := range listCase {
+		t.Logf(`--- brokenlinks: %s`, tcase.scanUrl)
+		var opts = brokenlinks.Options{
+			Url: tcase.scanUrl,
+		}
+		result, err = brokenlinks.Scan(opts)
+		if err != nil {
+			test.Assert(t, tcase.scanUrl+` error`,
+				tcase.expError, err.Error())
+			continue
+		}
+		//got, _ := json.MarshalIndent(result.BrokenLinks, ``, `  `)
+		//t.Logf(`got=%s`, got)
+		test.Assert(t, tcase.scanUrl, tcase.exp, result.BrokenLinks)
+	}
+}
+
+// Test running [brokenlinks.Scan] with Options.PastResultFile set.
+// The PastResultFile is modified to only report errors on "/page2".
+func TestBrokenlinks_pastResult(t *testing.T) {
+	var testUrl = `http://` + testAddress
+
+	type testCase struct {
+		exp      map[string][]brokenlinks.Broken
+		expError string
+		opts     brokenlinks.Options
+	}
+
+	listCase := []testCase{{
+		// With invalid file.
+		opts: brokenlinks.Options{
+			Url:            testUrl,
+			PastResultFile: `testdata/invalid`,
+		},
+		expError: `brokenlinks: open testdata/invalid: no such file or directory`,
+	}, {
+		// With valid file.
+		opts: brokenlinks.Options{
+			Url:            testUrl,
+			PastResultFile: `testdata/past_result.json`,
+		},
+		exp: map[string][]brokenlinks.Broken{
+			testUrl + `/page2`: []brokenlinks.Broken{
+				{
+					Link: testUrl + `/broken.png`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken/relative`,
+					Code: http.StatusNotFound,
+				}, {
+					Link: testUrl + `/page2/broken2.png`,
+					Code: http.StatusNotFound,
+				},
+			},
+		},
+	}}
+
+	var (
+		result *brokenlinks.Result
+		err    error
+	)
+	for _, tcase := range listCase {
+		t.Logf(`--- brokenlinks: %s`, tcase.opts.Url)
+		result, err = brokenlinks.Scan(tcase.opts)
+		if err != nil {
+			test.Assert(t, tcase.opts.Url+` error`,
+				tcase.expError, err.Error())
+			continue
+		}
+		got, _ := json.MarshalIndent(result.BrokenLinks, ``, `  `)
+		t.Logf(`got=%s`, got)
+		test.Assert(t, tcase.opts.Url, tcase.exp, result.BrokenLinks)
+	}
+}
diff --git a/brokenlinks/link_queue.go b/brokenlinks/link_queue.go
new file mode 100644
index 0000000..164a902
--- /dev/null
+++ b/brokenlinks/link_queue.go
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks
+
+import (
+	"net/url"
+	"strings"
+
+	"golang.org/x/net/html/atom"
+)
+
+type linkQueue struct {
+	parentUrl *url.URL
+
+	// The error from scan.
+	errScan error
+
+	// url being scanned.
+	url string
+
+	// kind of url, either an anchor or an image.
+	// It is set to 0 if url is the first URL being scanned.
+	kind atom.Atom
+
+	// isExternal if true the link is only checked for its status; the
+	// response body is not parsed for more links.
+	isExternal bool
+
+	// Status of link after scan, mostly the HTTP status code.
+	// 0: link is the result of scan, not processed yet.
+	// StatusBadLink: link is invalid, not parseable or unreachable.
+	// 200 - 211: OK.
+	// 400 - 511: Error.
+	status int
+}
+
+// checkExternal set the isExternal field to be true if
+//
+// (1) [linkQueue.url] does not start with [worker.scanUrl]
+//
+// (2) linkQueue is from scanPastResult, indicated by non-nil
+// [worker.pastResult].
+// In this case, we do not want to scan the other pages from the same scanUrl
+// domain.
+func (linkq *linkQueue) checkExternal(wrk *worker) {
+	if !strings.HasPrefix(linkq.url, wrk.scanUrl.String()) {
+		linkq.isExternal = true
+		return
+	}
+	if wrk.pastResult != nil {
+		linkq.isExternal = true
+		return
+	}
+}
diff --git a/brokenlinks/result.go b/brokenlinks/result.go
new file mode 100644
index 0000000..676859b
--- /dev/null
+++ b/brokenlinks/result.go
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks
+
+import (
+	"slices"
+	"strings"
+)
+
+// Broken stores the broken link, its HTTP status code, and the error message
+// that caused it.
+type Broken struct {
+	Link  string `json:"link"`
+	Error string `json:"error,omitempty"`
+	Code  int    `json:"code"`
+}
+
+// Result store the result of scanning for broken links.
+type Result struct {
+	// BrokenLinks store the page and its broken links.
+	BrokenLinks map[string][]Broken `json:"broken_links"`
+}
+
+func newResult() *Result {
+	return &Result{
+		BrokenLinks: map[string][]Broken{},
+	}
+}
+
+func (result *Result) sort() {
+	for _, listBroken := range result.BrokenLinks {
+		slices.SortFunc(listBroken, func(a, b Broken) int {
+			return strings.Compare(a.Link, b.Link)
+		})
+	}
+}
diff --git a/brokenlinks/testdata/past_result.json b/brokenlinks/testdata/past_result.json
new file mode 100644
index 0000000..ca29d35
--- /dev/null
+++ b/brokenlinks/testdata/past_result.json
@@ -0,0 +1,10 @@
+{
+  "broken_links": {
+    "http://127.0.0.1:11836/page2": [
+      {
+        "link": "http://127.0.0.1:11836/",
+        "code": 404
+      }
+    ]
+  }
+}
diff --git a/brokenlinks/testdata/past_result.json.license b/brokenlinks/testdata/past_result.json.license
new file mode 100644
index 0000000..22616a9
--- /dev/null
+++ b/brokenlinks/testdata/past_result.json.license
@@ -0,0 +1,2 @@
+SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+SPDX-License-Identifier: GPL-3.0-only
diff --git a/brokenlinks/testdata/web/broken.html b/brokenlinks/testdata/web/broken.html
new file mode 100644
index 0000000..533e542
--- /dev/null
+++ b/brokenlinks/testdata/web/broken.html
@@ -0,0 +1,7 @@
+<html>
+  <head></head>
+  <body>
+    <a href="/brokenPage"
+    <p>
+  </body>
+</html>
diff --git a/brokenlinks/testdata/web/gopher.png b/brokenlinks/testdata/web/gopher.png
new file mode 100644
index 0000000..79352be
Binary files /dev/null and b/brokenlinks/testdata/web/gopher.png differ
diff --git a/brokenlinks/testdata/web/index.html b/brokenlinks/testdata/web/index.html
new file mode 100644
index 0000000..61a1f39
--- /dev/null
+++ b/brokenlinks/testdata/web/index.html
@@ -0,0 +1,22 @@
+<!--
+SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+SPDX-License-Identifier: GPL-3.0-only
+-->
+<html>
+  <body>
+    <img src="/broken.png" />
+    <a href="/brokenPage">Broken page</a>
+    <img src="/gopher.png" />
+    <img width="200" src="" />
+    <a href="/page2">Page 2</a>
+    <a href="/broken.html">Broken HTML</a>
+    <a href="http://127.0.0.1:11900">External URL</a>
+    <!-- Error when fetching with GET -->
+    <a href="http:/127.0.0.1:11836">Invalid external URL</a>
+    <!-- Error when parsing URL -->
+    <a href="http://127.0.0.1:abc">Invalid URL port</a>
+    <!-- Fragment should be skipped and cleaned up -->
+    <a href="#goto_a">Same with href to "/"</a>
+    <a href="/page2#goto_a">Same with href to "/page2"</a>
+  </body>
+</html>
diff --git a/brokenlinks/testdata/web/page2/index.html b/brokenlinks/testdata/web/page2/index.html
new file mode 100644
index 0000000..ae6b4ea
--- /dev/null
+++ b/brokenlinks/testdata/web/page2/index.html
@@ -0,0 +1,14 @@
+<!--
+SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+SPDX-License-Identifier: GPL-3.0-only
+-->
+<html>
+  <body>
+    <img src="/broken.png" />
+    <img src="broken2.png" />
+    <a href="broken/relative">broken relative link</a>
+    <a href="/">Back with absolute path</a>
+    <a href="../">Back with relative path</a>
+    <a href="http://127.0.0.1:11900/page2">External URL page2</a>
+  </body>
+</html>
diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go
new file mode 100644
index 0000000..4ed56d2
--- /dev/null
+++ b/brokenlinks/worker.go
@@ -0,0 +1,467 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"log"
+	"net"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"sync"
+	"time"
+
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+)
+
+type worker struct {
+	// seenLink stores the URLs that are being or have been scanned and
+	// their HTTP status codes.
+	seenLink map[string]int
+
+	// resultq is the channel that collects the results from scanning.
+	resultq chan map[string]linkQueue
+
+	// result contains the final result after all of the pages have been
+	// scanned.
+	result *Result
+
+	// pastResult contains the past scan result, loaded from file
+	// [Options.PastResultFile].
+	pastResult *Result
+
+	// The base URL that will be joined to relative or absolute
+	// links or image.
+	baseUrl *url.URL
+
+	// The URL to scan.
+	scanUrl *url.URL
+
+	log *log.Logger
+
+	opts Options
+
+	// wg sync the goroutine scanner.
+	wg sync.WaitGroup
+}
+
+func newWorker(opts Options) (wrk *worker, err error) {
+	wrk = &worker{
+		opts:     opts,
+		seenLink: map[string]int{},
+		resultq:  make(chan map[string]linkQueue, 100),
+		result:   newResult(),
+		log:      log.New(os.Stderr, ``, log.LstdFlags),
+	}
+
+	wrk.scanUrl, err = url.Parse(opts.Url)
+	if err != nil {
+		return nil, fmt.Errorf(`invalid URL %q`, opts.Url)
+	}
+	wrk.scanUrl.Path = strings.TrimSuffix(wrk.scanUrl.Path, `/`)
+	wrk.scanUrl.Fragment = ""
+	wrk.scanUrl.RawFragment = ""
+
+	wrk.baseUrl = &url.URL{
+		Scheme: wrk.scanUrl.Scheme,
+		Host:   wrk.scanUrl.Host,
+	}
+
+	if opts.PastResultFile == "" {
+		// Run with normal scan.
+		return wrk, nil
+	}
+
+	pastresult, err := os.ReadFile(opts.PastResultFile)
+	if err != nil {
+		return nil, err
+	}
+
+	wrk.pastResult = newResult()
+	err = json.Unmarshal(pastresult, &wrk.pastResult)
+	if err != nil {
+		return nil, err
+	}
+
+	return wrk, nil
+}
+
+func (wrk *worker) run() (result *Result, err error) {
+	if wrk.pastResult == nil {
+		result, err = wrk.scanAll()
+	} else {
+		result, err = wrk.scanPastResult()
+	}
+	return result, err
+}
+
+// scanAll scans all pages starting from [Options.Url].
+func (wrk *worker) scanAll() (result *Result, err error) {
+	// Scan the first URL to make sure that the server is reachable.
+	var firstLinkq = linkQueue{
+		parentUrl: nil,
+		url:       wrk.scanUrl.String(),
+		status:    http.StatusProcessing,
+	}
+	wrk.seenLink[firstLinkq.url] = http.StatusProcessing
+
+	wrk.wg.Add(1)
+	go wrk.scan(firstLinkq)
+	wrk.wg.Wait()
+
+	var resultq = <-wrk.resultq
+	for _, linkq := range resultq {
+		if linkq.url == firstLinkq.url {
+			if linkq.errScan != nil {
+				return nil, linkq.errScan
+			}
+			wrk.seenLink[linkq.url] = linkq.status
+			continue
+		}
+		if linkq.status >= http.StatusBadRequest {
+			wrk.markBroken(linkq)
+			continue
+		}
+
+		wrk.seenLink[linkq.url] = http.StatusProcessing
+		wrk.wg.Add(1)
+		go wrk.scan(linkq)
+	}
+
+	var tick = time.NewTicker(500 * time.Millisecond)
+	var listWaitStatus []linkQueue
+	var isScanning = true
+	for isScanning {
+		select {
+		case resultq := <-wrk.resultq:
+			listWaitStatus = wrk.processResult(resultq, listWaitStatus)
+
+		case <-tick.C:
+			wrk.wg.Wait()
+			if len(wrk.resultq) != 0 {
+				continue
+			}
+			if len(listWaitStatus) != 0 {
+				// There are links that still waiting for
+				// scanning to be completed.
+				continue
+			}
+			isScanning = false
+		}
+	}
+	wrk.result.sort()
+	return wrk.result, nil
+}
+
+// scanPastResult scan only pages reported inside
+// [Result.BrokenLinks].
+func (wrk *worker) scanPastResult() (
+	result *Result, err error,
+) {
+	go func() {
+		for page := range wrk.pastResult.BrokenLinks {
+			var linkq = linkQueue{
+				parentUrl: nil,
+				url:       page,
+				status:    http.StatusProcessing,
+			}
+			wrk.seenLink[linkq.url] = http.StatusProcessing
+			wrk.wg.Add(1)
+			go wrk.scan(linkq)
+		}
+	}()
+
+	var tick = time.NewTicker(500 * time.Millisecond)
+	var listWaitStatus []linkQueue
+	var isScanning = true
+	for isScanning {
+		select {
+		case resultq := <-wrk.resultq:
+			listWaitStatus = wrk.processResult(resultq, listWaitStatus)
+
+		case <-tick.C:
+			wrk.wg.Wait()
+			if len(wrk.resultq) != 0 {
+				continue
+			}
+			if len(listWaitStatus) != 0 {
+				// There are links that still waiting for
+				// scanning to be completed.
+				continue
+			}
+			isScanning = false
+		}
+	}
+	wrk.result.sort()
+	return wrk.result, nil
+}
+
+// processResult the resultq contains the original URL being scanned
+// and its child links.
+// For example, scanning "http://example.tld" result in
+//
+//	"http://example.tld": {status=200}
+//	"http://example.tld/page": {status=0}
+//	"http://example.tld/image.png": {status=0}
+//	"http://bad:domain/image.png": {status=700}
+func (wrk *worker) processResult(
+	resultq map[string]linkQueue, listWaitStatus []linkQueue,
+) (
+	newList []linkQueue,
+) {
+	for _, linkq := range resultq {
+		if linkq.status >= http.StatusBadRequest {
+			wrk.markBroken(linkq)
+			continue
+		}
+		if linkq.status != 0 {
+			// linkq is the result of scan with
+			// non error status.
+			wrk.seenLink[linkq.url] = linkq.status
+			continue
+		}
+
+		seenStatus, seen := wrk.seenLink[linkq.url]
+		if !seen {
+			wrk.seenLink[linkq.url] = http.StatusProcessing
+			wrk.wg.Add(1)
+			go wrk.scan(linkq)
+			continue
+		}
+		if seenStatus >= http.StatusBadRequest {
+			linkq.status = seenStatus
+			wrk.markBroken(linkq)
+			continue
+		}
+		if seenStatus >= http.StatusOK {
+			// The link has been processed and its
+			// not an error.
+			continue
+		}
+		// The link being processed by other goroutine.
+		linkq.status = seenStatus
+		newList = append(newList, linkq)
+	}
+	for _, linkq := range listWaitStatus {
+		seenStatus := wrk.seenLink[linkq.url]
+		if seenStatus >= http.StatusBadRequest {
+			linkq.status = seenStatus
+			wrk.markBroken(linkq)
+			continue
+		}
+		if seenStatus >= http.StatusOK {
+			continue
+		}
+		if seenStatus == http.StatusProcessing {
+			// Scanning still in progress.
+			newList = append(newList, linkq)
+			continue
+		}
+	}
+	return newList
+}
+
+func (wrk *worker) markBroken(linkq linkQueue) {
+	var parentUrl = linkq.parentUrl.String()
+	var listBroken = wrk.result.BrokenLinks[parentUrl]
+	var brokenLink = Broken{
+		Link: linkq.url,
+		Code: linkq.status,
+	}
+	if linkq.errScan != nil {
+		brokenLink.Error = linkq.errScan.Error()
+	}
+	listBroken = append(listBroken, brokenLink)
+	wrk.result.BrokenLinks[parentUrl] = listBroken
+
+	wrk.seenLink[linkq.url] = linkq.status
+}
+
+// scan fetches the HTML page or image to check if it is valid.
+func (wrk *worker) scan(linkq linkQueue) {
+	defer func() {
+		if wrk.opts.IsVerbose && linkq.errScan != nil {
+			wrk.log.Printf("error: %d %s error=%v\n", linkq.status,
+				linkq.url, linkq.errScan)
+		}
+		wrk.wg.Done()
+	}()
+
+	var (
+		resultq  = map[string]linkQueue{}
+		httpResp *http.Response
+		err      error
+	)
+	httpResp, err = wrk.fetch(linkq)
+	if err != nil {
+		linkq.status = StatusBadLink
+		linkq.errScan = err
+		resultq[linkq.url] = linkq
+		go wrk.pushResult(resultq)
+		return
+	}
+	defer httpResp.Body.Close()
+
+	linkq.status = httpResp.StatusCode
+	resultq[linkq.url] = linkq
+
+	if httpResp.StatusCode >= http.StatusBadRequest {
+		go wrk.pushResult(resultq)
+		return
+	}
+	if linkq.kind == atom.Img || linkq.isExternal {
+		go wrk.pushResult(resultq)
+		return
+	}
+
+	var doc *html.Node
+	doc, _ = html.Parse(httpResp.Body)
+
+	// After checking the code and tests for [html.Parse], there are
+	// no actual cases where HTML content will return an error.
+	// The only possible error is when reading from body (io.Reader), and
+	// that is also almost impossible.
+	//
+	// [html.Parse]: https://go.googlesource.com/net/+/refs/tags/v0.40.0/html/parse.go#2347
+
+	var scanUrl *url.URL
+
+	scanUrl, err = url.Parse(linkq.url)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	var node *html.Node
+	for node = range doc.Descendants() {
+		if node.Type != html.ElementNode {
+			continue
+		}
+		var nodeLink *linkQueue
+		if node.DataAtom == atom.A {
+			for _, attr := range node.Attr {
+				if attr.Key != `href` {
+					continue
+				}
+				nodeLink = wrk.processLink(scanUrl, attr.Val, atom.A)
+				break
+			}
+		} else if node.DataAtom == atom.Img {
+			for _, attr := range node.Attr {
+				if attr.Key != `src` {
+					continue
+				}
+				nodeLink = wrk.processLink(scanUrl, attr.Val, atom.Img)
+				break
+			}
+		} else {
+			continue
+		}
+		if nodeLink == nil {
+			continue
+		}
+		_, seen := resultq[nodeLink.url]
+		if !seen {
+			nodeLink.checkExternal(wrk)
+			resultq[nodeLink.url] = *nodeLink
+		}
+	}
+	go wrk.pushResult(resultq)
+}
+
+func (wrk *worker) fetch(linkq linkQueue) (
+	httpResp *http.Response,
+	err error,
+) {
+	const maxRetry = 5
+	var retry int
+	for retry < 5 {
+		if linkq.kind == atom.Img {
+			if wrk.opts.IsVerbose {
+				wrk.log.Printf("scan: HEAD %s\n", linkq.url)
+			}
+			httpResp, err = http.Head(linkq.url)
+		} else {
+			if wrk.opts.IsVerbose {
+				wrk.log.Printf("scan: GET %s\n", linkq.url)
+			}
+			httpResp, err = http.Get(linkq.url)
+		}
+		if err == nil {
+			return httpResp, nil
+		}
+		var errDNS *net.DNSError
+		if !errors.As(err, &errDNS) {
+			return nil, err
+		}
+		if errDNS.Timeout() {
+			retry++
+		}
+	}
+	return nil, err
+}
+
+func (wrk *worker) processLink(parentUrl *url.URL, val string, kind atom.Atom) (
+	linkq *linkQueue,
+) {
+	if len(val) == 0 {
+		return nil
+	}
+
+	var newUrl *url.URL
+	var err error
+	newUrl, err = url.Parse(val)
+	if err != nil {
+		return &linkQueue{
+			parentUrl: parentUrl,
+			errScan:   err,
+			url:       val,
+			kind:      kind,
+			status:    StatusBadLink,
+		}
+	}
+	newUrl.Fragment = ""
+	newUrl.RawFragment = ""
+
+	if kind == atom.A && val[0] == '#' {
+		// Ignore link to ID, like `href="#element_id"`.
+		return nil
+	}
+	if strings.HasPrefix(val, `http`) {
+		return &linkQueue{
+			parentUrl: parentUrl,
+			url:       strings.TrimSuffix(newUrl.String(), `/`),
+			kind:      kind,
+		}
+	}
+	if val[0] == '/' {
+		// val is an absolute path, join it to the base URL.
+		newUrl = wrk.baseUrl.JoinPath(newUrl.Path)
+	} else {
+		// val is relative to parent URL.
+		newUrl = parentUrl.JoinPath(`/`, newUrl.Path)
+	}
+	linkq = &linkQueue{
+		parentUrl: parentUrl,
+		url:       strings.TrimSuffix(newUrl.String(), `/`),
+		kind:      kind,
+	}
+	return linkq
+}
+
+func (wrk *worker) pushResult(resultq map[string]linkQueue) {
+	var tick = time.NewTicker(100 * time.Millisecond)
+	for {
+		select {
+		case wrk.resultq <- resultq:
+			tick.Stop()
+			return
+		case <-tick.C:
+		}
+	}
+}
diff --git a/brokenlinks_test.go b/brokenlinks_test.go
deleted file mode 100644
index 3818fbc..0000000
--- a/brokenlinks_test.go
+++ /dev/null
@@ -1,166 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package jarink_test
-
-import (
-	"encoding/json"
-	"net/http"
-	"testing"
-
-	"git.sr.ht/~shulhan/jarink"
-	"git.sr.ht/~shulhan/pakakeh.go/lib/test"
-)
-
-func TestBrokenlinks(t *testing.T) {
-	var testUrl = `http://` + testAddress
-
-	type testCase struct {
-		exp      map[string][]jarink.Broken
-		scanUrl  string
-		expError string
-	}
-
-	listCase := []testCase{{
-		scanUrl:  `127.0.0.1:14594`,
-		expError: `brokenlinks: invalid URL "127.0.0.1:14594"`,
-	}, {
-		scanUrl:  `http://127.0.0.1:14594`,
-		expError: `brokenlinks: Get "http://127.0.0.1:14594": dial tcp 127.0.0.1:14594: connect: connection refused`,
-	}, {
-		scanUrl: testUrl,
-		exp: map[string][]jarink.Broken{
-			testUrl: []jarink.Broken{
-				{
-					Link: testUrl + `/broken.png`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/brokenPage`,
-					Code: http.StatusNotFound,
-				}, {
-					Link:  `http://127.0.0.1:abc`,
-					Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`,
-					Code:  jarink.StatusBadLink,
-				}, {
-					Link:  `http:/127.0.0.1:11836`,
-					Error: `Get "http:/127.0.0.1:11836": http: no Host in request URL`,
-					Code:  jarink.StatusBadLink,
-				},
-			},
-			testUrl + `/broken.html`: []jarink.Broken{
-				{
-					Link: testUrl + `/brokenPage`,
-					Code: http.StatusNotFound,
-				},
-			},
-			testUrl + `/page2`: []jarink.Broken{
-				{
-					Link: testUrl + `/broken.png`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken/relative`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken2.png`,
-					Code: http.StatusNotFound,
-				},
-			},
-		},
-	}, {
-		// Scanning on "/path" should not scan the the "/" or other
-		// pages other than below of "/path" itself.
-		scanUrl: testUrl + `/page2`,
-		exp: map[string][]jarink.Broken{
-			testUrl + `/page2`: []jarink.Broken{
-				{
-					Link: testUrl + `/broken.png`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken/relative`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken2.png`,
-					Code: http.StatusNotFound,
-				},
-			},
-		},
-	}}
-
-	var (
-		result *jarink.BrokenlinksResult
-		err    error
-	)
-	for _, tcase := range listCase {
-		t.Logf(`--- brokenlinks: %s`, tcase.scanUrl)
-		var brokenlinksOpts = jarink.BrokenlinksOptions{
-			Url: tcase.scanUrl,
-		}
-		result, err = jarink.Brokenlinks(brokenlinksOpts)
-		if err != nil {
-			test.Assert(t, tcase.scanUrl+` error`,
-				tcase.expError, err.Error())
-			continue
-		}
-		//got, _ := json.MarshalIndent(result.BrokenLinks, ``, `  `)
-		//t.Logf(`got=%s`, got)
-		test.Assert(t, tcase.scanUrl, tcase.exp, result.BrokenLinks)
-	}
-}
-
-// Test running Brokenlinks with file PastResultFile is set.
-// The PastResultFile is modified to only report errors on "/page2".
-func TestBrokenlinks_pastResult(t *testing.T) {
-	var testUrl = `http://` + testAddress
-
-	type testCase struct {
-		exp      map[string][]jarink.Broken
-		expError string
-		opts     jarink.BrokenlinksOptions
-	}
-
-	listCase := []testCase{{
-		// With invalid file.
-		opts: jarink.BrokenlinksOptions{
-			Url:            testUrl,
-			PastResultFile: `testdata/invalid`,
-		},
-		expError: `brokenlinks: open testdata/invalid: no such file or directory`,
-	}, {
-		// With valid file.
-		opts: jarink.BrokenlinksOptions{
-			Url:            testUrl,
-			PastResultFile: `testdata/past_result.json`,
-		},
-		exp: map[string][]jarink.Broken{
-			testUrl + `/page2`: []jarink.Broken{
-				{
-					Link: testUrl + `/broken.png`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken/relative`,
-					Code: http.StatusNotFound,
-				}, {
-					Link: testUrl + `/page2/broken2.png`,
-					Code: http.StatusNotFound,
-				},
-			},
-		},
-	}}
-
-	var (
-		result *jarink.BrokenlinksResult
-		err    error
-	)
-	for _, tcase := range listCase {
-		t.Logf(`--- brokenlinks: %s`, tcase.opts.Url)
-		result, err = jarink.Brokenlinks(tcase.opts)
-		if err != nil {
-			test.Assert(t, tcase.opts.Url+` error`,
-				tcase.expError, err.Error())
-			continue
-		}
-		got, _ := json.MarshalIndent(result.BrokenLinks, ``, `  `)
-		t.Logf(`got=%s`, got)
-		test.Assert(t, tcase.opts.Url, tcase.exp, result.BrokenLinks)
-	}
-}
diff --git a/brokenlinks_worker.go b/brokenlinks_worker.go
deleted file mode 100644
index a4e854d..0000000
--- a/brokenlinks_worker.go
+++ /dev/null
@@ -1,467 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package jarink
-
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"log"
-	"net"
-	"net/http"
-	"net/url"
-	"os"
-	"strings"
-	"sync"
-	"time"
-
-	"golang.org/x/net/html"
-	"golang.org/x/net/html/atom"
-)
-
-type brokenlinksWorker struct {
-	// seenLink store the URL being or has been scanned and its HTTP
-	// status code.
-	seenLink map[string]int
-
-	// resultq channel that collect result from scanning.
-	resultq chan map[string]linkQueue
-
-	// result contains the final result after all of the pages has been
-	// scanned.
-	result *BrokenlinksResult
-
-	// pastResult containts the past scan result, loaded from file
-	// [BrokenlinksOptions.PastResultFile].
-	pastResult *BrokenlinksResult
-
-	// The base URL that will be joined to relative or absolute
-	// links or image.
-	baseUrl *url.URL
-
-	// The URL to scan.
-	scanUrl *url.URL
-
-	log *log.Logger
-
-	opts BrokenlinksOptions
-
-	// wg sync the goroutine scanner.
-	wg sync.WaitGroup
-}
-
-func newWorker(opts BrokenlinksOptions) (wrk *brokenlinksWorker, err error) {
-	wrk = &brokenlinksWorker{
-		opts:     opts,
-		seenLink: map[string]int{},
-		resultq:  make(chan map[string]linkQueue, 100),
-		result:   newBrokenlinksResult(),
-		log:      log.New(os.Stderr, ``, log.LstdFlags),
-	}
-
-	wrk.scanUrl, err = url.Parse(opts.Url)
-	if err != nil {
-		return nil, fmt.Errorf(`invalid URL %q`, opts.Url)
-	}
-	wrk.scanUrl.Path = strings.TrimSuffix(wrk.scanUrl.Path, `/`)
-	wrk.scanUrl.Fragment = ""
-	wrk.scanUrl.RawFragment = ""
-
-	wrk.baseUrl = &url.URL{
-		Scheme: wrk.scanUrl.Scheme,
-		Host:   wrk.scanUrl.Host,
-	}
-
-	if opts.PastResultFile == "" {
-		// Run with normal scan.
-		return wrk, nil
-	}
-
-	pastresult, err := os.ReadFile(opts.PastResultFile)
-	if err != nil {
-		return nil, err
-	}
-
-	wrk.pastResult = newBrokenlinksResult()
-	err = json.Unmarshal(pastresult, &wrk.pastResult)
-	if err != nil {
-		return nil, err
-	}
-
-	return wrk, nil
-}
-
-func (wrk *brokenlinksWorker) run() (result *BrokenlinksResult, err error) {
-	if wrk.pastResult == nil {
-		result, err = wrk.scanAll()
-	} else {
-		result, err = wrk.scanPastResult()
-	}
-	return result, err
-}
-
-// scanAll scan all pages start from [BrokenlinksOptions.Url].
-func (wrk *brokenlinksWorker) scanAll() (result *BrokenlinksResult, err error) {
-	// Scan the first URL to make sure that the server is reachable.
-	var firstLinkq = linkQueue{
-		parentUrl: nil,
-		url:       wrk.scanUrl.String(),
-		status:    http.StatusProcessing,
-	}
-	wrk.seenLink[firstLinkq.url] = http.StatusProcessing
-
-	wrk.wg.Add(1)
-	go wrk.scan(firstLinkq)
-	wrk.wg.Wait()
-
-	var resultq = <-wrk.resultq
-	for _, linkq := range resultq {
-		if linkq.url == firstLinkq.url {
-			if linkq.errScan != nil {
-				return nil, linkq.errScan
-			}
-			wrk.seenLink[linkq.url] = linkq.status
-			continue
-		}
-		if linkq.status >= http.StatusBadRequest {
-			wrk.markBroken(linkq)
-			continue
-		}
-
-		wrk.seenLink[linkq.url] = http.StatusProcessing
-		wrk.wg.Add(1)
-		go wrk.scan(linkq)
-	}
-
-	var tick = time.NewTicker(500 * time.Millisecond)
-	var listWaitStatus []linkQueue
-	var isScanning = true
-	for isScanning {
-		select {
-		case resultq := <-wrk.resultq:
-			listWaitStatus = wrk.processResult(resultq, listWaitStatus)
-
-		case <-tick.C:
-			wrk.wg.Wait()
-			if len(wrk.resultq) != 0 {
-				continue
-			}
-			if len(listWaitStatus) != 0 {
-				// There are links that still waiting for
-				// scanning to be completed.
-				continue
-			}
-			isScanning = false
-		}
-	}
-	wrk.result.sort()
-	return wrk.result, nil
-}
-
-// scanPastResult scan only pages reported inside
-// [BrokenlinksResult.BrokenLinks].
-func (wrk *brokenlinksWorker) scanPastResult() (
-	result *BrokenlinksResult, err error,
-) {
-	go func() {
-		for page := range wrk.pastResult.BrokenLinks {
-			var linkq = linkQueue{
-				parentUrl: nil,
-				url:       page,
-				status:    http.StatusProcessing,
-			}
-			wrk.seenLink[linkq.url] = http.StatusProcessing
-			wrk.wg.Add(1)
-			go wrk.scan(linkq)
-		}
-	}()
-
-	var tick = time.NewTicker(500 * time.Millisecond)
-	var listWaitStatus []linkQueue
-	var isScanning = true
-	for isScanning {
-		select {
-		case resultq := <-wrk.resultq:
-			listWaitStatus = wrk.processResult(resultq, listWaitStatus)
-
-		case <-tick.C:
-			wrk.wg.Wait()
-			if len(wrk.resultq) != 0 {
-				continue
-			}
-			if len(listWaitStatus) != 0 {
-				// There are links that still waiting for
-				// scanning to be completed.
-				continue
-			}
-			isScanning = false
-		}
-	}
-	wrk.result.sort()
-	return wrk.result, nil
-}
-
-// processResult the resultq contains the original URL being scanned
-// and its child links.
-// For example, scanning "http://example.tld" result in
-//
-//	"http://example.tld": {status=200}
-//	"http://example.tld/page": {status=0}
-//	"http://example.tld/image.png": {status=0}
-//	"http://bad:domain/image.png": {status=700}
-func (wrk *brokenlinksWorker) processResult(
-	resultq map[string]linkQueue, listWaitStatus []linkQueue,
-) (
-	newList []linkQueue,
-) {
-	for _, linkq := range resultq {
-		if linkq.status >= http.StatusBadRequest {
-			wrk.markBroken(linkq)
-			continue
-		}
-		if linkq.status != 0 {
-			// linkq is the result of scan with
-			// non error status.
-			wrk.seenLink[linkq.url] = linkq.status
-			continue
-		}
-
-		seenStatus, seen := wrk.seenLink[linkq.url]
-		if !seen {
-			wrk.seenLink[linkq.url] = http.StatusProcessing
-			wrk.wg.Add(1)
-			go wrk.scan(linkq)
-			continue
-		}
-		if seenStatus >= http.StatusBadRequest {
-			linkq.status = seenStatus
-			wrk.markBroken(linkq)
-			continue
-		}
-		if seenStatus >= http.StatusOK {
-			// The link has been processed and its
-			// not an error.
-			continue
-		}
-		// The link being processed by other goroutine.
-		linkq.status = seenStatus
-		newList = append(newList, linkq)
-	}
-	for _, linkq := range listWaitStatus {
-		seenStatus := wrk.seenLink[linkq.url]
-		if seenStatus >= http.StatusBadRequest {
-			linkq.status = seenStatus
-			wrk.markBroken(linkq)
-			continue
-		}
-		if seenStatus >= http.StatusOK {
-			continue
-		}
-		if seenStatus == http.StatusProcessing {
-			// Scanning still in progress.
-			newList = append(newList, linkq)
-			continue
-		}
-	}
-	return newList
-}
-
-func (wrk *brokenlinksWorker) markBroken(linkq linkQueue) {
-	var parentUrl = linkq.parentUrl.String()
-	var listBroken = wrk.result.BrokenLinks[parentUrl]
-	var brokenLink = Broken{
-		Link: linkq.url,
-		Code: linkq.status,
-	}
-	if linkq.errScan != nil {
-		brokenLink.Error = linkq.errScan.Error()
-	}
-	listBroken = append(listBroken, brokenLink)
-	wrk.result.BrokenLinks[parentUrl] = listBroken
-
-	wrk.seenLink[linkq.url] = linkq.status
-}
-
-// scan fetch the HTML page or image to check if its valid.
-func (wrk *brokenlinksWorker) scan(linkq linkQueue) {
-	defer func() {
-		if wrk.opts.IsVerbose && linkq.errScan != nil {
-			wrk.log.Printf("error: %d %s error=%v\n", linkq.status,
-				linkq.url, linkq.errScan)
-		}
-		wrk.wg.Done()
-	}()
-
-	var (
-		resultq  = map[string]linkQueue{}
-		httpResp *http.Response
-		err      error
-	)
-	httpResp, err = wrk.fetch(linkq)
-	if err != nil {
-		linkq.status = StatusBadLink
-		linkq.errScan = err
-		resultq[linkq.url] = linkq
-		go wrk.pushResult(resultq)
-		return
-	}
-	defer httpResp.Body.Close()
-
-	linkq.status = httpResp.StatusCode
-	resultq[linkq.url] = linkq
-
-	if httpResp.StatusCode >= http.StatusBadRequest {
-		go wrk.pushResult(resultq)
-		return
-	}
-	if linkq.kind == atom.Img || linkq.isExternal {
-		go wrk.pushResult(resultq)
-		return
-	}
-
-	var doc *html.Node
-	doc, _ = html.Parse(httpResp.Body)
-
-	// After we check the code and test for [html.Parse] there are
-	// no case actual cases where HTML content will return an error.
-	// The only possible error is when reading from body (io.Reader), and
-	// that is also almost impossible.
-	//
-	// [html.Parse]: https://go.googlesource.com/net/+/refs/tags/v0.40.0/html/parse.go#2347
-
-	var scanUrl *url.URL
-
-	scanUrl, err = url.Parse(linkq.url)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	var node *html.Node
-	for node = range doc.Descendants() {
-		if node.Type != html.ElementNode {
-			continue
-		}
-		var nodeLink *linkQueue
-		if node.DataAtom == atom.A {
-			for _, attr := range node.Attr {
-				if attr.Key != `href` {
-					continue
-				}
-				nodeLink = wrk.processLink(scanUrl, attr.Val, atom.A)
-				break
-			}
-		} else if node.DataAtom == atom.Img {
-			for _, attr := range node.Attr {
-				if attr.Key != `src` {
-					continue
-				}
-				nodeLink = wrk.processLink(scanUrl, attr.Val, atom.Img)
-				break
-			}
-		} else {
-			continue
-		}
-		if nodeLink == nil {
-			continue
-		}
-		_, seen := resultq[nodeLink.url]
-		if !seen {
-			nodeLink.checkExternal(wrk)
-			resultq[nodeLink.url] = *nodeLink
-		}
-	}
-	go wrk.pushResult(resultq)
-}
-
-func (wrk *brokenlinksWorker) fetch(linkq linkQueue) (
-	httpResp *http.Response,
-	err error,
-) {
-	const maxRetry = 5
-	var retry int
-	for retry < 5 {
-		if linkq.kind == atom.Img {
-			if wrk.opts.IsVerbose {
-				wrk.log.Printf("scan: HEAD %s\n", linkq.url)
-			}
-			httpResp, err = http.Head(linkq.url)
-		} else {
-			if wrk.opts.IsVerbose {
-				wrk.log.Printf("scan: GET %s\n", linkq.url)
-			}
-			httpResp, err = http.Get(linkq.url)
-		}
-		if err == nil {
-			return httpResp, nil
-		}
-		var errDNS *net.DNSError
-		if !errors.As(err, &errDNS) {
-			return nil, err
-		}
-		if errDNS.Timeout() {
-			retry++
-		}
-	}
-	return nil, err
-}
-
-func (wrk *brokenlinksWorker) processLink(parentUrl *url.URL, val string, kind atom.Atom) (
-	linkq *linkQueue,
-) {
-	if len(val) == 0 {
-		return nil
-	}
-
-	var newUrl *url.URL
-	var err error
-	newUrl, err = url.Parse(val)
-	if err != nil {
-		return &linkQueue{
-			parentUrl: parentUrl,
-			errScan:   err,
-			url:       val,
-			kind:      kind,
-			status:    StatusBadLink,
-		}
-	}
-	newUrl.Fragment = ""
-	newUrl.RawFragment = ""
-
-	if kind == atom.A && val[0] == '#' {
-		// Ignore link to ID, like `href="#element_id"`.
-		return nil
-	}
-	if strings.HasPrefix(val, `http`) {
-		return &linkQueue{
-			parentUrl: parentUrl,
-			url:       strings.TrimSuffix(newUrl.String(), `/`),
-			kind:      kind,
-		}
-	}
-	if val[0] == '/' {
-		// val is absolute to parent URL.
-		newUrl = wrk.baseUrl.JoinPath(newUrl.Path)
-	} else {
-		// val is relative to parent URL.
-		newUrl = parentUrl.JoinPath(`/`, newUrl.Path)
-	}
-	linkq = &linkQueue{
-		parentUrl: parentUrl,
-		url:       strings.TrimSuffix(newUrl.String(), `/`),
-		kind:      kind,
-	}
-	return linkq
-}
-
-func (wrk *brokenlinksWorker) pushResult(resultq map[string]linkQueue) {
-	var tick = time.NewTicker(100 * time.Millisecond)
-	for {
-		select {
-		case wrk.resultq <- resultq:
-			tick.Stop()
-			return
-		case <-tick.C:
-		}
-	}
-}
diff --git a/cmd/jarink/main.go b/cmd/jarink/main.go
index cba254f..b384032 100644
--- a/cmd/jarink/main.go
+++ b/cmd/jarink/main.go
@@ -12,17 +12,19 @@ import (
 	"strings"
 
 	"git.sr.ht/~shulhan/jarink"
+	"git.sr.ht/~shulhan/jarink/brokenlinks"
 )
 
 func main() {
 	log.SetFlags(0)
 
-	var brokenlinksOpts = jarink.BrokenlinksOptions{}
+	var optIsVerbose bool
+	var optPastResult string
 
-	flag.BoolVar(&brokenlinksOpts.IsVerbose, `verbose`, false,
+	flag.BoolVar(&optIsVerbose, `verbose`, false,
 		`Print additional information while running.`)
 
-	flag.StringVar(&brokenlinksOpts.PastResultFile, `past-result`, ``,
+	flag.StringVar(&optPastResult, `past-result`, ``,
 		`Scan only pages with broken links from the past JSON result.`)
 
 	flag.Parse()
@@ -31,15 +33,22 @@ func main() {
 	cmd = strings.ToLower(cmd)
 	switch cmd {
 	case `brokenlinks`:
-		brokenlinksOpts.Url = flag.Arg(1)
-		if brokenlinksOpts.Url == "" {
+		var opts = brokenlinks.Options{
+			IsVerbose:      optIsVerbose,
+			PastResultFile: optPastResult,
+		}
+
+		opts.Url = flag.Arg(1)
+		if opts.Url == "" {
 			log.Printf(`Missing argument URL to be scanned.`)
 			goto invalid_command
 		}
 
-		var result *jarink.BrokenlinksResult
-		var err error
-		result, err = jarink.Brokenlinks(brokenlinksOpts)
+		var (
+			result *brokenlinks.Result
+			err    error
+		)
+		result, err = brokenlinks.Scan(opts)
 		if err != nil {
 			log.Fatal(err.Error())
 		}
diff --git a/jarink_test.go b/jarink_test.go
deleted file mode 100644
index 91d38a0..0000000
--- a/jarink_test.go
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package jarink_test
-
-import (
-	"log"
-	"net/http"
-	"os"
-	"testing"
-	"time"
-
-	libnet "git.sr.ht/~shulhan/pakakeh.go/lib/net"
-)
-
-// The test run two web servers that serve content on "testdata/web/".
-// The first web server is the one that we want to scan.
-// The second web server is external web server, where HTML pages should not
-// be parsed.
-
-const testAddress = `127.0.0.1:11836`
-const testExternalAddress = `127.0.0.1:11900`
-
-func TestMain(m *testing.M) {
-	log.SetFlags(0)
-	var httpDirWeb = http.Dir(`testdata/web`)
-	var fshandle = http.FileServer(httpDirWeb)
-
-	go func() {
-		var mux = http.NewServeMux()
-		mux.Handle(`/`, fshandle)
-		var testServer = &http.Server{
-			Addr:           testAddress,
-			Handler:        mux,
-			ReadTimeout:    10 * time.Second,
-			WriteTimeout:   10 * time.Second,
-			MaxHeaderBytes: 1 << 20,
-		}
-		var err = testServer.ListenAndServe()
-		if err != nil {
-			log.Fatal(err)
-		}
-	}()
-	go func() {
-		var mux = http.NewServeMux()
-		mux.Handle(`/`, fshandle)
-		var testServer = &http.Server{
-			Addr:           testExternalAddress,
-			Handler:        mux,
-			ReadTimeout:    10 * time.Second,
-			WriteTimeout:   10 * time.Second,
-			MaxHeaderBytes: 1 << 20,
-		}
-		var err = testServer.ListenAndServe()
-		if err != nil {
-			log.Fatal(err)
-		}
-	}()
-
-	var err = libnet.WaitAlive(`tcp`, testAddress, 5*time.Second)
-	if err != nil {
-		log.Fatal(err)
-	}
-	err = libnet.WaitAlive(`tcp`, testExternalAddress, 5*time.Second)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	os.Exit(m.Run())
-}
diff --git a/link_queue.go b/link_queue.go
deleted file mode 100644
index 1470115..0000000
--- a/link_queue.go
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-only
-
-package jarink
-
-import (
-	"net/url"
-	"strings"
-
-	"golang.org/x/net/html/atom"
-)
-
-type linkQueue struct {
-	parentUrl *url.URL
-
-	// The error from scan.
-	errScan error
-
-	// url being scanned.
-	url string
-
-	// kind of url, its either an anchor or image.
-	// It set to 0 if url is the first URL being scanned.
-	kind atom.Atom
-
-	// isExternal if true the scan will issue HTTP method HEAD instead of
-	// GET.
-	isExternal bool
-
-	// Status of link after scan, its mostly used the HTTP status code.
-	// 0: link is the result of scan, not processed yet.
-	// StatusBadLink: link is invalid, not parseable or unreachable.
-	// 200 - 211: OK.
-	// 400 - 511: Error.
-	status int
-}
-
-// checkExternal set the isExternal field to be true if
-//
-// (1) [linkQueue.url] does not start with [brokenlinksWorker.scanUrl]
-//
-// (2) linkQueue is from scanPastResult, indicated by non-nil
-// [brokenlinksWorker.pastResult].
-// In this case, we did not want to scan the other pages from the same scanUrl
-// domain.
-func (linkq *linkQueue) checkExternal(wrk *brokenlinksWorker) {
-	if !strings.HasPrefix(linkq.url, wrk.scanUrl.String()) {
-		linkq.isExternal = true
-		return
-	}
-	if wrk.pastResult != nil {
-		linkq.isExternal = true
-		return
-	}
-}
diff --git a/testdata/past_result.json b/testdata/past_result.json
deleted file mode 100644
index ca29d35..0000000
--- a/testdata/past_result.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "broken_links": {
-    "http://127.0.0.1:11836/page2": [
-      {
-        "link": "http://127.0.0.1:11836/",
-        "code": 404
-      }
-    ]
-  }
-}
diff --git a/testdata/past_result.json.license b/testdata/past_result.json.license
deleted file mode 100644
index 22616a9..0000000
--- a/testdata/past_result.json.license
+++ /dev/null
@@ -1,2 +0,0 @@
-SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-SPDX-License-Identifier: GPL-3.0-only
diff --git a/testdata/web/broken.html b/testdata/web/broken.html
deleted file mode 100644
index 533e542..0000000
--- a/testdata/web/broken.html
+++ /dev/null
@@ -1,7 +0,0 @@
-<html>
-  <head></head>
-  <body>
-    <a href="/brokenPage"
-    <p>
-  </body>
-</html>
diff --git a/testdata/web/gopher.png b/testdata/web/gopher.png
deleted file mode 100644
index 79352be..0000000
Binary files a/testdata/web/gopher.png and /dev/null differ
diff --git a/testdata/web/index.html b/testdata/web/index.html
deleted file mode 100644
index 61a1f39..0000000
--- a/testdata/web/index.html
+++ /dev/null
@@ -1,22 +0,0 @@
-<!--
-SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-SPDX-License-Identifier: GPL-3.0-only
--->
-<html>
-  <body>
-    <img src="/broken.png" />
-    <a href="/brokenPage">Broken page</a>
-    <img src="/gopher.png" />
-    <img width="200" src="" />
-    <a href="/page2">Page 2</a>
-    <a href="/broken.html">Broken HTML</a>
-    <a href="http://127.0.0.1:11900">External URL</a>
-    <!-- Error when fetching with GET -->
-    <a href="http:/127.0.0.1:11836">Invalid external URL</a>
-    <!-- Error when parsing URL -->
-    <a href="http://127.0.0.1:abc">Invalid URL port</a>
-    <!-- Fragment should be skipped and cleaned up -->
-    <a href="#goto_a">Same with href to "/"</a>
-    <a href="/page2#goto_a">Same with href to "/page2"</a>
-  </body>
-</html>
diff --git a/testdata/web/page2/index.html b/testdata/web/page2/index.html
deleted file mode 100644
index ae6b4ea..0000000
--- a/testdata/web/page2/index.html
+++ /dev/null
@@ -1,14 +0,0 @@
-<!--
-SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
-SPDX-License-Identifier: GPL-3.0-only
--->
-<html>
-  <body>
-    <img src="/broken.png" />
-    <img src="broken2.png" />
-    <a href="broken/relative">broken relative link</a>
-    <a href="/">Back with absolute path</a>
-    <a href="../">Back with relative path</a>
-    <a href="http://127.0.0.1:11900/page2">External URL page2</a>
-  </body>
-</html>
-- 
cgit v1.3