aboutsummaryrefslogtreecommitdiff
path: root/brokenlinks
diff options
context:
space:
mode:
Diffstat (limited to 'brokenlinks')
-rw-r--r--brokenlinks/brokenlinks.go39
-rw-r--r--brokenlinks/brokenlinks_test.go227
-rw-r--r--brokenlinks/link_queue.go55
-rw-r--r--brokenlinks/result.go37
-rw-r--r--brokenlinks/testdata/past_result.json10
-rw-r--r--brokenlinks/testdata/past_result.json.license2
-rw-r--r--brokenlinks/testdata/web/broken.html7
-rw-r--r--brokenlinks/testdata/web/gopher.pngbin0 -> 32775 bytes
-rw-r--r--brokenlinks/testdata/web/index.html22
-rw-r--r--brokenlinks/testdata/web/page2/index.html14
-rw-r--r--brokenlinks/worker.go467
11 files changed, 880 insertions, 0 deletions
diff --git a/brokenlinks/brokenlinks.go b/brokenlinks/brokenlinks.go
new file mode 100644
index 0000000..8ac458f
--- /dev/null
+++ b/brokenlinks/brokenlinks.go
@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks
+
+import (
+ "fmt"
+)
+
+const Version = `0.1.0`
+
+// StatusBadLink is the status for a link that is not parseable by [url.Parse]
+// or not reachable during GET or HEAD, either due to a timeout or because the
+// IP or domain does not exist.
+const StatusBadLink = 700
+
+// Options define the options for scanning broken links.
+type Options struct {
+ Url string
+ PastResultFile string
+ IsVerbose bool
+}
+
+// Scan the URL for broken links.
+func Scan(opts Options) (result *Result, err error) {
+ var logp = `brokenlinks`
+ var wrk *worker
+
+ wrk, err = newWorker(opts)
+ if err != nil {
+ return nil, fmt.Errorf(`%s: %s`, logp, err)
+ }
+
+ result, err = wrk.run()
+ if err != nil {
+ return nil, fmt.Errorf(`%s: %s`, logp, err)
+ }
+
+ return result, nil
+}
diff --git a/brokenlinks/brokenlinks_test.go b/brokenlinks/brokenlinks_test.go
new file mode 100644
index 0000000..367ae6c
--- /dev/null
+++ b/brokenlinks/brokenlinks_test.go
@@ -0,0 +1,227 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks_test
+
+import (
+ "encoding/json"
+ "log"
+ "net/http"
+ "os"
+ "testing"
+ "time"
+
+ libnet "git.sr.ht/~shulhan/pakakeh.go/lib/net"
+ "git.sr.ht/~shulhan/pakakeh.go/lib/test"
+
+ "git.sr.ht/~shulhan/jarink/brokenlinks"
+)
+
+// The tests run two web servers that serve content from "testdata/web/".
+// The first web server is the one that we want to scan.
+// The second web server is an external web server, whose HTML pages should
+// not be parsed.
+
+const testAddress = `127.0.0.1:11836`
+const testExternalAddress = `127.0.0.1:11900`
+
+func TestMain(m *testing.M) {
+ log.SetFlags(0)
+ var httpDirWeb = http.Dir(`testdata/web`)
+ var fshandle = http.FileServer(httpDirWeb)
+
+ go func() {
+ var mux = http.NewServeMux()
+ mux.Handle(`/`, fshandle)
+ var testServer = &http.Server{
+ Addr: testAddress,
+ Handler: mux,
+ ReadTimeout: 10 * time.Second,
+ WriteTimeout: 10 * time.Second,
+ MaxHeaderBytes: 1 << 20,
+ }
+ var err = testServer.ListenAndServe()
+ if err != nil {
+ log.Fatal(err)
+ }
+ }()
+ go func() {
+ var mux = http.NewServeMux()
+ mux.Handle(`/`, fshandle)
+ var testServer = &http.Server{
+ Addr: testExternalAddress,
+ Handler: mux,
+ ReadTimeout: 10 * time.Second,
+ WriteTimeout: 10 * time.Second,
+ MaxHeaderBytes: 1 << 20,
+ }
+ var err = testServer.ListenAndServe()
+ if err != nil {
+ log.Fatal(err)
+ }
+ }()
+
+ var err = libnet.WaitAlive(`tcp`, testAddress, 5*time.Second)
+ if err != nil {
+ log.Fatal(err)
+ }
+ err = libnet.WaitAlive(`tcp`, testExternalAddress, 5*time.Second)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ os.Exit(m.Run())
+}
+
+func TestBrokenlinks(t *testing.T) {
+ var testUrl = `http://` + testAddress
+
+ type testCase struct {
+ exp map[string][]brokenlinks.Broken
+ scanUrl string
+ expError string
+ }
+
+ listCase := []testCase{{
+ scanUrl: `127.0.0.1:14594`,
+ expError: `brokenlinks: invalid URL "127.0.0.1:14594"`,
+ }, {
+ scanUrl: `http://127.0.0.1:14594`,
+ expError: `brokenlinks: Get "http://127.0.0.1:14594": dial tcp 127.0.0.1:14594: connect: connection refused`,
+ }, {
+ scanUrl: testUrl,
+ exp: map[string][]brokenlinks.Broken{
+ testUrl: []brokenlinks.Broken{
+ {
+ Link: testUrl + `/broken.png`,
+ Code: http.StatusNotFound,
+ }, {
+ Link: testUrl + `/brokenPage`,
+ Code: http.StatusNotFound,
+ }, {
+ Link: `http://127.0.0.1:abc`,
+ Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`,
+ Code: brokenlinks.StatusBadLink,
+ }, {
+ Link: `http:/127.0.0.1:11836`,
+ Error: `Get "http:/127.0.0.1:11836": http: no Host in request URL`,
+ Code: brokenlinks.StatusBadLink,
+ },
+ },
+ testUrl + `/broken.html`: []brokenlinks.Broken{
+ {
+ Link: testUrl + `/brokenPage`,
+ Code: http.StatusNotFound,
+ },
+ },
+ testUrl + `/page2`: []brokenlinks.Broken{
+ {
+ Link: testUrl + `/broken.png`,
+ Code: http.StatusNotFound,
+ }, {
+ Link: testUrl + `/page2/broken/relative`,
+ Code: http.StatusNotFound,
+ }, {
+ Link: testUrl + `/page2/broken2.png`,
+ Code: http.StatusNotFound,
+ },
+ },
+ },
+ }, {
+ // Scanning on "/path" should not scan the "/" or any
+ // pages other than those below "/path" itself.
+ scanUrl: testUrl + `/page2`,
+ exp: map[string][]brokenlinks.Broken{
+ testUrl + `/page2`: []brokenlinks.Broken{
+ {
+ Link: testUrl + `/broken.png`,
+ Code: http.StatusNotFound,
+ }, {
+ Link: testUrl + `/page2/broken/relative`,
+ Code: http.StatusNotFound,
+ }, {
+ Link: testUrl + `/page2/broken2.png`,
+ Code: http.StatusNotFound,
+ },
+ },
+ },
+ }}
+
+ var (
+ result *brokenlinks.Result
+ err error
+ )
+ for _, tcase := range listCase {
+ t.Logf(`--- brokenlinks: %s`, tcase.scanUrl)
+ var opts = brokenlinks.Options{
+ Url: tcase.scanUrl,
+ }
+ result, err = brokenlinks.Scan(opts)
+ if err != nil {
+ test.Assert(t, tcase.scanUrl+` error`,
+ tcase.expError, err.Error())
+ continue
+ }
+ //got, _ := json.MarshalIndent(result.BrokenLinks, ``, ` `)
+ //t.Logf(`got=%s`, got)
+ test.Assert(t, tcase.scanUrl, tcase.exp, result.BrokenLinks)
+ }
+}
+
+// Test running Brokenlinks with file PastResultFile is set.
+// The PastResultFile is modified to only report errors on "/page2".
+func TestBrokenlinks_pastResult(t *testing.T) {
+ var testUrl = `http://` + testAddress
+
+ type testCase struct {
+ exp map[string][]brokenlinks.Broken
+ expError string
+ opts brokenlinks.Options
+ }
+
+ listCase := []testCase{{
+ // With invalid file.
+ opts: brokenlinks.Options{
+ Url: testUrl,
+ PastResultFile: `testdata/invalid`,
+ },
+ expError: `brokenlinks: open testdata/invalid: no such file or directory`,
+ }, {
+ // With valid file.
+ opts: brokenlinks.Options{
+ Url: testUrl,
+ PastResultFile: `testdata/past_result.json`,
+ },
+ exp: map[string][]brokenlinks.Broken{
+ testUrl + `/page2`: []brokenlinks.Broken{
+ {
+ Link: testUrl + `/broken.png`,
+ Code: http.StatusNotFound,
+ }, {
+ Link: testUrl + `/page2/broken/relative`,
+ Code: http.StatusNotFound,
+ }, {
+ Link: testUrl + `/page2/broken2.png`,
+ Code: http.StatusNotFound,
+ },
+ },
+ },
+ }}
+
+ var (
+ result *brokenlinks.Result
+ err error
+ )
+ for _, tcase := range listCase {
+ t.Logf(`--- brokenlinks: %s`, tcase.opts.Url)
+ result, err = brokenlinks.Scan(tcase.opts)
+ if err != nil {
+ test.Assert(t, tcase.opts.Url+` error`,
+ tcase.expError, err.Error())
+ continue
+ }
+ got, _ := json.MarshalIndent(result.BrokenLinks, ``, ` `)
+ t.Logf(`got=%s`, got)
+ test.Assert(t, tcase.opts.Url, tcase.exp, result.BrokenLinks)
+ }
+}
diff --git a/brokenlinks/link_queue.go b/brokenlinks/link_queue.go
new file mode 100644
index 0000000..164a902
--- /dev/null
+++ b/brokenlinks/link_queue.go
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks
+
+import (
+ "net/url"
+ "strings"
+
+ "golang.org/x/net/html/atom"
+)
+
+type linkQueue struct {
+ parentUrl *url.URL
+
+ // The error from scan.
+ errScan error
+
+ // url being scanned.
+ url string
+
+ // kind of url; it is either an anchor or an image.
+ // It is set to 0 if url is the first URL being scanned.
+ kind atom.Atom
+
+ // isExternal if true the scan will issue HTTP method HEAD instead of
+ // GET.
+ isExternal bool
+
+ // Status of link after scan; it mostly uses the HTTP status code.
+ // 0: link is the result of scan, not processed yet.
+ // StatusBadLink: link is invalid, not parseable or unreachable.
+ // 200 - 211: OK.
+ // 400 - 511: Error.
+ status int
+}
+
+// checkExternal set the isExternal field to be true if
+//
+// (1) [linkQueue.url] does not start with [worker.scanUrl]
+//
+// (2) linkQueue is from scanPastResult, indicated by non-nil
+// [worker.pastResult].
+// In this case, we do not want to scan other pages from the same scanUrl
+// domain.
+func (linkq *linkQueue) checkExternal(wrk *worker) {
+ if !strings.HasPrefix(linkq.url, wrk.scanUrl.String()) {
+ linkq.isExternal = true
+ return
+ }
+ if wrk.pastResult != nil {
+ linkq.isExternal = true
+ return
+ }
+}
diff --git a/brokenlinks/result.go b/brokenlinks/result.go
new file mode 100644
index 0000000..676859b
--- /dev/null
+++ b/brokenlinks/result.go
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks
+
+import (
+ "slices"
+ "strings"
+)
+
+// Broken store the broken link, HTTP status code, and the error message that
+// cause it.
+type Broken struct {
+ Link string `json:"link"`
+ Error string `json:"error,omitempty"`
+ Code int `json:"code"`
+}
+
+// Result store the result of scanning for broken links.
+type Result struct {
+ // BrokenLinks store the page and its broken links.
+ BrokenLinks map[string][]Broken `json:"broken_links"`
+}
+
+func newResult() *Result {
+ return &Result{
+ BrokenLinks: map[string][]Broken{},
+ }
+}
+
+func (result *Result) sort() {
+ for _, listBroken := range result.BrokenLinks {
+ slices.SortFunc(listBroken, func(a, b Broken) int {
+ return strings.Compare(a.Link, b.Link)
+ })
+ }
+}
diff --git a/brokenlinks/testdata/past_result.json b/brokenlinks/testdata/past_result.json
new file mode 100644
index 0000000..ca29d35
--- /dev/null
+++ b/brokenlinks/testdata/past_result.json
@@ -0,0 +1,10 @@
+{
+ "broken_links": {
+ "http://127.0.0.1:11836/page2": [
+ {
+ "link": "http://127.0.0.1:11836/",
+ "code": 404
+ }
+ ]
+ }
+}
diff --git a/brokenlinks/testdata/past_result.json.license b/brokenlinks/testdata/past_result.json.license
new file mode 100644
index 0000000..22616a9
--- /dev/null
+++ b/brokenlinks/testdata/past_result.json.license
@@ -0,0 +1,2 @@
+SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+SPDX-License-Identifier: GPL-3.0-only
diff --git a/brokenlinks/testdata/web/broken.html b/brokenlinks/testdata/web/broken.html
new file mode 100644
index 0000000..533e542
--- /dev/null
+++ b/brokenlinks/testdata/web/broken.html
@@ -0,0 +1,7 @@
+<html>
+ <head></head>
+ <body>
+ <a href="/brokenPage"
+ <p>
+ </body>
+</html>
diff --git a/brokenlinks/testdata/web/gopher.png b/brokenlinks/testdata/web/gopher.png
new file mode 100644
index 0000000..79352be
--- /dev/null
+++ b/brokenlinks/testdata/web/gopher.png
Binary files differ
diff --git a/brokenlinks/testdata/web/index.html b/brokenlinks/testdata/web/index.html
new file mode 100644
index 0000000..61a1f39
--- /dev/null
+++ b/brokenlinks/testdata/web/index.html
@@ -0,0 +1,22 @@
+<!--
+SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+SPDX-License-Identifier: GPL-3.0-only
+-->
+<html>
+ <body>
+ <img src="/broken.png" />
+ <a href="/brokenPage">Broken page</a>
+ <img src="/gopher.png" />
+ <img width="200" src="" />
+ <a href="/page2">Page 2</a>
+ <a href="/broken.html">Broken HTML</a>
+ <a href="http://127.0.0.1:11900">External URL</a>
+ <!-- Error when fetching with GET -->
+ <a href="http:/127.0.0.1:11836">Invalid external URL</a>
+ <!-- Error when parsing URL -->
+ <a href="http://127.0.0.1:abc">Invalid URL port</a>
+ <!-- Fragment should be skipped and cleaned up -->
+ <a href="#goto_a">Same with href to "/"</a>
+ <a href="/page2#goto_a">Same with href to "/page2"</a>
+ </body>
+</html>
diff --git a/brokenlinks/testdata/web/page2/index.html b/brokenlinks/testdata/web/page2/index.html
new file mode 100644
index 0000000..ae6b4ea
--- /dev/null
+++ b/brokenlinks/testdata/web/page2/index.html
@@ -0,0 +1,14 @@
+<!--
+SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+SPDX-License-Identifier: GPL-3.0-only
+-->
+<html>
+ <body>
+ <img src="/broken.png" />
+ <img src="broken2.png" />
+ <a href="broken/relative">broken relative link</a>
+ <a href="/">Back with absolute path</a>
+ <a href="../">Back with relative path</a>
+ <a href="http://127.0.0.1:11900/page2">External URL page2</a>
+ </body>
+</html>
diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go
new file mode 100644
index 0000000..4ed56d2
--- /dev/null
+++ b/brokenlinks/worker.go
@@ -0,0 +1,467 @@
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-only
+
+package brokenlinks
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "log"
+ "net"
+ "net/http"
+ "net/url"
+ "os"
+ "strings"
+ "sync"
+ "time"
+
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
+)
+
+type worker struct {
+ // seenLink stores the URL that is being or has been scanned and its
+ // HTTP status code.
+ seenLink map[string]int
+
+ // resultq channel that collect result from scanning.
+ resultq chan map[string]linkQueue
+
+ // result contains the final result after all of the pages has been
+ // scanned.
+ result *Result
+
+ // pastResult contains the past scan result, loaded from file
+ // [Options.PastResultFile].
+ pastResult *Result
+
+ // The base URL that will be joined to relative or absolute
+ // links or image.
+ baseUrl *url.URL
+
+ // The URL to scan.
+ scanUrl *url.URL
+
+ log *log.Logger
+
+ opts Options
+
+ // wg sync the goroutine scanner.
+ wg sync.WaitGroup
+}
+
+func newWorker(opts Options) (wrk *worker, err error) {
+ wrk = &worker{
+ opts: opts,
+ seenLink: map[string]int{},
+ resultq: make(chan map[string]linkQueue, 100),
+ result: newResult(),
+ log: log.New(os.Stderr, ``, log.LstdFlags),
+ }
+
+ wrk.scanUrl, err = url.Parse(opts.Url)
+ if err != nil {
+ return nil, fmt.Errorf(`invalid URL %q`, opts.Url)
+ }
+ wrk.scanUrl.Path = strings.TrimSuffix(wrk.scanUrl.Path, `/`)
+ wrk.scanUrl.Fragment = ""
+ wrk.scanUrl.RawFragment = ""
+
+ wrk.baseUrl = &url.URL{
+ Scheme: wrk.scanUrl.Scheme,
+ Host: wrk.scanUrl.Host,
+ }
+
+ if opts.PastResultFile == "" {
+ // Run with normal scan.
+ return wrk, nil
+ }
+
+ pastresult, err := os.ReadFile(opts.PastResultFile)
+ if err != nil {
+ return nil, err
+ }
+
+ wrk.pastResult = newResult()
+ err = json.Unmarshal(pastresult, &wrk.pastResult)
+ if err != nil {
+ return nil, err
+ }
+
+ return wrk, nil
+}
+
+func (wrk *worker) run() (result *Result, err error) {
+ if wrk.pastResult == nil {
+ result, err = wrk.scanAll()
+ } else {
+ result, err = wrk.scanPastResult()
+ }
+ return result, err
+}
+
+// scanAll scan all pages start from [Options.Url].
+func (wrk *worker) scanAll() (result *Result, err error) {
+ // Scan the first URL to make sure that the server is reachable.
+ var firstLinkq = linkQueue{
+ parentUrl: nil,
+ url: wrk.scanUrl.String(),
+ status: http.StatusProcessing,
+ }
+ wrk.seenLink[firstLinkq.url] = http.StatusProcessing
+
+ wrk.wg.Add(1)
+ go wrk.scan(firstLinkq)
+ wrk.wg.Wait()
+
+ var resultq = <-wrk.resultq
+ for _, linkq := range resultq {
+ if linkq.url == firstLinkq.url {
+ if linkq.errScan != nil {
+ return nil, linkq.errScan
+ }
+ wrk.seenLink[linkq.url] = linkq.status
+ continue
+ }
+ if linkq.status >= http.StatusBadRequest {
+ wrk.markBroken(linkq)
+ continue
+ }
+
+ wrk.seenLink[linkq.url] = http.StatusProcessing
+ wrk.wg.Add(1)
+ go wrk.scan(linkq)
+ }
+
+ var tick = time.NewTicker(500 * time.Millisecond)
+ var listWaitStatus []linkQueue
+ var isScanning = true
+ for isScanning {
+ select {
+ case resultq := <-wrk.resultq:
+ listWaitStatus = wrk.processResult(resultq, listWaitStatus)
+
+ case <-tick.C:
+ wrk.wg.Wait()
+ if len(wrk.resultq) != 0 {
+ continue
+ }
+ if len(listWaitStatus) != 0 {
+ // There are links that are still waiting for
+ // scanning to complete.
+ continue
+ }
+ isScanning = false
+ }
+ }
+ wrk.result.sort()
+ return wrk.result, nil
+}
+
+// scanPastResult scan only pages reported inside
+// [Result.BrokenLinks].
+func (wrk *worker) scanPastResult() (
+ result *Result, err error,
+) {
+ go func() {
+ for page := range wrk.pastResult.BrokenLinks {
+ var linkq = linkQueue{
+ parentUrl: nil,
+ url: page,
+ status: http.StatusProcessing,
+ }
+ wrk.seenLink[linkq.url] = http.StatusProcessing
+ wrk.wg.Add(1)
+ go wrk.scan(linkq)
+ }
+ }()
+
+ var tick = time.NewTicker(500 * time.Millisecond)
+ var listWaitStatus []linkQueue
+ var isScanning = true
+ for isScanning {
+ select {
+ case resultq := <-wrk.resultq:
+ listWaitStatus = wrk.processResult(resultq, listWaitStatus)
+
+ case <-tick.C:
+ wrk.wg.Wait()
+ if len(wrk.resultq) != 0 {
+ continue
+ }
+ if len(listWaitStatus) != 0 {
+ // There are links that are still waiting for
+ // scanning to complete.
+ continue
+ }
+ isScanning = false
+ }
+ }
+ wrk.result.sort()
+ return wrk.result, nil
+}
+
+// processResult the resultq contains the original URL being scanned
+// and its child links.
+// For example, scanning "http://example.tld" result in
+//
+// "http://example.tld": {status=200}
+// "http://example.tld/page": {status=0}
+// "http://example.tld/image.png": {status=0}
+// "http://bad:domain/image.png": {status=700}
+func (wrk *worker) processResult(
+ resultq map[string]linkQueue, listWaitStatus []linkQueue,
+) (
+ newList []linkQueue,
+) {
+ for _, linkq := range resultq {
+ if linkq.status >= http.StatusBadRequest {
+ wrk.markBroken(linkq)
+ continue
+ }
+ if linkq.status != 0 {
+ // linkq is the result of scan with
+ // non error status.
+ wrk.seenLink[linkq.url] = linkq.status
+ continue
+ }
+
+ seenStatus, seen := wrk.seenLink[linkq.url]
+ if !seen {
+ wrk.seenLink[linkq.url] = http.StatusProcessing
+ wrk.wg.Add(1)
+ go wrk.scan(linkq)
+ continue
+ }
+ if seenStatus >= http.StatusBadRequest {
+ linkq.status = seenStatus
+ wrk.markBroken(linkq)
+ continue
+ }
+ if seenStatus >= http.StatusOK {
+ // The link has been processed and its
+ // not an error.
+ continue
+ }
+ // The link being processed by other goroutine.
+ linkq.status = seenStatus
+ newList = append(newList, linkq)
+ }
+ for _, linkq := range listWaitStatus {
+ seenStatus := wrk.seenLink[linkq.url]
+ if seenStatus >= http.StatusBadRequest {
+ linkq.status = seenStatus
+ wrk.markBroken(linkq)
+ continue
+ }
+ if seenStatus >= http.StatusOK {
+ continue
+ }
+ if seenStatus == http.StatusProcessing {
+ // Scanning still in progress.
+ newList = append(newList, linkq)
+ continue
+ }
+ }
+ return newList
+}
+
+func (wrk *worker) markBroken(linkq linkQueue) {
+ var parentUrl = linkq.parentUrl.String()
+ var listBroken = wrk.result.BrokenLinks[parentUrl]
+ var brokenLink = Broken{
+ Link: linkq.url,
+ Code: linkq.status,
+ }
+ if linkq.errScan != nil {
+ brokenLink.Error = linkq.errScan.Error()
+ }
+ listBroken = append(listBroken, brokenLink)
+ wrk.result.BrokenLinks[parentUrl] = listBroken
+
+ wrk.seenLink[linkq.url] = linkq.status
+}
+
+// scan fetches the HTML page or image to check if it is valid.
+func (wrk *worker) scan(linkq linkQueue) {
+ defer func() {
+ if wrk.opts.IsVerbose && linkq.errScan != nil {
+ wrk.log.Printf("error: %d %s error=%v\n", linkq.status,
+ linkq.url, linkq.errScan)
+ }
+ wrk.wg.Done()
+ }()
+
+ var (
+ resultq = map[string]linkQueue{}
+ httpResp *http.Response
+ err error
+ )
+ httpResp, err = wrk.fetch(linkq)
+ if err != nil {
+ linkq.status = StatusBadLink
+ linkq.errScan = err
+ resultq[linkq.url] = linkq
+ go wrk.pushResult(resultq)
+ return
+ }
+ defer httpResp.Body.Close()
+
+ linkq.status = httpResp.StatusCode
+ resultq[linkq.url] = linkq
+
+ if httpResp.StatusCode >= http.StatusBadRequest {
+ go wrk.pushResult(resultq)
+ return
+ }
+ if linkq.kind == atom.Img || linkq.isExternal {
+ go wrk.pushResult(resultq)
+ return
+ }
+
+ var doc *html.Node
+ doc, _ = html.Parse(httpResp.Body)
+
+ // After we check the code and test for [html.Parse] there are
+ // no actual cases where HTML content will return an error.
+ // The only possible error is when reading from body (io.Reader), and
+ // that is also almost impossible.
+ //
+ // [html.Parse]: https://go.googlesource.com/net/+/refs/tags/v0.40.0/html/parse.go#2347
+
+ var scanUrl *url.URL
+
+ scanUrl, err = url.Parse(linkq.url)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ var node *html.Node
+ for node = range doc.Descendants() {
+ if node.Type != html.ElementNode {
+ continue
+ }
+ var nodeLink *linkQueue
+ if node.DataAtom == atom.A {
+ for _, attr := range node.Attr {
+ if attr.Key != `href` {
+ continue
+ }
+ nodeLink = wrk.processLink(scanUrl, attr.Val, atom.A)
+ break
+ }
+ } else if node.DataAtom == atom.Img {
+ for _, attr := range node.Attr {
+ if attr.Key != `src` {
+ continue
+ }
+ nodeLink = wrk.processLink(scanUrl, attr.Val, atom.Img)
+ break
+ }
+ } else {
+ continue
+ }
+ if nodeLink == nil {
+ continue
+ }
+ _, seen := resultq[nodeLink.url]
+ if !seen {
+ nodeLink.checkExternal(wrk)
+ resultq[nodeLink.url] = *nodeLink
+ }
+ }
+ go wrk.pushResult(resultq)
+}
+
+func (wrk *worker) fetch(linkq linkQueue) (
+ httpResp *http.Response,
+ err error,
+) {
+ const maxRetry = 5
+ var retry int
+ for retry < 5 {
+ if linkq.kind == atom.Img {
+ if wrk.opts.IsVerbose {
+ wrk.log.Printf("scan: HEAD %s\n", linkq.url)
+ }
+ httpResp, err = http.Head(linkq.url)
+ } else {
+ if wrk.opts.IsVerbose {
+ wrk.log.Printf("scan: GET %s\n", linkq.url)
+ }
+ httpResp, err = http.Get(linkq.url)
+ }
+ if err == nil {
+ return httpResp, nil
+ }
+ var errDNS *net.DNSError
+ if !errors.As(err, &errDNS) {
+ return nil, err
+ }
+ if errDNS.Timeout() {
+ retry++
+ }
+ }
+ return nil, err
+}
+
+func (wrk *worker) processLink(parentUrl *url.URL, val string, kind atom.Atom) (
+ linkq *linkQueue,
+) {
+ if len(val) == 0 {
+ return nil
+ }
+
+ var newUrl *url.URL
+ var err error
+ newUrl, err = url.Parse(val)
+ if err != nil {
+ return &linkQueue{
+ parentUrl: parentUrl,
+ errScan: err,
+ url: val,
+ kind: kind,
+ status: StatusBadLink,
+ }
+ }
+ newUrl.Fragment = ""
+ newUrl.RawFragment = ""
+
+ if kind == atom.A && val[0] == '#' {
+ // Ignore link to ID, like `href="#element_id"`.
+ return nil
+ }
+ if strings.HasPrefix(val, `http`) {
+ return &linkQueue{
+ parentUrl: parentUrl,
+ url: strings.TrimSuffix(newUrl.String(), `/`),
+ kind: kind,
+ }
+ }
+ if val[0] == '/' {
+ // val is absolute to parent URL.
+ newUrl = wrk.baseUrl.JoinPath(newUrl.Path)
+ } else {
+ // val is relative to parent URL.
+ newUrl = parentUrl.JoinPath(`/`, newUrl.Path)
+ }
+ linkq = &linkQueue{
+ parentUrl: parentUrl,
+ url: strings.TrimSuffix(newUrl.String(), `/`),
+ kind: kind,
+ }
+ return linkq
+}
+
+func (wrk *worker) pushResult(resultq map[string]linkQueue) {
+ var tick = time.NewTicker(100 * time.Millisecond)
+ for {
+ select {
+ case wrk.resultq <- resultq:
+ tick.Stop()
+ return
+ case <-tick.C:
+ }
+ }
+}