summary refs log tree commit diff
diff options
context:
space:
mode:
author Shulhan <ms@kilabit.info> 2026-02-05 03:29:49 +0700
committer Shulhan <ms@kilabit.info> 2026-02-05 03:29:49 +0700
commit d8a892eb2f28b3ef4c2625c682d255f4f616cae2 (patch)
tree a29eb320d5f6d482521e619a73694dfca3eb17be
parent fa31e0a656d03fe3744c70a1171e3831647923c9 (diff)
download jarink-d8a892eb2f28b3ef4c2625c682d255f4f616cae2.tar.xz
brokenlinks: check for redirect during scan
If the request was redirected, use the "Location" value from the response header as the parent URL instead of the original link from the queue.
-rw-r--r-- brokenlinks/worker.go 10
-rw-r--r-- brokenlinks/worker_test.go 4
2 files changed, 12 insertions, 2 deletions
diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go
index 7683730..06bdcc6 100644
--- a/brokenlinks/worker.go
+++ b/brokenlinks/worker.go
@@ -282,11 +282,17 @@ func (wrk *worker) scan(linkq jarink.Link) (resultq map[string]jarink.Link) {
var parentUrl *url.URL
- parentUrl, err = url.Parse(linkq.Url)
+ // Check and get the redirect location or use the original URL.
+ location := httpResp.Header.Get(`Location`)
+ if location == `` {
+ location = linkq.Url
+ }
+
+ parentUrl, err = url.Parse(location)
if err != nil {
linkq.StatusCode = StatusBadLink
linkq.ErrScan = err
- resultq[linkq.Url] = linkq
+ resultq[location] = linkq
return resultq
}
diff --git a/brokenlinks/worker_test.go b/brokenlinks/worker_test.go
index 2867338..122221e 100644
--- a/brokenlinks/worker_test.go
+++ b/brokenlinks/worker_test.go
@@ -35,6 +35,10 @@ func TestGenURLRelative(t *testing.T) {
parentURL: `https://domain/a/b/page.HTML`,
relPath: `c`,
expURL: `https://domain/a/b/c`,
+ }, {
+ parentURL: `https://domain/a/b/page.HTML`,
+ relPath: `../c.html`,
+ expURL: `https://domain/a/c.html`,
}}
for _, tc := range listCase {
parentURL, _ := url.Parse(tc.parentURL)