From d8a892eb2f28b3ef4c2625c682d255f4f616cae2 Mon Sep 17 00:00:00 2001 From: Shulhan Date: Thu, 5 Feb 2026 03:29:49 +0700 Subject: brokenlinks: check for redirect during scan If the request is redirected, use the "Location" value in the response header as the parent URL instead of the original link in the queue. --- brokenlinks/worker.go | 10 ++++++++-- brokenlinks/worker_test.go | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'brokenlinks') diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go index 7683730..06bdcc6 100644 --- a/brokenlinks/worker.go +++ b/brokenlinks/worker.go @@ -282,11 +282,17 @@ func (wrk *worker) scan(linkq jarink.Link) (resultq map[string]jarink.Link) { var parentUrl *url.URL - parentUrl, err = url.Parse(linkq.Url) + // Check and get the redirect location or use the original URL. + location := httpResp.Header.Get(`Location`) + if location == `` { + location = linkq.Url + } + + parentUrl, err = url.Parse(location) if err != nil { linkq.StatusCode = StatusBadLink linkq.ErrScan = err - resultq[linkq.Url] = linkq + resultq[location] = linkq return resultq } diff --git a/brokenlinks/worker_test.go b/brokenlinks/worker_test.go index 2867338..122221e 100644 --- a/brokenlinks/worker_test.go +++ b/brokenlinks/worker_test.go @@ -35,6 +35,10 @@ func TestGenURLRelative(t *testing.T) { parentURL: `https://domain/a/b/page.HTML`, relPath: `c`, expURL: `https://domain/a/b/c`, + }, { + parentURL: `https://domain/a/b/page.HTML`, + relPath: `../c.html`, + expURL: `https://domain/a/c.html`, }} for _, tc := range listCase { parentURL, _ := url.Parse(tc.parentURL) -- cgit v1.3