diff options
| author | Shulhan <ms@kilabit.info> | 2026-02-05 03:29:49 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2026-02-05 03:29:49 +0700 |
| commit | d8a892eb2f28b3ef4c2625c682d255f4f616cae2 (patch) | |
| tree | a29eb320d5f6d482521e619a73694dfca3eb17be | |
| parent | fa31e0a656d03fe3744c70a1171e3831647923c9 (diff) | |
| download | jarink-d8a892eb2f28b3ef4c2625c682d255f4f616cae2.tar.xz | |
brokenlinks: check for redirect during scan
If the request was redirected, use the "Location" value from the response
header as the parent URL instead of the original link from the queue.
| -rw-r--r-- | brokenlinks/worker.go | 10 | ||||
| -rw-r--r-- | brokenlinks/worker_test.go | 4 |
2 files changed, 12 insertions, 2 deletions
diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go index 7683730..06bdcc6 100644 --- a/brokenlinks/worker.go +++ b/brokenlinks/worker.go @@ -282,11 +282,17 @@ func (wrk *worker) scan(linkq jarink.Link) (resultq map[string]jarink.Link) { var parentUrl *url.URL - parentUrl, err = url.Parse(linkq.Url) + // Check and get the redirect location or use the original URL. + location := httpResp.Header.Get(`Location`) + if location == `` { + location = linkq.Url + } + + parentUrl, err = url.Parse(location) if err != nil { linkq.StatusCode = StatusBadLink linkq.ErrScan = err - resultq[linkq.Url] = linkq + resultq[location] = linkq return resultq } diff --git a/brokenlinks/worker_test.go b/brokenlinks/worker_test.go index 2867338..122221e 100644 --- a/brokenlinks/worker_test.go +++ b/brokenlinks/worker_test.go @@ -35,6 +35,10 @@ func TestGenURLRelative(t *testing.T) { parentURL: `https://domain/a/b/page.HTML`, relPath: `c`, expURL: `https://domain/a/b/c`, + }, { + parentURL: `https://domain/a/b/page.HTML`, + relPath: `../c.html`, + expURL: `https://domain/a/c.html`, }} for _, tc := range listCase { parentURL, _ := url.Parse(tc.parentURL) |
