diff options
| author | Shulhan <ms@kilabit.info> | 2026-02-05 03:29:49 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2026-02-05 03:29:49 +0700 |
| commit | d8a892eb2f28b3ef4c2625c682d255f4f616cae2 (patch) | |
| tree | a29eb320d5f6d482521e619a73694dfca3eb17be /brokenlinks | |
| parent | fa31e0a656d03fe3744c70a1171e3831647923c9 (diff) | |
| download | jarink-d8a892eb2f28b3ef4c2625c682d255f4f616cae2.tar.xz | |
brokenlinks: check for redirect during scan
If the request was redirected, use the "Location" value from the response
header as the parent URL instead of the original link in the queue.
Diffstat (limited to 'brokenlinks')
| -rw-r--r-- | brokenlinks/worker.go | 10 | ||||
| -rw-r--r-- | brokenlinks/worker_test.go | 4 |
2 files changed, 12 insertions, 2 deletions
diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go
index 7683730..06bdcc6 100644
--- a/brokenlinks/worker.go
+++ b/brokenlinks/worker.go
@@ -282,11 +282,17 @@ func (wrk *worker) scan(linkq jarink.Link) (resultq map[string]jarink.Link) {
 
 	var parentUrl *url.URL
 
-	parentUrl, err = url.Parse(linkq.Url)
+	// Check and get the redirect location or use the original URL.
+	location := httpResp.Header.Get(`Location`)
+	if location == `` {
+		location = linkq.Url
+	}
+
+	parentUrl, err = url.Parse(location)
 	if err != nil {
 		linkq.StatusCode = StatusBadLink
 		linkq.ErrScan = err
-		resultq[linkq.Url] = linkq
+		resultq[location] = linkq
 		return resultq
 	}
 
diff --git a/brokenlinks/worker_test.go b/brokenlinks/worker_test.go
index 2867338..122221e 100644
--- a/brokenlinks/worker_test.go
+++ b/brokenlinks/worker_test.go
@@ -35,6 +35,10 @@ func TestGenURLRelative(t *testing.T) {
 		parentURL: `https://domain/a/b/page.HTML`,
 		relPath:   `c`,
 		expURL:    `https://domain/a/b/c`,
+	}, {
+		parentURL: `https://domain/a/b/page.HTML`,
+		relPath:   `../c.html`,
+		expURL:    `https://domain/a/c.html`,
 	}}
 	for _, tc := range listCase {
 		parentURL, _ := url.Parse(tc.parentURL)
