From ff0fb55d60f4c29ffb6b8a69c2be3469f794990a Mon Sep 17 00:00:00 2001 From: Shulhan Date: Sun, 1 Jun 2025 16:02:46 +0700 Subject: all: brokenlinks should scan only URL on given path Previously, if we passed a URL with a path to brokenlinks, for example "web.tld/path", it would scan all of the pages in the website "web.tld". Now, it only scans "/path" and its sub paths. --- README | 35 ++++++++++++++++++++++++++++++++++- brokenlinks_test.go | 25 ++----------------------- brokenlinks_worker.go | 2 +- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/README b/README index a26722f..2d8b273 100644 --- a/README +++ b/README @@ -20,8 +20,41 @@ Available commands, Links will be scanned on anchor href attribute ("") or on the image src attribute (", + "Error": , + "Code": + }, + ... + ], + ... + } This command accept the following options, diff --git a/brokenlinks_test.go b/brokenlinks_test.go index c1a607f..1c43937 100644 --- a/brokenlinks_test.go +++ b/brokenlinks_test.go @@ -126,31 +126,10 @@ func TestBrokenlinks(t *testing.T) { }, }, }, { + // Scanning on "/path" should not scan the "/" or other + // pages other than those below "/path" itself. 
scanUrl: testUrl + `/page2`, exp: map[string][]jarink.Broken{ - testUrl: []jarink.Broken{ - { - Link: testUrl + `/broken.png`, - Code: http.StatusNotFound, - }, { - Link: testUrl + `/brokenPage`, - Code: http.StatusNotFound, - }, { - Link: `http://127.0.0.1:abc`, - Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`, - Code: jarink.StatusBadLink, - }, { - Link: `http:/127.0.0.1:11836`, - Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`, - Code: jarink.StatusBadLink, - }, - }, - testUrl + `/broken.html`: []jarink.Broken{ - { - Link: testUrl + `/brokenPage`, - Code: http.StatusNotFound, - }, - }, testUrl + `/page2`: []jarink.Broken{ { Link: testUrl + `/broken.png`, diff --git a/brokenlinks_worker.go b/brokenlinks_worker.go index a196d2f..5cc8c25 100644 --- a/brokenlinks_worker.go +++ b/brokenlinks_worker.go @@ -303,7 +303,7 @@ func (wrk *brokenlinksWorker) scan(linkq linkQueue) { } _, seen := resultq[nodeLink.url] if !seen { - if !strings.HasPrefix(nodeLink.url, wrk.baseUrl.String()) { + if !strings.HasPrefix(nodeLink.url, wrk.scanUrl.String()) { nodeLink.isExternal = true } resultq[nodeLink.url] = *nodeLink -- cgit v1.3