diff options
| -rw-r--r-- | README | 35 | ||||
| -rw-r--r-- | brokenlinks_test.go | 25 | ||||
| -rw-r--r-- | brokenlinks_worker.go | 2 |
3 files changed, 37 insertions, 25 deletions
@@ -20,8 +20,41 @@ Available commands, Links will be scanned on anchor href attribute ("<a href=...>") or on the image src attribute ("<img src=..."). + The URL can start from the base or from a sub path. + Scanning from a path only reports broken links on that path and its + sub paths. + For example, given a website that has the following pages, + + - web.tld (base) + - web.tld/page1 + - web.tld/page1/sub1 + - web.tld/page2 + - web.tld/page2/sub1 + + Invoking brokenlinks with + + $ jarink brokenlinks https://web.tld + + will scan all of the pages, but invoking brokenlinks on path + "/page2" + + $ jarink brokenlinks https://web.tld/page2 + + will only scan "/page2" and "/page2/sub1". + Once finished it will print the page and list of broken links in - JSON format to standard output. + JSON format to standard output, + + { + "$PAGE": [{ + "Link": <string>, + "Error": <string>, + "Code": <integer> + }, + ... + ], + ... + } This command accept the following options, diff --git a/brokenlinks_test.go b/brokenlinks_test.go index c1a607f..1c43937 100644 --- a/brokenlinks_test.go +++ b/brokenlinks_test.go @@ -126,31 +126,10 @@ func TestBrokenlinks(t *testing.T) { }, }, }, { + // Scanning on "/path" should not scan "/" or any + // pages other than those below "/path" itself. 
scanUrl: testUrl + `/page2`, exp: map[string][]jarink.Broken{ - testUrl: []jarink.Broken{ - { - Link: testUrl + `/broken.png`, - Code: http.StatusNotFound, - }, { - Link: testUrl + `/brokenPage`, - Code: http.StatusNotFound, - }, { - Link: `http://127.0.0.1:abc`, - Error: `parse "http://127.0.0.1:abc": invalid port ":abc" after host`, - Code: jarink.StatusBadLink, - }, { - Link: `http:/127.0.0.1:11836`, - Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`, - Code: jarink.StatusBadLink, - }, - }, - testUrl + `/broken.html`: []jarink.Broken{ - { - Link: testUrl + `/brokenPage`, - Code: http.StatusNotFound, - }, - }, testUrl + `/page2`: []jarink.Broken{ { Link: testUrl + `/broken.png`, diff --git a/brokenlinks_worker.go b/brokenlinks_worker.go index a196d2f..5cc8c25 100644 --- a/brokenlinks_worker.go +++ b/brokenlinks_worker.go @@ -303,7 +303,7 @@ func (wrk *brokenlinksWorker) scan(linkq linkQueue) { } _, seen := resultq[nodeLink.url] if !seen { - if !strings.HasPrefix(nodeLink.url, wrk.baseUrl.String()) { + if !strings.HasPrefix(nodeLink.url, wrk.scanUrl.String()) { nodeLink.isExternal = true } resultq[nodeLink.url] = *nodeLink |
