diff options
| author | Shulhan <ms@kilabit.info> | 2025-06-04 10:07:24 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2025-06-05 00:34:33 +0700 |
| commit | c044c4edd47c260ae6137b7116c0891cd8b979d1 (patch) | |
| tree | 3476ede01fccad10dc763fdad40ce57892c63a31 /link_queue.go | |
| parent | ec44df63c7fdba18201992e83574fdce1e3bdc8f (diff) | |
| download | jarink-c044c4edd47c260ae6137b7116c0891cd8b979d1.tar.xz | |
all: add option to scan past result
The brokenlinks command now has an option "-past-result" that accepts
the path to a JSON file from a past result.
If it is set, the program will only scan the pages with broken links
inside that report.
Diffstat (limited to 'link_queue.go')
| -rw-r--r-- | link_queue.go | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/link_queue.go b/link_queue.go index 0b419b8..1470115 100644 --- a/link_queue.go +++ b/link_queue.go @@ -5,6 +5,7 @@ package jarink import ( "net/url" + "strings" "golang.org/x/net/html/atom" ) @@ -33,3 +34,22 @@ type linkQueue struct { // 400 - 511: Error. status int } + +// checkExternal sets the isExternal field to true if either of the following holds: +// +// (1) [linkQueue.url] does not start with [brokenlinksWorker.scanUrl], or +// +// (2) the linkQueue comes from scanPastResult, indicated by a non-nil +// [brokenlinksWorker.pastResult]. +// In this case, we do not want to scan the other pages from the same scanUrl +// domain. Otherwise the isExternal field is left unchanged. +func (linkq *linkQueue) checkExternal(wrk *brokenlinksWorker) { + if !strings.HasPrefix(linkq.url, wrk.scanUrl.String()) { + linkq.isExternal = true + return + } + if wrk.pastResult != nil { + linkq.isExternal = true + return + } +} |
