diff options
Diffstat (limited to 'link_queue.go')
| -rw-r--r-- | link_queue.go | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/link_queue.go b/link_queue.go
index 0b419b8..1470115 100644
--- a/link_queue.go
+++ b/link_queue.go
@@ -5,6 +5,7 @@ package jarink
 
 import (
 	"net/url"
+	"strings"
 
 	"golang.org/x/net/html/atom"
 )
@@ -33,3 +34,22 @@ type linkQueue struct {
 	// 400 - 511: Error.
 	status int
 }
+
+// checkExternal marks the link as external by setting isExternal to true
+// when either condition holds:
+//
+//   - [linkQueue.url] does not begin with the string form of
+//     [brokenlinksWorker.scanUrl]; or
+//   - [brokenlinksWorker.pastResult] is non-nil, meaning the link comes
+//     from scanPastResult and other pages under the same scanUrl should
+//     not be scanned.
+//
+// Otherwise isExternal is left untouched; it is never reset to false here.
+func (linkq *linkQueue) checkExternal(wrk *brokenlinksWorker) {
+	base := wrk.scanUrl.String()
+	outside := !strings.HasPrefix(linkq.url, base)
+	rescan := wrk.pastResult != nil
+	if outside || rescan {
+		linkq.isExternal = true
+	}
+}
