aboutsummaryrefslogtreecommitdiff
path: root/brokenlinks/worker.go
diff options
context:
space:
mode:
author Shulhan <ms@kilabit.info> 2026-02-04 17:03:47 +0700
committer Shulhan <ms@kilabit.info> 2026-02-04 17:03:47 +0700
commit 6b5ed409a5f11ed437586c8b046bcfc43749361d (patch)
tree 4c5d3851d1c8778e7bc2103ef9050d38eb1dfc28 /brokenlinks/worker.go
parent a33efc3992f58355eb98d7a5574df955952924b8 (diff)
download jarink-6b5ed409a5f11ed437586c8b046bcfc43749361d.tar.xz
brokenlinks: mark the link in queue as seen with status code 0
This fixes the same URL being pushed to the queue more than once.
Diffstat (limited to 'brokenlinks/worker.go')
-rw-r--r-- brokenlinks/worker.go 21
1 files changed, 11 insertions, 10 deletions
diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go
index 4a9a1d9..09c8b12 100644
--- a/brokenlinks/worker.go
+++ b/brokenlinks/worker.go
@@ -178,14 +178,14 @@ func (wrk *worker) scanPastResult() (result *Result, err error) {
// - Otherwise push it to queue.
func (wrk *worker) processResult(resultq map[string]jarink.Link) {
var seen bool
- for _, linkq := range resultq {
- if linkq.StatusCode != 0 {
- wrk.seen(linkq)
+ for _, result := range resultq {
+ if result.StatusCode != 0 {
+ wrk.seen(result)
continue
}
- if linkq.IsExternal {
- var scannedLink = wrk.cache.Get(linkq.Url)
+ if result.IsExternal {
+ var scannedLink = wrk.cache.Get(result.Url)
if scannedLink != nil {
// The external link has been scanned
// previously.
@@ -193,17 +193,18 @@ func (wrk *worker) processResult(resultq map[string]jarink.Link) {
}
}
- linkq.StatusCode, seen = wrk.seenLink[linkq.Url]
+ result.StatusCode, seen = wrk.seenLink[result.Url]
if seen {
- if linkq.StatusCode >= http.StatusBadRequest {
+ if result.StatusCode >= http.StatusBadRequest {
// Different pages may have the same broken
// link.
- wrk.markAsBroken(linkq)
+ wrk.markAsBroken(result)
}
continue
}
- wrk.queue = append(wrk.queue, linkq)
- wrk.log.Printf(`queue %d: %s`, len(wrk.queue), linkq.Url)
+ wrk.queue = append(wrk.queue, result)
+ wrk.seenLink[result.Url] = result.StatusCode
+ wrk.log.Printf(`queue %d: %s`, len(wrk.queue), result.Url)
}
}