diff options
| author | Shulhan <ms@kilabit.info> | 2026-02-04 17:03:47 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2026-02-04 17:03:47 +0700 |
| commit | 6b5ed409a5f11ed437586c8b046bcfc43749361d (patch) | |
| tree | 4c5d3851d1c8778e7bc2103ef9050d38eb1dfc28 /brokenlinks/worker.go | |
| parent | a33efc3992f58355eb98d7a5574df955952924b8 (diff) | |
| download | jarink-6b5ed409a5f11ed437586c8b046bcfc43749361d.tar.xz | |
brokenlinks: mark the link in queue as seen with status code 0
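This fixes the same URL being pushed to the queue more than once: when a link is appended to the queue, it is now also recorded in seenLink with status code 0, so a later occurrence of the same URL is treated as already seen and skipped instead of being queued again.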
Diffstat (limited to 'brokenlinks/worker.go')
| -rw-r--r-- | brokenlinks/worker.go | 21 |
1 file changed, 11 insertions, 10 deletions
diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go index 4a9a1d9..09c8b12 100644 --- a/brokenlinks/worker.go +++ b/brokenlinks/worker.go @@ -178,14 +178,14 @@ func (wrk *worker) scanPastResult() (result *Result, err error) { // - Otherwise push it to queue. func (wrk *worker) processResult(resultq map[string]jarink.Link) { var seen bool - for _, linkq := range resultq { - if linkq.StatusCode != 0 { - wrk.seen(linkq) + for _, result := range resultq { + if result.StatusCode != 0 { + wrk.seen(result) continue } - if linkq.IsExternal { - var scannedLink = wrk.cache.Get(linkq.Url) + if result.IsExternal { + var scannedLink = wrk.cache.Get(result.Url) if scannedLink != nil { // The external link has been scanned // previously. @@ -193,17 +193,18 @@ func (wrk *worker) processResult(resultq map[string]jarink.Link) { } } - linkq.StatusCode, seen = wrk.seenLink[linkq.Url] + result.StatusCode, seen = wrk.seenLink[result.Url] if seen { - if linkq.StatusCode >= http.StatusBadRequest { + if result.StatusCode >= http.StatusBadRequest { // Different pages may have the same broken // link. - wrk.markAsBroken(linkq) + wrk.markAsBroken(result) } continue } - wrk.queue = append(wrk.queue, linkq) - wrk.log.Printf(`queue %d: %s`, len(wrk.queue), linkq.Url) + wrk.queue = append(wrk.queue, result) + wrk.seenLink[result.Url] = result.StatusCode + wrk.log.Printf(`queue %d: %s`, len(wrk.queue), result.Url) } } |
