diff options
| author | Shulhan <ms@kilabit.info> | 2025-06-21 15:20:01 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2025-06-27 12:19:23 +0700 |
| commit | 1ca561ed0ecfa59b70a10191ac8e58cde90d126e (patch) | |
| tree | 80f0c65f7e9321ad92dfc1a53a444226cee4be3d /brokenlinks/link_queue.go | |
| parent | 8bc8fce1bd80b5a25c452ac5a24b1a1e3f5a4feb (diff) | |
| download | jarink-1ca561ed0ecfa59b70a10191ac8e58cde90d126e.tar.xz | |
brokenlinks: implement caching for external URLs
Any successful fetch of an external URL will be recorded in the jarink
cache file, located in the user's home cache directory.
For example, in Linux it would be `$HOME/.cache/jarink/cache.json`.
This helps improve future rescans of the same or a different target
URL by minimizing network requests.
Diffstat (limited to 'brokenlinks/link_queue.go')
| -rw-r--r-- | brokenlinks/link_queue.go | 21 |
1 files changed, 2 insertions, 19 deletions
diff --git a/brokenlinks/link_queue.go b/brokenlinks/link_queue.go index 6a7dd32..14bf8c7 100644 --- a/brokenlinks/link_queue.go +++ b/brokenlinks/link_queue.go @@ -5,7 +5,6 @@ package brokenlinks import ( "net/url" - "strings" "golang.org/x/net/html/atom" ) @@ -33,23 +32,7 @@ type linkQueue struct { // 200 - 211: OK. // 400 - 511: Error. status int -} -// checkExternal set the isExternal field to be true if -// -// (1) [linkQueue.url] does not start with [Options.Url] -// -// (2) linkQueue is from scanPastResult, indicated by non-nil -// [worker.pastResult]. -// In this case, we did not want to scan the other pages from the same scanUrl -// domain. -func (linkq *linkQueue) checkExternal(wrk *worker) { - if !strings.HasPrefix(linkq.url, wrk.opts.scanUrl.String()) { - linkq.isExternal = true - return - } - if wrk.pastResult != nil { - linkq.isExternal = true - return - } + // Size of the page, derived from HTTP response ContentLength. + size int64 } |
