aboutsummaryrefslogtreecommitdiff
path: root/brokenlinks/link_queue.go
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2025-06-21 15:20:01 +0700
committerShulhan <ms@kilabit.info>2025-06-27 12:19:23 +0700
commit1ca561ed0ecfa59b70a10191ac8e58cde90d126e (patch)
tree80f0c65f7e9321ad92dfc1a53a444226cee4be3d /brokenlinks/link_queue.go
parent8bc8fce1bd80b5a25c452ac5a24b1a1e3f5a4feb (diff)
downloadjarink-1ca561ed0ecfa59b70a10191ac8e58cde90d126e.tar.xz
brokenlinks: implement caching for external URLs
Any successful fetch of an external URL will be recorded in the jarink cache file, located in the user's home cache directory. For example, on Linux it would be `$HOME/.cache/jarink/cache.json`. This helps improve future rescanning of the same or a different target URL by minimizing network requests.
Diffstat (limited to 'brokenlinks/link_queue.go')
-rw-r--r--brokenlinks/link_queue.go21
1 files changed, 2 insertions, 19 deletions
diff --git a/brokenlinks/link_queue.go b/brokenlinks/link_queue.go
index 6a7dd32..14bf8c7 100644
--- a/brokenlinks/link_queue.go
+++ b/brokenlinks/link_queue.go
@@ -5,7 +5,6 @@ package brokenlinks
import (
"net/url"
- "strings"
"golang.org/x/net/html/atom"
)
@@ -33,23 +32,7 @@ type linkQueue struct {
// 200 - 211: OK.
// 400 - 511: Error.
status int
-}
-// checkExternal set the isExternal field to be true if
-//
-// (1) [linkQueue.url] does not start with [Options.Url]
-//
-// (2) linkQueue is from scanPastResult, indicated by non-nil
-// [worker.pastResult].
-// In this case, we did not want to scan the other pages from the same scanUrl
-// domain.
-func (linkq *linkQueue) checkExternal(wrk *worker) {
- if !strings.HasPrefix(linkq.url, wrk.opts.scanUrl.String()) {
- linkq.isExternal = true
- return
- }
- if wrk.pastResult != nil {
- linkq.isExternal = true
- return
- }
+ // Size of the page, derived from HTTP response ContentLength.
+ size int64
}