aboutsummaryrefslogtreecommitdiff
path: root/worker.go
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2025-05-29 14:04:51 +0700
committerShulhan <ms@kilabit.info>2025-05-29 14:04:51 +0700
commitb0c320e436ff5cdc70ad38a980a2af2a7f3e5dfd (patch)
treed7fb2016f426d51b72d506f22345634300528fdf /worker.go
parent3d39941514395137610fb1c58768814a390b7c35 (diff)
downloadjarink-b0c320e436ff5cdc70ad38a980a2af2a7f3e5dfd.tar.xz
all: ignore HTML page from external domain
Any HTML link that is from a domain other than the scanned domain should not get parsed. It is only checked for whether the link is valid or not.
Diffstat (limited to 'worker.go')
-rw-r--r--worker.go4
1 files changed, 4 insertions, 0 deletions
diff --git a/worker.go b/worker.go
index ac25bf4..700c9a5 100644
--- a/worker.go
+++ b/worker.go
@@ -137,6 +137,10 @@ func (wrk *worker) scan(linkq linkQueue) {
if linkq.kind == atom.Img {
return
}
+ if !strings.HasPrefix(linkq.url, wrk.baseUrl.String()) {
+ // Do not parse the page from external domain.
+ return
+ }
err = wrk.parseHTML(linkq.url, httpResp.Body)
if err != nil {
wrk.errq <- fmt.Errorf(`%s %s: %w`, logp, linkq.url, err)