aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Shulhan <ms@kilabit.info> 2025-07-02 02:07:43 +0700
committer Shulhan <ms@kilabit.info> 2025-11-20 17:12:19 +0700
commit 5301c666eec35699bbb9024678bb37adc057404c (patch)
tree 384516772bd4f6fc6cbcd08eea407dda89d4bee8
parent c5d26d9a6c039816ce8369c34dd5ea33fac660a1 (diff)
download jarink-5301c666eec35699bbb9024678bb37adc057404c.tar.xz
brokenlinks: fix infinite loop on unknown host
When a link has an invalid (unresolvable) domain, fetch should break out of its retry loop and return the error immediately; only DNS timeouts are retried.
-rw-r--r-- brokenlinks/brokenlinks_test.go 4
-rw-r--r-- brokenlinks/testdata/exp_cache.json 4
-rw-r--r-- brokenlinks/testdata/web/index.html 3
-rw-r--r-- brokenlinks/worker.go 7
4 files changed, 14 insertions, 4 deletions
diff --git a/brokenlinks/brokenlinks_test.go b/brokenlinks/brokenlinks_test.go
index f957ae3..b49bcc5 100644
--- a/brokenlinks/brokenlinks_test.go
+++ b/brokenlinks/brokenlinks_test.go
@@ -264,6 +264,10 @@ func TestScan(t *testing.T) {
Link: `http:/127.0.0.1:11836`,
Error: `Get "http:/127.0.0.1:11836": http: no Host in request URL`,
Code: brokenlinks.StatusBadLink,
+ }, {
+ Link: `https://domain`,
+ Error: `Get "https://domain": dial tcp: lookup domain: no such host`,
+ Code: 700,
},
},
testUrl + `/broken.html`: []brokenlinks.Broken{
diff --git a/brokenlinks/testdata/exp_cache.json b/brokenlinks/testdata/exp_cache.json
index 563164d..8b84ff7 100644
--- a/brokenlinks/testdata/exp_cache.json
+++ b/brokenlinks/testdata/exp_cache.json
@@ -2,7 +2,7 @@
"scanned_links": {
"http://127.0.0.1:11900": {
"url": "http://127.0.0.1:11900",
- "size": 976,
+ "size": 1064,
"response_code": 200
},
"http://127.0.0.1:11900/page2": {
@@ -12,7 +12,7 @@
},
"https://127.0.0.1:11838": {
"url": "https://127.0.0.1:11838",
- "size": 976,
+ "size": 1064,
"response_code": 200
}
}
diff --git a/brokenlinks/testdata/web/index.html b/brokenlinks/testdata/web/index.html
index 596d374..88f1184 100644
--- a/brokenlinks/testdata/web/index.html
+++ b/brokenlinks/testdata/web/index.html
@@ -29,5 +29,8 @@ SPDX-License-Identifier: GPL-3.0-only
<!-- Pages with insecure TLS -->
<a href="https://127.0.0.1:11838">Insecure pages</a>
+
+ <!-- Pages with invalid domain -->
+ <a href="https://domain">Invalid domain</a>
</body>
</html>
diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go
index 0493a77..3c6f97e 100644
--- a/brokenlinks/worker.go
+++ b/brokenlinks/worker.go
@@ -420,12 +420,12 @@ func (wrk *worker) fetch(linkq linkQueue) (
for retry < 5 {
if linkq.kind == atom.Img {
if wrk.opts.IsVerbose {
- wrk.log.Printf("scan: HEAD %s\n", linkq.url)
+ wrk.log.Printf("fetch: HEAD %s", linkq.url)
}
httpResp, err = wrk.httpc.Head(linkq.url)
} else {
if wrk.opts.IsVerbose {
- wrk.log.Printf("scan: GET %s\n", linkq.url)
+ wrk.log.Printf("fetch: GET %s", linkq.url)
}
httpResp, err = wrk.httpc.Get(linkq.url)
}
@@ -438,7 +438,10 @@ func (wrk *worker) fetch(linkq linkQueue) (
}
if errDNS.Timeout() {
retry++
+ wrk.log.Printf(`fetch %s: %s (%d/%d)`, linkq.url, err, retry, maxRetry)
+ continue
}
+ break
}
return nil, err
}