diff options
| author | Shulhan <ms@kilabit.info> | 2025-07-02 02:07:43 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2025-11-20 17:12:19 +0700 |
| commit | 5301c666eec35699bbb9024678bb37adc057404c (patch) | |
| tree | 384516772bd4f6fc6cbcd08eea407dda89d4bee8 | |
| parent | c5d26d9a6c039816ce8369c34dd5ea33fac660a1 (diff) | |
| download | jarink-5301c666eec35699bbb9024678bb37adc057404c.tar.xz | |
brokenlinks: fix infinite loop on unknown host
When a link has an invalid domain (unknown host), the fetch retry loop
should break and return the error immediately instead of looping forever.
| -rw-r--r-- | brokenlinks/brokenlinks_test.go | 4 | ||||
| -rw-r--r-- | brokenlinks/testdata/exp_cache.json | 4 | ||||
| -rw-r--r-- | brokenlinks/testdata/web/index.html | 3 | ||||
| -rw-r--r-- | brokenlinks/worker.go | 7 |
4 files changed, 14 insertions, 4 deletions
diff --git a/brokenlinks/brokenlinks_test.go b/brokenlinks/brokenlinks_test.go index f957ae3..b49bcc5 100644 --- a/brokenlinks/brokenlinks_test.go +++ b/brokenlinks/brokenlinks_test.go @@ -264,6 +264,10 @@ func TestScan(t *testing.T) { Link: `http:/127.0.0.1:11836`, Error: `Get "http:/127.0.0.1:11836": http: no Host in request URL`, Code: brokenlinks.StatusBadLink, + }, { + Link: `https://domain`, + Error: `Get "https://domain": dial tcp: lookup domain: no such host`, + Code: 700, }, }, testUrl + `/broken.html`: []brokenlinks.Broken{ diff --git a/brokenlinks/testdata/exp_cache.json b/brokenlinks/testdata/exp_cache.json index 563164d..8b84ff7 100644 --- a/brokenlinks/testdata/exp_cache.json +++ b/brokenlinks/testdata/exp_cache.json @@ -2,7 +2,7 @@ "scanned_links": { "http://127.0.0.1:11900": { "url": "http://127.0.0.1:11900", - "size": 976, + "size": 1064, "response_code": 200 }, "http://127.0.0.1:11900/page2": { @@ -12,7 +12,7 @@ }, "https://127.0.0.1:11838": { "url": "https://127.0.0.1:11838", - "size": 976, + "size": 1064, "response_code": 200 } } diff --git a/brokenlinks/testdata/web/index.html b/brokenlinks/testdata/web/index.html index 596d374..88f1184 100644 --- a/brokenlinks/testdata/web/index.html +++ b/brokenlinks/testdata/web/index.html @@ -29,5 +29,8 @@ SPDX-License-Identifier: GPL-3.0-only <!-- Pages with insecure TLS --> <a href="https://127.0.0.1:11838">Insecure pages</a> + + <!-- Pages with invalid domain --> + <a href="https://domain">Invalid domain</a> </body> </html> diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go index 0493a77..3c6f97e 100644 --- a/brokenlinks/worker.go +++ b/brokenlinks/worker.go @@ -420,12 +420,12 @@ func (wrk *worker) fetch(linkq linkQueue) ( for retry < 5 { if linkq.kind == atom.Img { if wrk.opts.IsVerbose { - wrk.log.Printf("scan: HEAD %s\n", linkq.url) + wrk.log.Printf("fetch: HEAD %s", linkq.url) } httpResp, err = wrk.httpc.Head(linkq.url) } else { if wrk.opts.IsVerbose { - wrk.log.Printf("scan: GET %s\n", 
linkq.url) + wrk.log.Printf("fetch: GET %s", linkq.url) } httpResp, err = wrk.httpc.Get(linkq.url) } @@ -438,7 +438,10 @@ func (wrk *worker) fetch(linkq linkQueue) ( } if errDNS.Timeout() { retry++ + wrk.log.Printf(`fetch %s: %s (%d/%d)`, linkq.url, err, retry, maxRetry) + continue } + break } return nil, err } |
