diff options
| author | Shulhan <ms@kilabit.info> | 2025-06-11 00:52:06 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2025-06-11 00:53:50 +0700 |
| commit | 7e507518fc17e065c972156ca6714c0803a7f47d (patch) | |
| tree | 83a04a978de0e1afcf61d0772a5357cc5ea6f090 | |
| parent | 2d27987c6d8bd173b9a3232cd8bde9f9b8105853 (diff) | |
| download | jarink-7e507518fc17e065c972156ca6714c0803a7f47d.tar.xz | |
all: revert to use HTTP GET on external, non-image URL
Using HTTP HEAD on certain pages may return:
* 404, not found, for example on
https://support.google.com/accounts/answer/1066447
* 405, method not allowed, for example on
https://aur.archlinux.org/packages/rescached-git
For a 405 response code we can check and retry with GET, but for 404 it is
impossible to check whether the URL really exists or not, since 404 means
page not found.
| -rw-r--r-- | brokenlinks_test.go | 2 | ||||
| -rw-r--r-- | brokenlinks_worker.go | 2 |
2 files changed, 2 insertions, 2 deletions
diff --git a/brokenlinks_test.go b/brokenlinks_test.go index b2d5c80..0624e73 100644 --- a/brokenlinks_test.go +++ b/brokenlinks_test.go @@ -43,7 +43,7 @@ func TestBrokenlinks(t *testing.T) { Code: jarink.StatusBadLink, }, { Link: `http:/127.0.0.1:11836`, - Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`, + Error: `Get "http:/127.0.0.1:11836": http: no Host in request URL`, Code: jarink.StatusBadLink, }, }, diff --git a/brokenlinks_worker.go b/brokenlinks_worker.go index 14ad615..81283a0 100644 --- a/brokenlinks_worker.go +++ b/brokenlinks_worker.go @@ -381,7 +381,7 @@ func (wrk *brokenlinksWorker) fetch(linkq linkQueue) ( const maxRetry = 5 var retry int for retry < 5 { - if linkq.kind == atom.Img || linkq.isExternal { + if linkq.kind == atom.Img { if wrk.opts.IsVerbose { wrk.log.Printf("scan: HEAD %s\n", linkq.url) } |
