summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2025-06-11 00:52:06 +0700
committerShulhan <ms@kilabit.info>2025-06-11 00:53:50 +0700
commit7e507518fc17e065c972156ca6714c0803a7f47d (patch)
tree83a04a978de0e1afcf61d0772a5357cc5ea6f090
parent2d27987c6d8bd173b9a3232cd8bde9f9b8105853 (diff)
downloadjarink-7e507518fc17e065c972156ca6714c0803a7f47d.tar.xz
all: revert to use HTTP GET on external, non-image URL
Using HTTP HEAD on certain pages may return:
* 404, not found, for example on https://support.google.com/accounts/answer/1066447
* 405, method not allowed, for example on https://aur.archlinux.org/packages/rescached-git

For a 405 response code we can check and retry with GET, but for 404 it is impossible to tell whether the URL really exists or not, since 404 means page not found.
-rw-r--r--brokenlinks_test.go2
-rw-r--r--brokenlinks_worker.go2
2 files changed, 2 insertions, 2 deletions
diff --git a/brokenlinks_test.go b/brokenlinks_test.go
index b2d5c80..0624e73 100644
--- a/brokenlinks_test.go
+++ b/brokenlinks_test.go
@@ -43,7 +43,7 @@ func TestBrokenlinks(t *testing.T) {
Code: jarink.StatusBadLink,
}, {
Link: `http:/127.0.0.1:11836`,
- Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`,
+ Error: `Get "http:/127.0.0.1:11836": http: no Host in request URL`,
Code: jarink.StatusBadLink,
},
},
diff --git a/brokenlinks_worker.go b/brokenlinks_worker.go
index 14ad615..81283a0 100644
--- a/brokenlinks_worker.go
+++ b/brokenlinks_worker.go
@@ -381,7 +381,7 @@ func (wrk *brokenlinksWorker) fetch(linkq linkQueue) (
const maxRetry = 5
var retry int
for retry < 5 {
- if linkq.kind == atom.Img || linkq.isExternal {
+ if linkq.kind == atom.Img {
if wrk.opts.IsVerbose {
wrk.log.Printf("scan: HEAD %s\n", linkq.url)
}