From 7e507518fc17e065c972156ca6714c0803a7f47d Mon Sep 17 00:00:00 2001 From: Shulhan Date: Wed, 11 Jun 2025 00:52:06 +0700 Subject: all: revert to use HTTP GET on external, non-image URL Using HTTP HEAD on certain pages may return * 404, not found, for example on https://support.google.com/accounts/answer/1066447 * 405, method not allowed, for example on https://aur.archlinux.org/packages/rescached-git For a 405 response code we can check and retry with GET, but for 404 it is impossible to tell whether the URL really exists, since 404 means page not found. --- brokenlinks_test.go | 2 +- brokenlinks_worker.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/brokenlinks_test.go b/brokenlinks_test.go index b2d5c80..0624e73 100644 --- a/brokenlinks_test.go +++ b/brokenlinks_test.go @@ -43,7 +43,7 @@ func TestBrokenlinks(t *testing.T) { Code: jarink.StatusBadLink, }, { Link: `http:/127.0.0.1:11836`, - Error: `Head "http:/127.0.0.1:11836": http: no Host in request URL`, + Error: `Get "http:/127.0.0.1:11836": http: no Host in request URL`, Code: jarink.StatusBadLink, }, }, diff --git a/brokenlinks_worker.go b/brokenlinks_worker.go index 14ad615..81283a0 100644 --- a/brokenlinks_worker.go +++ b/brokenlinks_worker.go @@ -381,7 +381,7 @@ func (wrk *brokenlinksWorker) fetch(linkq linkQueue) ( const maxRetry = 5 var retry int for retry < 5 { - if linkq.kind == atom.Img || linkq.isExternal { + if linkq.kind == atom.Img { if wrk.opts.IsVerbose { wrk.log.Printf("scan: HEAD %s\n", linkq.url) } -- cgit v1.3