diff options
| author | Shulhan <ms@kilabit.info> | 2025-06-12 21:13:58 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2025-06-12 21:13:58 +0700 |
| commit | a02e915388723a5d8cc3b555fb3dfec477fc2a55 (patch) | |
| tree | aa35678b263646e1edd730a16cb35a66e7b933d8 /brokenlinks/link_queue.go | |
| parent | f408c77795a9dd6d4551fadd2e8352ba08915feb (diff) | |
| download | jarink-a02e915388723a5d8cc3b555fb3dfec477fc2a55.tar.xz | |
all: refactoring, move brokenlinks code to its own package
When two or more structs share the same prefix, it is time to group them
by moving them into their own package.
Also, in the future each command will be grouped into its own package.
Diffstat (limited to 'brokenlinks/link_queue.go')
| -rw-r--r-- | brokenlinks/link_queue.go | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/brokenlinks/link_queue.go b/brokenlinks/link_queue.go new file mode 100644 index 0000000..164a902 --- /dev/null +++ b/brokenlinks/link_queue.go @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> +// SPDX-License-Identifier: GPL-3.0-only + +package brokenlinks + +import ( + "net/url" + "strings" + + "golang.org/x/net/html/atom" +) + +type linkQueue struct { + parentUrl *url.URL + + // The error from scan. + errScan error + + // url being scanned. + url string + + // kind of url, its either an anchor or image. + // It set to 0 if url is the first URL being scanned. + kind atom.Atom + + // isExternal if true the scan will issue HTTP method HEAD instead of + // GET. + isExternal bool + + // Status of link after scan, its mostly used the HTTP status code. + // 0: link is the result of scan, not processed yet. + // StatusBadLink: link is invalid, not parseable or unreachable. + // 200 - 211: OK. + // 400 - 511: Error. + status int +} + +// checkExternal set the isExternal field to be true if +// +// (1) [linkQueue.url] does not start with [worker.scanUrl] +// +// (2) linkQueue is from scanPastResult, indicated by non-nil +// [worker.pastResult]. +// In this case, we did not want to scan the other pages from the same scanUrl +// domain. +func (linkq *linkQueue) checkExternal(wrk *worker) { + if !strings.HasPrefix(linkq.url, wrk.scanUrl.String()) { + linkq.isExternal = true + return + } + if wrk.pastResult != nil { + linkq.isExternal = true + return + } +} |
