summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Shulhan <ms@kilabit.info> 2025-06-15 14:29:49 +0700
committer: Shulhan <ms@kilabit.info> 2025-06-16 22:44:37 +0700
commit: b4ef535afc6809c14137972d365732cea6b9f5b9 (patch)
tree: f14e05f4f02b0bff8883c297e1c257f224fd87fc
parent: c814c790367dcd167dd0687318bd6dcc71299490 (diff)
download: jarink-b4ef535afc6809c14137972d365732cea6b9f5b9.tar.xz
brokenlinks: move parsing scanned Url from worker to Options
Before the Options is passed to the worker, it should already be valid, including the URL to be scanned.
-rw-r--r-- brokenlinks/brokenlinks_test.go 2
-rw-r--r-- brokenlinks/link_queue.go 4
-rw-r--r-- brokenlinks/options.go 25
-rw-r--r-- brokenlinks/worker.go 18
4 files changed, 24 insertions, 25 deletions
diff --git a/brokenlinks/brokenlinks_test.go b/brokenlinks/brokenlinks_test.go
index 9b176b7..ff9b5d6 100644
--- a/brokenlinks/brokenlinks_test.go
+++ b/brokenlinks/brokenlinks_test.go
@@ -125,7 +125,7 @@ func TestBrokenlinks(t *testing.T) {
opts: brokenlinks.Options{
Url: `127.0.0.1:14594`,
},
- expError: `Scan: invalid URL "127.0.0.1:14594"`,
+ expError: `Scan: Options: invalid URL "127.0.0.1:14594"`,
}, {
opts: brokenlinks.Options{
Url: `http://127.0.0.1:14594`,
diff --git a/brokenlinks/link_queue.go b/brokenlinks/link_queue.go
index 164a902..6a7dd32 100644
--- a/brokenlinks/link_queue.go
+++ b/brokenlinks/link_queue.go
@@ -37,14 +37,14 @@ type linkQueue struct {
// checkExternal set the isExternal field to be true if
//
-// (1) [linkQueue.url] does not start with [worker.scanUrl]
+// (1) [linkQueue.url] does not start with [Options.Url]
//
// (2) linkQueue is from scanPastResult, indicated by non-nil
// [worker.pastResult].
// In this case, we did not want to scan the other pages from the same scanUrl
// domain.
func (linkq *linkQueue) checkExternal(wrk *worker) {
- if !strings.HasPrefix(linkq.url, wrk.scanUrl.String()) {
+ if !strings.HasPrefix(linkq.url, wrk.opts.scanUrl.String()) {
linkq.isExternal = true
return
}
diff --git a/brokenlinks/options.go b/brokenlinks/options.go
index 5a73b19..b4b1f3c 100644
--- a/brokenlinks/options.go
+++ b/brokenlinks/options.go
@@ -6,13 +6,17 @@ package brokenlinks
import (
"fmt"
"net/http"
+ "net/url"
"strconv"
"strings"
)
// Options define the options for scanning broken links.
type Options struct {
- Url string
+ // The URL to be scanned.
+ Url string
+ scanUrl *url.URL
+
PastResultFile string
// IgnoreStatus comma separated list HTTP status code that will be
@@ -30,11 +34,19 @@ type Options struct {
}
func (opts *Options) init() (err error) {
- var (
- logp = `Options`
- listCode = strings.Split(opts.IgnoreStatus, ",")
- val string
- )
+ var logp = `Options`
+
+ opts.scanUrl, err = url.Parse(opts.Url)
+ if err != nil {
+ return fmt.Errorf(`%s: invalid URL %q`, logp, opts.Url)
+ }
+ opts.scanUrl.Path = strings.TrimSuffix(opts.scanUrl.Path, `/`)
+ opts.scanUrl.Fragment = ""
+ opts.scanUrl.RawFragment = ""
+
+ var listCode = strings.Split(opts.IgnoreStatus, ",")
+ var val string
+
for _, val = range listCode {
val = strings.TrimSpace(val)
if val == "" {
@@ -49,7 +61,6 @@ func (opts *Options) init() (err error) {
code > http.StatusNetworkAuthenticationRequired {
return fmt.Errorf(`%s: status code %s out of range`, logp, val)
}
-
opts.ignoreStatus = append(opts.ignoreStatus, int(code))
}
return nil
diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go
index 8f278a8..94be90b 100644
--- a/brokenlinks/worker.go
+++ b/brokenlinks/worker.go
@@ -7,7 +7,6 @@ import (
"crypto/tls"
"encoding/json"
"errors"
- "fmt"
"log"
"net"
"net/http"
@@ -42,9 +41,6 @@ type worker struct {
// links or image.
baseUrl *url.URL
- // The URL to scan.
- scanUrl *url.URL
-
log *log.Logger
httpc *http.Client
@@ -83,17 +79,9 @@ func newWorker(opts Options) (wrk *worker, err error) {
},
}
- wrk.scanUrl, err = url.Parse(opts.Url)
- if err != nil {
- return nil, fmt.Errorf(`invalid URL %q`, opts.Url)
- }
- wrk.scanUrl.Path = strings.TrimSuffix(wrk.scanUrl.Path, `/`)
- wrk.scanUrl.Fragment = ""
- wrk.scanUrl.RawFragment = ""
-
wrk.baseUrl = &url.URL{
- Scheme: wrk.scanUrl.Scheme,
- Host: wrk.scanUrl.Host,
+ Scheme: wrk.opts.scanUrl.Scheme,
+ Host: wrk.opts.scanUrl.Host,
}
if opts.PastResultFile == "" {
@@ -129,7 +117,7 @@ func (wrk *worker) scanAll() (result *Result, err error) {
// Scan the first URL to make sure that the server is reachable.
var firstLinkq = linkQueue{
parentUrl: nil,
- url: wrk.scanUrl.String(),
+ url: wrk.opts.scanUrl.String(),
status: http.StatusProcessing,
}
wrk.seenLink[firstLinkq.url] = http.StatusProcessing