diff options
| -rw-r--r-- | brokenlinks/brokenlinks_test.go | 2 | ||||
| -rw-r--r-- | brokenlinks/link_queue.go | 4 | ||||
| -rw-r--r-- | brokenlinks/options.go | 25 | ||||
| -rw-r--r-- | brokenlinks/worker.go | 18 |
4 files changed, 24 insertions, 25 deletions
diff --git a/brokenlinks/brokenlinks_test.go b/brokenlinks/brokenlinks_test.go index 9b176b7..ff9b5d6 100644 --- a/brokenlinks/brokenlinks_test.go +++ b/brokenlinks/brokenlinks_test.go @@ -125,7 +125,7 @@ func TestBrokenlinks(t *testing.T) { opts: brokenlinks.Options{ Url: `127.0.0.1:14594`, }, - expError: `Scan: invalid URL "127.0.0.1:14594"`, + expError: `Scan: Options: invalid URL "127.0.0.1:14594"`, }, { opts: brokenlinks.Options{ Url: `http://127.0.0.1:14594`, diff --git a/brokenlinks/link_queue.go b/brokenlinks/link_queue.go index 164a902..6a7dd32 100644 --- a/brokenlinks/link_queue.go +++ b/brokenlinks/link_queue.go @@ -37,14 +37,14 @@ type linkQueue struct { // checkExternal set the isExternal field to be true if // -// (1) [linkQueue.url] does not start with [worker.scanUrl] +// (1) [linkQueue.url] does not start with [Options.Url] // // (2) linkQueue is from scanPastResult, indicated by non-nil // [worker.pastResult]. // In this case, we did not want to scan the other pages from the same scanUrl // domain. func (linkq *linkQueue) checkExternal(wrk *worker) { - if !strings.HasPrefix(linkq.url, wrk.scanUrl.String()) { + if !strings.HasPrefix(linkq.url, wrk.opts.scanUrl.String()) { linkq.isExternal = true return } diff --git a/brokenlinks/options.go b/brokenlinks/options.go index 5a73b19..b4b1f3c 100644 --- a/brokenlinks/options.go +++ b/brokenlinks/options.go @@ -6,13 +6,17 @@ package brokenlinks import ( "fmt" "net/http" + "net/url" "strconv" "strings" ) // Options define the options for scanning broken links. type Options struct { - Url string + // The URL to be scanned. + Url string + scanUrl *url.URL + PastResultFile string // IgnoreStatus comma separated list HTTP status code that will be @@ -30,11 +34,19 @@ type Options struct { } func (opts *Options) init() (err error) { - var ( - logp = `Options` - listCode = strings.Split(opts.IgnoreStatus, ",") - val string - ) + var logp = `Options` + + opts.scanUrl, err = url.Parse(opts.Url) + if err != nil { + return fmt.Errorf(`%s: invalid URL %q`, logp, opts.Url) + } + opts.scanUrl.Path = strings.TrimSuffix(opts.scanUrl.Path, `/`) + opts.scanUrl.Fragment = "" + opts.scanUrl.RawFragment = "" + + var listCode = strings.Split(opts.IgnoreStatus, ",") + var val string + for _, val = range listCode { val = strings.TrimSpace(val) if val == "" { @@ -49,7 +61,6 @@ func (opts *Options) init() (err error) { code > http.StatusNetworkAuthenticationRequired { return fmt.Errorf(`%s: status code %s out of range`, logp, val) } - opts.ignoreStatus = append(opts.ignoreStatus, int(code)) } return nil diff --git a/brokenlinks/worker.go b/brokenlinks/worker.go index 8f278a8..94be90b 100644 --- a/brokenlinks/worker.go +++ b/brokenlinks/worker.go @@ -7,7 +7,6 @@ import ( "crypto/tls" "encoding/json" "errors" - "fmt" "log" "net" "net/http" @@ -42,9 +41,6 @@ type worker struct { // links or image. baseUrl *url.URL - // The URL to scan. - scanUrl *url.URL - log *log.Logger httpc *http.Client @@ -83,17 +79,9 @@ func newWorker(opts Options) (wrk *worker, err error) { }, } - wrk.scanUrl, err = url.Parse(opts.Url) - if err != nil { - return nil, fmt.Errorf(`invalid URL %q`, opts.Url) - } - wrk.scanUrl.Path = strings.TrimSuffix(wrk.scanUrl.Path, `/`) - wrk.scanUrl.Fragment = "" - wrk.scanUrl.RawFragment = "" - wrk.baseUrl = &url.URL{ - Scheme: wrk.scanUrl.Scheme, - Host: wrk.scanUrl.Host, + Scheme: wrk.opts.scanUrl.Scheme, + Host: wrk.opts.scanUrl.Host, } if opts.PastResultFile == "" { @@ -129,7 +117,7 @@ func (wrk *worker) scanAll() (result *Result, err error) { // Scan the first URL to make sure that the server is reachable. var firstLinkq = linkQueue{ parentUrl: nil, - url: wrk.scanUrl.String(), + url: wrk.opts.scanUrl.String(), status: http.StatusProcessing, } wrk.seenLink[firstLinkq.url] = http.StatusProcessing |
