diff options
| -rw-r--r-- | deadlinks_test.go | 43 | ||||
| -rw-r--r-- | testdata/web/index.html | 1 | ||||
| -rw-r--r-- | testdata/web/page2/index.html | 1 | ||||
| -rw-r--r-- | worker.go | 4 |
4 files changed, 44 insertions, 5 deletions
diff --git a/deadlinks_test.go b/deadlinks_test.go index 519647d..4269e5d 100644 --- a/deadlinks_test.go +++ b/deadlinks_test.go @@ -15,21 +15,54 @@ import ( "git.sr.ht/~shulhan/pakakeh.go/lib/test" ) -const testListenAddress = `127.0.0.1:11836` +// The test runs two web servers that serve content on "testdata/web/". +// The first web server is the one that we want to scan. +// The second web server is an external web server, whose HTML pages should not +// be parsed. + +const testAddress = `127.0.0.1:11836` +const testExternalAddress = `127.0.0.1:11900` func TestMain(m *testing.M) { var httpDirWeb = http.Dir(`testdata/web`) var fshandle = http.FileServer(httpDirWeb) - http.Handle(`/`, fshandle) go func() { - var err = http.ListenAndServe(testListenAddress, nil) + var mux = http.NewServeMux() + mux.Handle(`/`, fshandle) + var testServer = &http.Server{ + Addr: testAddress, + Handler: mux, + ReadTimeout: 10 * time.Second, + WriteTimeout: 10 * time.Second, + MaxHeaderBytes: 1 << 20, + } + var err = testServer.ListenAndServe() + if err != nil { + log.Fatal(err) + } + }() + go func() { + var mux = http.NewServeMux() + mux.Handle(`/`, fshandle) + var testServer = &http.Server{ + Addr: testExternalAddress, + Handler: mux, + ReadTimeout: 10 * time.Second, + WriteTimeout: 10 * time.Second, + MaxHeaderBytes: 1 << 20, + } + var err = testServer.ListenAndServe() if err != nil { log.Fatal(err) } }() - var err = libnet.WaitAlive(`tcp`, testListenAddress, 5*time.Second) + var err = libnet.WaitAlive(`tcp`, testAddress, 5*time.Second) + if err != nil { + log.Fatal(err) + } + err = libnet.WaitAlive(`tcp`, testExternalAddress, 5*time.Second) if err != nil { log.Fatal(err) } @@ -38,7 +71,7 @@ func TestMain(m *testing.M) { } func TestDeadLinks_Scan(t *testing.T) { - var testUrl = `http://` + testListenAddress + var testUrl = `http://` + testAddress type testCase struct { exp map[string][]deadlinks.Broken diff --git a/testdata/web/index.html b/testdata/web/index.html index e4d8bd0..f4f86d8 
100644 --- a/testdata/web/index.html +++ b/testdata/web/index.html @@ -9,5 +9,6 @@ SPDX-License-Identifier: GPL-3.0-only <img src="/gopher.png" /> <a href="/page2">Page 2</a> <a href="/broken.html">Broken HTML</a> + <a href="http://127.0.0.1:11900">External URL</a> </body> </html> diff --git a/testdata/web/page2/index.html b/testdata/web/page2/index.html index 0fc7601..ae6b4ea 100644 --- a/testdata/web/page2/index.html +++ b/testdata/web/page2/index.html @@ -9,5 +9,6 @@ SPDX-License-Identifier: GPL-3.0-only <a href="broken/relative">broken relative link</a> <a href="/">Back with absolute path</a> <a href="../">Back with relative path</a> + <a href="http://127.0.0.1:11900/page2">External URL page2</a> </body> </html> diff --git a/worker.go b/worker.go --- a/worker.go +++ b/worker.go @@ -137,6 +137,10 @@ func (wrk *worker) scan(linkq linkQueue) { if linkq.kind == atom.Img { return } + if !strings.HasPrefix(linkq.url, wrk.baseUrl.String()) { + // Do not parse pages from an external domain. + return + } err = wrk.parseHTML(linkq.url, httpResp.Body) if err != nil { wrk.errq <- fmt.Errorf(`%s %s: %w`, logp, linkq.url, err)
