summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2025-05-29 13:15:08 +0700
committerShulhan <ms@kilabit.info>2025-05-29 13:15:08 +0700
commitcce49486f36bb4ba7702973cc598af9bcb1f4103 (patch)
tree2daf641658141834e755f15c526e13ce99f3f98b
parentfa85558a0d1b20e4d203ddb537c3bde51bc5131f (diff)
downloadjarink-cce49486f36bb4ba7702973cc598af9bcb1f4103.tar.xz
all: add case for broken HTML
It turns out that broken HTML still gets parsed by the "net/html" package.
-rw-r--r--deadlinks_test.go10
-rw-r--r--testdata/web/broken.html7
-rw-r--r--testdata/web/index.html1
3 files changed, 18 insertions, 0 deletions
diff --git a/deadlinks_test.go b/deadlinks_test.go
index a43df3d..9570f50 100644
--- a/deadlinks_test.go
+++ b/deadlinks_test.go
@@ -65,6 +65,10 @@ func TestDeadLinks_Scan(t *testing.T) {
Link: `https://kilabit.info/brokenPage`,
Code: http.StatusNotFound,
}},
+ testUrl + `/broken.html`: []deadlinks.Broken{{
+ Link: testUrl + `/brokenPage`,
+ Code: http.StatusNotFound,
+ }},
testUrl + `/page2`: []deadlinks.Broken{{
Link: testUrl + `/broken.png`,
Code: http.StatusNotFound,
@@ -89,6 +93,10 @@ func TestDeadLinks_Scan(t *testing.T) {
Link: `https://kilabit.info/brokenPage`,
Code: http.StatusNotFound,
}},
+ testUrl + `/broken.html`: []deadlinks.Broken{{
+ Link: testUrl + `/brokenPage`,
+ Code: http.StatusNotFound,
+ }},
testUrl + `/page2`: []deadlinks.Broken{{
Link: testUrl + `/broken.png`,
Code: http.StatusNotFound,
@@ -113,6 +121,8 @@ func TestDeadLinks_Scan(t *testing.T) {
tcase.expError, err.Error())
continue
}
+ //got, _ := json.MarshalIndent(result.PageLinks, ``, ` `)
+ //t.Logf(`got=%s`, got)
test.Assert(t, tcase.scanUrl, tcase.exp, result.PageLinks)
}
}
diff --git a/testdata/web/broken.html b/testdata/web/broken.html
new file mode 100644
index 0000000..533e542
--- /dev/null
+++ b/testdata/web/broken.html
@@ -0,0 +1,7 @@
+<html>
+ <head></head>
+ <body>
+ <a href="/brokenPage"
+ <p>
+ </body>
+</html>
diff --git a/testdata/web/index.html b/testdata/web/index.html
index 1124813..19a2bae 100644
--- a/testdata/web/index.html
+++ b/testdata/web/index.html
@@ -8,5 +8,6 @@ SPDX-License-Identifier: GPL-3.0-only
<a href="/brokenPage">Broken page</a>
<a href="/page2">Page 2</a>
<a href="https://kilabit.info/brokenPage">Broken page at kilabit.info</a>
+ <a href="/broken.html">Broken HTML</a>
</body>
</html>