From cce49486f36bb4ba7702973cc598af9bcb1f4103 Mon Sep 17 00:00:00 2001 From: Shulhan Date: Thu, 29 May 2025 13:15:08 +0700 Subject: all: add case for broken HTML Turns out broken HTML still gets parsed by the "net/html" package. --- deadlinks_test.go | 10 ++++++++++ testdata/web/broken.html | 7 +++++++ testdata/web/index.html | 1 + 3 files changed, 18 insertions(+) create mode 100644 testdata/web/broken.html diff --git a/deadlinks_test.go b/deadlinks_test.go index a43df3d..9570f50 100644 --- a/deadlinks_test.go +++ b/deadlinks_test.go @@ -65,6 +65,10 @@ func TestDeadLinks_Scan(t *testing.T) { Link: `https://kilabit.info/brokenPage`, Code: http.StatusNotFound, }}, + testUrl + `/broken.html`: []deadlinks.Broken{{ + Link: testUrl + `/brokenPage`, + Code: http.StatusNotFound, + }}, testUrl + `/page2`: []deadlinks.Broken{{ Link: testUrl + `/broken.png`, Code: http.StatusNotFound, @@ -89,6 +93,10 @@ func TestDeadLinks_Scan(t *testing.T) { Link: `https://kilabit.info/brokenPage`, Code: http.StatusNotFound, }}, + testUrl + `/broken.html`: []deadlinks.Broken{{ + Link: testUrl + `/brokenPage`, + Code: http.StatusNotFound, + }}, testUrl + `/page2`: []deadlinks.Broken{{ Link: testUrl + `/broken.png`, Code: http.StatusNotFound, @@ -113,6 +121,8 @@ func TestDeadLinks_Scan(t *testing.T) { tcase.expError, err.Error()) continue } + //got, _ := json.MarshalIndent(result.PageLinks, ``, ` `) + //t.Logf(`got=%s`, got) test.Assert(t, tcase.scanUrl, tcase.exp, result.PageLinks) } } diff --git a/testdata/web/broken.html b/testdata/web/broken.html new file mode 100644 index 0000000..533e542 --- /dev/null +++ b/testdata/web/broken.html @@ -0,0 +1,7 @@ + + + + + + diff --git a/testdata/web/index.html b/testdata/web/index.html index 1124813..19a2bae 100644 --- a/testdata/web/index.html +++ b/testdata/web/index.html @@ -8,5 +8,6 @@ SPDX-License-Identifier: GPL-3.0-only Broken page Page 2 Broken page at kilabit.info + Broken HTML -- cgit v1.3