From b0c320e436ff5cdc70ad38a980a2af2a7f3e5dfd Mon Sep 17 00:00:00 2001
From: Shulhan <ms@kilabit.info>
Date: Thu, 29 May 2025 14:04:51 +0700
Subject: all: ignore HTML page from external domain

Any HTML link that is from a domain other than the scanned domain should
not get parsed.
Such a link is only checked for whether it is valid or not.
---
 testdata/web/index.html       | 1 +
 testdata/web/page2/index.html | 1 +
 2 files changed, 2 insertions(+)

(limited to 'testdata/web')
diff --git a/testdata/web/index.html b/testdata/web/index.html
index e4d8bd0..f4f86d8 100644
--- a/testdata/web/index.html
+++ b/testdata/web/index.html
@@ -9,5 +9,6 @@ SPDX-License-Identifier: GPL-3.0-only
     <img src="/gopher.png" />
     <a href="/page2">Page 2</a>
     <a href="/broken.html">Broken HTML</a>
+    <a href="http://127.0.0.1:11900">External URL</a>
   </body>
 </html>
diff --git a/testdata/web/page2/index.html b/testdata/web/page2/index.html
index 0fc7601..ae6b4ea 100644
--- a/testdata/web/page2/index.html
+++ b/testdata/web/page2/index.html
@@ -9,5 +9,6 @@ SPDX-License-Identifier: GPL-3.0-only
     <a href="broken/relative">broken relative link</a>
     <a href="/">Back with absolute path</a>
     <a href="../">Back with relative path</a>
+    <a href="http://127.0.0.1:11900/page2">External URL page2</a>
   </body>
 </html>
-- 
cgit v1.3