// genURLRelative generates a new URL by joining the relative path relPath
// to the path of parentUrl.
//
// If the parent path ends with the extension ".html" or ".htm"
// (case-insensitive), the parent is treated as a page and relPath is joined
// to the directory that contains the page; otherwise relPath is joined to
// the parent path itself.
// The joined path is cleaned by path.Join, so "." and ".." elements are
// resolved and a trailing slash is removed.
//
// The returned URL is a copy of parentUrl with only Path replaced; every
// other field is carried over as-is and parentUrl itself is not modified.
func genURLRelative(parentUrl *url.URL, relPath string) (newUrl *url.URL) {
	var parentPath = parentUrl.Path
	var ext = strings.ToLower(path.Ext(parentPath))
	if ext == `.html` || ext == `.htm` {
		// The parent is a page, not a directory: a relative link inside
		// it is resolved against the directory that holds the page.
		parentPath = path.Dir(parentPath)
	}

	// Copy the struct instead of round-tripping through String and
	// url.Parse: the re-parse can fail, and with the error ignored the
	// result would be nil and the Path assignment below would panic.
	var dup = *parentUrl
	dup.Path = path.Join(parentPath, relPath)
	return &dup
}
Shulhan + +package brokenlinks + +import ( + "net/url" + "testing" + + "git.sr.ht/~shulhan/pakakeh.go/lib/test" +) + +func TestGenURLRelative(t *testing.T) { + listCase := []struct { + parentURL string + relPath string + expURL string + }{{ + parentURL: `https://domain/a/b/`, + relPath: `c`, + expURL: `https://domain/a/b/c`, + }, { + parentURL: `https://domain/a/b`, + relPath: `c`, + expURL: `https://domain/a/b/c`, + }, { + parentURL: `https://domain/a/b/page.html`, + relPath: `c`, + expURL: `https://domain/a/b/c`, + }, { + parentURL: `https://domain/a/b/page.htm`, + relPath: `c`, + expURL: `https://domain/a/b/c`, + }, { + parentURL: `https://domain/a/b/page.HTML`, + relPath: `c`, + expURL: `https://domain/a/b/c`, + }} + for _, tc := range listCase { + parentURL, _ := url.Parse(tc.parentURL) + got := genURLRelative(parentURL, tc.relPath) + test.Assert(t, ``, tc.expURL, got.String()) + } +} -- cgit v1.3