summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2019-01-04 11:19:00 +0700
committerShulhan <ms@kilabit.info>2019-01-04 15:15:35 +0700
commitd7739491bdadf7b936095dcc2ba09f57e2f3a322 (patch)
treeccfdcc3e4042f4a976db57569a1037c4b59d4c76
parent49e7d14699b299dffa835b2a9240e2f03e8d0dd3 (diff)
downloadhaminer-d7739491bdadf7b936095dcc2ba09f57e2f3a322.tar.xz
Add option to preprocess http_url
-rw-r--r--cmd/haminer/haminer.conf39
-rw-r--r--config.go39
-rw-r--r--config_test.go72
-rw-r--r--halog.go2
-rw-r--r--haminer.go9
-rw-r--r--tagpreprocessor.go58
-rw-r--r--tagpreprocessor_test.go129
-rw-r--r--testdata/haminer.conf5
8 files changed, 353 insertions, 0 deletions
diff --git a/cmd/haminer/haminer.conf b/cmd/haminer/haminer.conf
index 428e354..3dea780 100644
--- a/cmd/haminer/haminer.conf
+++ b/cmd/haminer/haminer.conf
@@ -54,3 +54,42 @@
##
#influxdb_api_write=
+
+##
+## Pre-process tags by replacing their values using regular
+## expressions.
+## The pre-process rules are run from top to bottom, which means if we have
+## two or more rules with the same key, the output of the first rule will
+## be used as the input for the second rule.
+##
+## Currently only "http_url" is accepted as the key.
+##
+## Format
+##
+## TAG-NAME "=" (DQUOTE regex DQUOTE) "=>" (DQUOTE replacement DQUOTE)
+##
+## Examples
+##
+## http_url = /uuid/\\w{8}-\\w{4}-\\w{4}-\\w{4}-\\w{12} => /uuid/-
+## http_url = /id/[0-9]+ => /id/-
+##
+## This will replace "/id/1000" with "/id/-" and/or
+## "/uuid/e7282bca-73b3-48fc-9793-6446ea6ebff3" with "/uuid/-".
+##
+## If the rules are in the wrong order, you may get unexpected output.
+## For example,
+##
+## http_url = /[0-9]+ => /-
+## http_url = /uuid/\\w{8}-\\w{4}-\\w{4}-\\w{4}-\\w{12} => /uuid/-
+##
+## Given a URL with a UUID that begins with digits,
+##
+## /uuid/9845a0b4-f4c3-4600-af13-45b5b0e61630
+##
+## will result in "/uuid/-a0b4-f4c3-4600-af13-45b5b0e61630", not "/uuid/-" as
+## you might expect.
+##
+## So, please be careful on how you define the rules.
+##
+[preprocess "tag"]
+#http_url =
diff --git a/config.go b/config.go
index 3161c1f..4bdd57b 100644
--- a/config.go
+++ b/config.go
@@ -49,6 +49,9 @@ type Config struct {
// MaxBufferedLogs define a number of logs that will be keep in buffer
// before being forwarded.
MaxBufferedLogs int
+
+ // retags contains list of pre-processing rules for tag.
+ retags []*tagPreprocessor
}
//
@@ -90,6 +93,10 @@ func (cfg *Config) Load(path string) {
if len(v) > 0 {
cfg.InfluxAPIWrite = v
}
+
+ sec := in.GetSection("preprocess", "tag")
+
+ cfg.parsePreprocessTag(sec)
}
//
@@ -149,3 +156,35 @@ func (cfg *Config) ParseCaptureRequestHeader(v string) {
cfg.RequestHeaders = append(cfg.RequestHeaders, headers[x])
}
}
+
+func (cfg *Config) parsePreprocessTag(sec *ini.Section) {
+ if sec == nil {
+ return
+ }
+
+ for _, v := range sec.Vars {
+ if len(v.KeyLower) == 0 {
+ continue
+ }
+ if v.KeyLower != "http_url" {
+ log.Printf("parsePreprocessTag: unknown tag %q\n",
+ v.KeyLower)
+ continue
+ }
+
+ rep := strings.Split(v.Value, "=>")
+ if len(rep) != 2 {
+ log.Printf("parsePreprocessTag: invalid format %q\n",
+ v.Value)
+ continue
+ }
+
+ retag, err := newTagPreprocessor(v.KeyLower, rep[0], rep[1])
+ if err != nil {
+ log.Printf("parsePreprocessTag: %s\n", err)
+ continue
+ }
+
+ cfg.retags = append(cfg.retags, retag)
+ }
+}
diff --git a/config_test.go b/config_test.go
index 98e5929..53ff75f 100644
--- a/config_test.go
+++ b/config_test.go
@@ -5,8 +5,10 @@
package haminer
import (
+ "regexp"
"testing"
+ "github.com/shuLhan/share/lib/ini"
"github.com/shuLhan/share/lib/test"
)
@@ -68,6 +70,19 @@ func TestLoad(t *testing.T) {
"referrer",
},
InfluxAPIWrite: "http://127.0.0.1:8086/write",
+ retags: []*tagPreprocessor{{
+ name: "http_url",
+ regex: regexp.MustCompile(`/[0-9]+-\w+-\w+-\w+-\w+-\w+`),
+ repl: `/-`,
+ }, {
+ name: "http_url",
+ regex: regexp.MustCompile(`/\w+-\w+-\w+-\w+-\w+`),
+ repl: `/-`,
+ }, {
+ name: "http_url",
+ regex: regexp.MustCompile(`/[0-9]+`),
+ repl: `/-`,
+ }},
},
}}
@@ -212,3 +227,60 @@ func TestParseCaptureRequestHeader(t *testing.T) {
test.Assert(t, "Config", c.exp, got, true)
}
}
+
+func TestParsePreprocessTag(t *testing.T) {
+ cfg := NewConfig()
+
+ cases := []struct {
+ desc string
+ in *ini.Section
+ exp []*tagPreprocessor
+ }{{
+ desc: "With nil",
+ }, {
+ desc: "With unknown key",
+ in: &ini.Section{
+ Vars: []*ini.Variable{{
+ KeyLower: "unknown",
+ }},
+ },
+ }, {
+ desc: "With invalid format",
+ in: &ini.Section{
+ Vars: []*ini.Variable{{
+ KeyLower: "http_url",
+ Value: "",
+ }},
+ },
+ }, {
+ desc: "With empty regex",
+ in: &ini.Section{
+ Vars: []*ini.Variable{{
+ KeyLower: "http_url",
+ Value: "=>",
+ }},
+ },
+ }, {
+ desc: "With valid value",
+ in: &ini.Section{
+ Vars: []*ini.Variable{{
+ KeyLower: "http_url",
+ Value: "/[0-9]+ => /-",
+ }},
+ },
+ exp: []*tagPreprocessor{{
+ name: "http_url",
+ regex: regexp.MustCompile(`/[0-9]+`),
+ repl: "/-",
+ }},
+ }}
+
+ for _, c := range cases {
+ t.Log(c.desc)
+
+ cfg.retags = nil
+ cfg.parsePreprocessTag(c.in)
+
+ test.Assert(t, "retags", c.exp, cfg.retags, true)
+ }
+}
diff --git a/halog.go b/halog.go
index 490540d..4bb7b60 100644
--- a/halog.go
+++ b/halog.go
@@ -55,6 +55,8 @@ type Halog struct { // nolint: maligned
HTTPURL string
HTTPQuery string
HTTPProto string
+
+ tagHTTPURL string
}
//
diff --git a/haminer.go b/haminer.go
index 245b4f1..3f62024 100644
--- a/haminer.go
+++ b/haminer.go
@@ -141,6 +141,13 @@ func (h *Haminer) forwards(halogs []*Halog) {
}
}
+func (h *Haminer) preprocess(halog *Halog) {
+ halog.tagHTTPURL = halog.HTTPURL
+ for _, retag := range h.cfg.retags {
+ halog.tagHTTPURL = retag.preprocess("http_url", halog.tagHTTPURL)
+ }
+}
+
func (h *Haminer) produce() {
halogs := make([]*Halog, 0)
@@ -150,6 +157,8 @@ func (h *Haminer) produce() {
continue
}
+ h.preprocess(halog)
+
halogs = append(halogs, halog)
if len(halogs) >= h.cfg.MaxBufferedLogs {
diff --git a/tagpreprocessor.go b/tagpreprocessor.go
new file mode 100644
index 0000000..2be8846
--- /dev/null
+++ b/tagpreprocessor.go
@@ -0,0 +1,58 @@
+// Copyright 2019, M. Shulhan (ms@kilabit.info). All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package haminer
+
+import (
+ "errors"
+ "regexp"
+ "strings"
+)
+
+type tagPreprocessor struct {
+ name string
+ regex *regexp.Regexp
+ repl string
+}
+
+//
+// newTagPreprocessor creates and initializes a tag pre-processor.
+// Leading and trailing whitespace is trimmed from name, regex, and repl;
+// name and regex must not be empty, while repl can be empty.
+//
+func newTagPreprocessor(name, regex, repl string) (
+ retag *tagPreprocessor, err error,
+) {
+ name = strings.TrimSpace(name)
+ regex = strings.TrimSpace(regex)
+ repl = strings.TrimSpace(repl)
+
+ if len(name) == 0 {
+ return nil, errors.New("newTagPreprocessor: empty name parameter")
+ }
+ if len(regex) == 0 {
+ return nil, errors.New("newTagPreprocessor: empty regex parameter")
+ }
+
+ re, err := regexp.Compile(regex)
+ if err != nil {
+ return nil, err
+ }
+
+ retag = &tagPreprocessor{
+ name: name,
+ regex: re,
+ repl: repl,
+ }
+
+ return retag, nil
+}
+
+func (tagp *tagPreprocessor) preprocess(name, value string) string {
+ if tagp.name != name {
+ return value
+ }
+ out := tagp.regex.ReplaceAllString(value, tagp.repl)
+ return out
+}
diff --git a/tagpreprocessor_test.go b/tagpreprocessor_test.go
new file mode 100644
index 0000000..c5f59e9
--- /dev/null
+++ b/tagpreprocessor_test.go
@@ -0,0 +1,129 @@
+// Copyright 2019, M. Shulhan (ms@kilabit.info). All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package haminer
+
+import (
+ "regexp"
+ "testing"
+
+ "github.com/shuLhan/share/lib/test"
+)
+
+func TestNewTagPreprocessor(t *testing.T) {
+ cases := []struct {
+ desc string
+ name string
+ regex string
+ repl string
+ exp *tagPreprocessor
+ expErr string
+ }{{
+ desc: "With empty name",
+ expErr: "newTagPreprocessor: empty name parameter",
+ }, {
+ desc: "With empty regex",
+ name: "http_url",
+ expErr: "newTagPreprocessor: empty regex parameter",
+ }, {
+ desc: "With invalid regex",
+ name: "http_url",
+ regex: `/[a-z]*+`,
+ expErr: "error parsing regexp: invalid nested repetition operator: `*+`",
+ }, {
+ desc: "With valid parameters",
+ name: "http_url",
+ regex: `/[0-9]+`,
+ repl: `/-`,
+ exp: &tagPreprocessor{
+ name: "http_url",
+ regex: regexp.MustCompile(`/[0-9]+`),
+ repl: `/-`,
+ },
+ }}
+
+ for _, c := range cases {
+ t.Log(c.desc)
+
+ got, err := newTagPreprocessor(c.name, c.regex, c.repl)
+ if err != nil {
+ test.Assert(t, "error", c.expErr, err.Error(), true)
+ continue
+ }
+
+ test.Assert(t, "TagPreprocessor", c.exp, got, true)
+ }
+}
+
+func TestPreprocess(t *testing.T) {
+ reIDUUID := regexp.MustCompile(`/[0-9]+-\w+-\w+-\w+-\w+-\w+`)
+ reUUID := regexp.MustCompile(`/-?\w+-\w+-\w+-\w+-\w+`)
+ reID := regexp.MustCompile(`/[0-9]+`)
+
+ retags := []*tagPreprocessor{{
+ name: "http_url",
+ regex: reIDUUID,
+ repl: `/-`,
+ }, {
+ name: "http_url",
+ regex: reUUID,
+ repl: `/-`,
+ }, {
+ name: "http_url",
+ regex: reID,
+ repl: `/-`,
+ }}
+
+ cases := []struct {
+ desc string
+ name string
+ in string
+ exp string
+ }{{
+ desc: "With empty name",
+ }, {
+ desc: "With different name",
+ name: "tag",
+ in: "/test/1000",
+ exp: "/test/1000",
+ }, {
+ desc: "With one replacement",
+ name: "http_url",
+ in: "/test/1000",
+ exp: "/test/-",
+ }, {
+ desc: "With two replacements",
+ name: "http_url",
+ in: "/test/1000/param/9845a0b4-f4c3-4600-af13-45b5b0e61630",
+ exp: "/test/-/param/-",
+ }, {
+ desc: "With three replacements",
+ name: "http_url",
+ in: "/group/9845a0b4-f4c3-4600-af13-45b5b0e61630/test/1000/param/1-9845a0b4-f4c3-4600-af13-45b5b0e61630",
+ exp: "/group/-/test/-/param/-",
+ }, {
+ desc: "With invalid UUID",
+ name: "http_url",
+ in: `/v1/threads/900001-fefcd79-0b03-4794-ae90-abe4b51dec75/count-previous/90001`,
+ exp: `/v1/threads/-/count-previous/-`,
+ }, {
+ desc: "With missing ID",
+ name: "http_url",
+ in: `/v1/threads/-fefcd79-0b03-4794-ae90-abe4b51dec75/count-previous/90001`,
+ exp: `/v1/threads/-/count-previous/-`,
+ }}
+
+ for _, c := range cases {
+ t.Log(c.desc)
+
+ got := c.in
+
+ for _, tagp := range retags {
+ got = tagp.preprocess(c.name, got)
+ t.Log("got: ", got)
+ }
+
+ test.Assert(t, "preprocess", c.exp, got, true)
+ }
+}
diff --git a/testdata/haminer.conf b/testdata/haminer.conf
index a635dcd..4e3bf5b 100644
--- a/testdata/haminer.conf
+++ b/testdata/haminer.conf
@@ -3,3 +3,8 @@ listen = 0.0.0.0:8080
accept_backend = ,a , b,
capture_request_header = , host, referrer,
influxdb_api_write = http://127.0.0.1:8086/write
+
+[preprocess "tag"]
+http_url = /[0-9]+-\\w+-\\w+-\\w+-\\w+-\\w+ => /-
+http_url = /\\w+-\\w+-\\w+-\\w+-\\w+ => /-
+http_url = /[0-9]+ => /-