diff options
| -rw-r--r-- | cmd/haminer/haminer.conf | 39 | ||||
| -rw-r--r-- | config.go | 39 | ||||
| -rw-r--r-- | config_test.go | 72 | ||||
| -rw-r--r-- | halog.go | 2 | ||||
| -rw-r--r-- | haminer.go | 9 | ||||
| -rw-r--r-- | tagpreprocessor.go | 58 | ||||
| -rw-r--r-- | tagpreprocessor_test.go | 129 | ||||
| -rw-r--r-- | testdata/haminer.conf | 5 |
8 files changed, 353 insertions, 0 deletions
diff --git a/cmd/haminer/haminer.conf b/cmd/haminer/haminer.conf index 428e354..3dea780 100644 --- a/cmd/haminer/haminer.conf +++ b/cmd/haminer/haminer.conf @@ -54,3 +54,42 @@ ## #influxdb_api_write= + +## +## Pre-process tags by replacing their values using regular +## expressions. +## Pre-process rules are run from top to bottom, which means if we have +## two or more rules with the same key, the output of the first rule will +## be used as the input for the second rule. +## +## Currently only "http_url" is accepted as the key. +## +## Format +## +## TAG-NAME "=" (DQUOTE regex DQUOTE) "=>" (DQUOTE replacement DQUOTE) +## +## Examples +## +## http_url = /uuid/\\w{8}-\\w{4}-\\w{4}-\\w{4}-\\w{12} => /uuid/- +## http_url = /id/[0-9]+ => /id/- +## +## This will replace "/id/1000" with "/id/-" and/or +## "/uuid/e7282bca-73b3-48fc-9793-6446ea6ebff3" with "/uuid/-". +## +## If the order of the rules is wrong, you may get unexpected output. +## For example, +## +## http_url = /[0-9]+ => /- +## http_url = /uuid/\\w{8}-\\w{4}-\\w{4}-\\w{4}-\\w{12} => /uuid/- +## +## Given a URL with a UUID that begins with digits, +## +## /uuid/9845a0b4-f4c3-4600-af13-45b5b0e61630 +## +## will result in "/uuid/-a0b4-f4c3-4600-af13-45b5b0e61630", not "/uuid/-" as +## you expected. +## +## So, please be careful about how you define the rules. +## +[preprocess "tag"] +#http_url = @@ -49,6 +49,9 @@ type Config struct { // MaxBufferedLogs define a number of logs that will be keep in buffer // before being forwarded. MaxBufferedLogs int + + // retags contains list of pre-processing rules for tag. 
+ retags []*tagPreprocessor } // @@ -90,6 +93,10 @@ func (cfg *Config) Load(path string) { if len(v) > 0 { cfg.InfluxAPIWrite = v } + + sec := in.GetSection("preprocess", "tag") + + cfg.parsePreprocessTag(sec) } // @@ -149,3 +156,35 @@ func (cfg *Config) ParseCaptureRequestHeader(v string) { cfg.RequestHeaders = append(cfg.RequestHeaders, headers[x]) } } + +func (cfg *Config) parsePreprocessTag(sec *ini.Section) { + if sec == nil { + return + } + + for _, v := range sec.Vars { + if len(v.KeyLower) == 0 { + continue + } + if v.KeyLower != "http_url" { + log.Printf("parsePreprocessTag: unknown tag %q\n", + v.KeyLower) + continue + } + + rep := strings.Split(v.Value, "=>") + if len(rep) != 2 { + log.Printf("parsePreprocessTag: invalid format %q\n", + v.Value) + continue + } + + retag, err := newTagPreprocessor(v.KeyLower, rep[0], rep[1]) + if err != nil { + log.Printf("parsePreprocessTag: %s\n", err) + continue + } + + cfg.retags = append(cfg.retags, retag) + } +} diff --git a/config_test.go b/config_test.go index 98e5929..53ff75f 100644 --- a/config_test.go +++ b/config_test.go @@ -5,8 +5,10 @@ package haminer import ( + "regexp" "testing" + "github.com/shuLhan/share/lib/ini" "github.com/shuLhan/share/lib/test" ) @@ -68,6 +70,19 @@ func TestLoad(t *testing.T) { "referrer", }, InfluxAPIWrite: "http://127.0.0.1:8086/write", + retags: []*tagPreprocessor{{ + name: "http_url", + regex: regexp.MustCompile(`/[0-9]+-\w+-\w+-\w+-\w+-\w+`), + repl: `/-`, + }, { + name: "http_url", + regex: regexp.MustCompile(`/\w+-\w+-\w+-\w+-\w+`), + repl: `/-`, + }, { + name: "http_url", + regex: regexp.MustCompile(`/[0-9]+`), + repl: `/-`, + }}, }, }} @@ -212,3 +227,60 @@ func TestParseCaptureRequestHeader(t *testing.T) { test.Assert(t, "Config", c.exp, got, true) } } + +func TestParsePreprocessTag(t *testing.T) { + cfg := NewConfig() + + cases := []struct { + desc string + in *ini.Section + exp []*tagPreprocessor + }{{ + desc: "With nil", + }, { + desc: "With unknown key", + in: 
&ini.Section{ + Vars: []*ini.Variable{{ + KeyLower: "unknown", + }}, + }, + }, { + desc: "With invalid format", + in: &ini.Section{ + Vars: []*ini.Variable{{ + KeyLower: "http_url", + Value: "", + }}, + }, + }, { + desc: "With empty regex", + in: &ini.Section{ + Vars: []*ini.Variable{{ + KeyLower: "http_url", + Value: "=>", + }}, + }, + }, { + desc: "With valid value", + in: &ini.Section{ + Vars: []*ini.Variable{{ + KeyLower: "http_url", + Value: "/[0-9]+ => /-", + }}, + }, + exp: []*tagPreprocessor{{ + name: "http_url", + regex: regexp.MustCompile(`/[0-9]+`), + repl: "/-", + }}, + }} + + for _, c := range cases { + t.Log(c.desc) + + cfg.retags = nil + cfg.parsePreprocessTag(c.in) + + test.Assert(t, "retags", c.exp, cfg.retags, true) + } +} @@ -55,6 +55,8 @@ type Halog struct { // nolint: maligned HTTPURL string HTTPQuery string HTTPProto string + + tagHTTPURL string } // @@ -141,6 +141,13 @@ func (h *Haminer) forwards(halogs []*Halog) { } } +func (h *Haminer) preprocess(halog *Halog) { + halog.tagHTTPURL = halog.HTTPURL + for _, retag := range h.cfg.retags { + halog.tagHTTPURL = retag.preprocess("http_url", halog.tagHTTPURL) + } +} + func (h *Haminer) produce() { halogs := make([]*Halog, 0) @@ -150,6 +157,8 @@ func (h *Haminer) produce() { continue } + h.preprocess(halog) + halogs = append(halogs, halog) if len(halogs) >= h.cfg.MaxBufferedLogs { diff --git a/tagpreprocessor.go b/tagpreprocessor.go new file mode 100644 index 0000000..2be8846 --- /dev/null +++ b/tagpreprocessor.go @@ -0,0 +1,58 @@ +// Copyright 2019, M. Shulhan (ms@kilabit.info). All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package haminer + +import ( + "errors" + "regexp" + "strings" +) + +type tagPreprocessor struct { + name string + regex *regexp.Regexp + repl string +} + +// +// newTagPreprocessor create and initialize replace tag pre-processing. 
+// The regex and repl strings must be enclosed with double-quote, except for +// repl it can be empty. +// +func newTagPreprocessor(name, regex, repl string) ( + retag *tagPreprocessor, err error, +) { + name = strings.TrimSpace(name) + regex = strings.TrimSpace(regex) + repl = strings.TrimSpace(repl) + + if len(name) == 0 { + return nil, errors.New("newTagPreprocessor: empty name parameter") + } + if len(regex) == 0 { + return nil, errors.New("newTagPreprocessor: empty regex parameter") + } + + re, err := regexp.Compile(regex) + if err != nil { + return nil, err + } + + retag = &tagPreprocessor{ + name: name, + regex: re, + repl: repl, + } + + return retag, nil +} + +func (tagp *tagPreprocessor) preprocess(name, value string) string { + if tagp.name != name { + return value + } + out := tagp.regex.ReplaceAllString(value, tagp.repl) + return out +} diff --git a/tagpreprocessor_test.go b/tagpreprocessor_test.go new file mode 100644 index 0000000..c5f59e9 --- /dev/null +++ b/tagpreprocessor_test.go @@ -0,0 +1,129 @@ +// Copyright 2019, M. Shulhan (ms@kilabit.info). All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package haminer + +import ( + "regexp" + "testing" + + "github.com/shuLhan/share/lib/test" +) + +func TestNewTagPreprocessor(t *testing.T) { + cases := []struct { + desc string + name string + regex string + repl string + exp *tagPreprocessor + expErr string + }{{ + desc: "With empty name", + expErr: "newTagPreprocessor: empty name parameter", + }, { + desc: "With empty regex", + name: "http_url", + expErr: "newTagPreprocessor: empty regex parameter", + }, { + desc: "With invalid regex", + name: "http_url", + regex: `/[a-z]*+`, + expErr: "error parsing regexp: invalid nested repetition operator: `*+`", + }, { + desc: "With valid parameters", + name: "http_url", + regex: `/[0-9]+`, + repl: `/-`, + exp: &tagPreprocessor{ + name: "http_url", + regex: regexp.MustCompile(`/[0-9]+`), + repl: `/-`, + }, + }} + + for _, c := range cases { + t.Log(c.desc) + + got, err := newTagPreprocessor(c.name, c.regex, c.repl) + if err != nil { + test.Assert(t, "error", c.expErr, err.Error(), true) + continue + } + + test.Assert(t, "TagPreprocessor", c.exp, got, true) + } +} + +func TestPreprocess(t *testing.T) { + reIDUUID := regexp.MustCompile(`/[0-9]+-\w+-\w+-\w+-\w+-\w+`) + reUUID := regexp.MustCompile(`/-?\w+-\w+-\w+-\w+-\w+`) + reID := regexp.MustCompile(`/[0-9]+`) + + retags := []*tagPreprocessor{{ + name: "http_url", + regex: reIDUUID, + repl: `/-`, + }, { + name: "http_url", + regex: reUUID, + repl: `/-`, + }, { + name: "http_url", + regex: reID, + repl: `/-`, + }} + + cases := []struct { + desc string + name string + in string + exp string + }{{ + desc: "With empty name", + }, { + desc: "With different name", + name: "tag", + in: "/test/1000", + exp: "/test/1000", + }, { + desc: "With one replacement", + name: "http_url", + in: "/test/1000", + exp: "/test/-", + }, { + desc: "With two replacements", + name: "http_url", + in: "/test/1000/param/9845a0b4-f4c3-4600-af13-45b5b0e61630", + exp: "/test/-/param/-", + }, { + desc: "With three replacements", + name: "http_url", + in: 
"/group/9845a0b4-f4c3-4600-af13-45b5b0e61630/test/1000/param/1-9845a0b4-f4c3-4600-af13-45b5b0e61630", + exp: "/group/-/test/-/param/-", + }, { + desc: "With invalid UUID", + name: "http_url", + in: `/v1/threads/900001-fefcd79-0b03-4794-ae90-abe4b51dec75/count-previous/90001`, + exp: `/v1/threads/-/count-previous/-`, + }, { + desc: "With missing ID", + name: "http_url", + in: `/v1/threads/-fefcd79-0b03-4794-ae90-abe4b51dec75/count-previous/90001`, + exp: `/v1/threads/-/count-previous/-`, + }} + + for _, c := range cases { + t.Log(c.desc) + + got := c.in + + for _, tagp := range retags { + got = tagp.preprocess(c.name, got) + t.Log("got: ", got) + } + + test.Assert(t, "preprocess", c.exp, got, true) + } +} diff --git a/testdata/haminer.conf b/testdata/haminer.conf index a635dcd..4e3bf5b 100644 --- a/testdata/haminer.conf +++ b/testdata/haminer.conf @@ -3,3 +3,8 @@ listen = 0.0.0.0:8080 accept_backend = ,a , b, capture_request_header = , host, referrer, influxdb_api_write = http://127.0.0.1:8086/write + +[preprocess "tag"] +http_url = /[0-9]+-\\w+-\\w+-\\w+-\\w+-\\w+ => /- +http_url = /\\w+-\\w+-\\w+-\\w+-\\w+ => /- +http_url = /[0-9]+ => /- |
