Skip to content

Commit cd18e56

Browse files
author
Florian Hines
authored
Add regex based kv extraction option (#196)
* Add regex based kv extraction option * add comment * just collect unparsed entries
1 parent 034c09d commit cd18e56

File tree

2 files changed

+72
-56
lines changed

2 files changed

+72
-56
lines changed

parser/user_traffic.go

Lines changed: 52 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -9,39 +9,41 @@ import (
99

1010
// UserTraffic is a single decoded user traffic log line.
//
// Each field is populated by ParseUserTrafficRecord from the matching key of
// the raw log line. Raw fields that cannot be mapped to a named field are kept
// verbatim in Unparsed instead of being dropped.
type UserTraffic struct {
	Status       int       `json:"status"`
	RequestSize  int64     `json:"request_size"`
	ResponseSize int64     `json:"response_size"`
	Timing       int64     `json:"timing"`
	Timestamp    time.Time `json:"timestamp"`
	RequestID    string    `json:"request_id"`
	Result       string    `json:"result"`
	CSID         string    `json:"csid"`
	// The tag key here was previously misspelled as `jsond`, which
	// encoding/json ignores; the field was therefore marshaled under the key
	// "CCID" rather than "ccid" like its siblings.
	CCID        string `json:"ccid"`
	CID         string `json:"cid"`
	Proto       string `json:"proto"`
	Method      string `json:"method"`
	URL         string `json:"url"`
	SID         string `json:"sid"`
	AID         string `json:"aid"`
	DID         string `json:"did"`
	Cancel      string `json:"cancel"`
	CCancel     string `json:"ccancel"`
	ProxyType   string `json:"proxy_type"`
	FID         string `json:"fid"`
	ContentType string `json:"content_type"`
	Address     string `json:"address"`
	Country     string `json:"country"`
	Referrer    string `json:"referrer"`
	CW          string `json:"cw"`
	SSLVersion  string `json:"ssl_version"`
	SSLCipher   string `json:"ssl_cipher"`
	ENC         string `json:"enc"`
	UserAgent   string `json:"ua"`
	// Unparsed holds the raw text of every field that had an unknown key or
	// did not split into exactly a key and a value.
	Unparsed []string `json:"unparsed"`
}
4343

4444
//ParseUserTrafficRecord parses a raw user traffic log line into a UserTraffic struct
45+
//A slice of any unknown kv pairs or unbalanced fields will be appended to the UserTraffic
46+
//struct's Unparsed field.
4547
func ParseUserTrafficRecord(raw string) (*UserTraffic, error) {
4648
var ut UserTraffic
4749
var err error
@@ -58,13 +60,14 @@ func ParseUserTrafficRecord(raw string) (*UserTraffic, error) {
5860

5961
for _, field := range strings.Fields(praw[0]) {
6062
parts := strings.SplitN(field, "=", 2)
61-
if len(parts) != 2 {
62-
return nil, fmt.Errorf("found key field with no value: %s", parts)
63+
if len(parts) != 2 { // most commonly due to kv's with duplicate value fields
64+
ut.Unparsed = append(ut.Unparsed, field)
65+
continue
6366
}
6467
switch parts[0] {
6568
case "request_id":
6669
ut.RequestID = parts[1]
67-
case "@timestamp":
70+
case "@timestamp", "timestamp":
6871
tsFloat, err := strconv.ParseFloat(parts[1], 64)
6972
if err != nil {
7073
return nil, fmt.Errorf("malformed field (%s) value: %s", parts[0], parts[1])
@@ -77,11 +80,11 @@ func ParseUserTrafficRecord(raw string) (*UserTraffic, error) {
7780
case "result":
7881
ut.Result = parts[1]
7982
case "csid":
80-
ut.CSID = parts[1]
83+
ut.CSID = strings.TrimSuffix(parts[1], ",")
8184
case "cid":
82-
ut.CID = parts[1]
85+
ut.CID = strings.TrimSuffix(parts[1], ",")
8386
case "ccid":
84-
ut.CCID = parts[1]
87+
ut.CCID = strings.TrimSuffix(parts[1], ",")
8588
case "status":
8689
if ut.Status, err = strconv.Atoi(parts[1]); err != nil {
8790
return nil, fmt.Errorf("malformed field (%s) value: %s", parts[0], parts[1])
@@ -111,34 +114,31 @@ func ParseUserTrafficRecord(raw string) (*UserTraffic, error) {
111114
//proactively handle
112115
ut.AID = strings.TrimSuffix(parts[1], ",")
113116
case "did":
114-
ut.DID = parts[1]
117+
ut.DID = strings.TrimSuffix(parts[1], ",")
115118
case "cancel":
116-
ut.Cancel = parts[1]
119+
ut.Cancel = strings.TrimSuffix(parts[1], ",")
117120
case "proxy_type":
118-
ut.ProxyType = parts[1]
121+
ut.ProxyType = strings.TrimSuffix(parts[1], ",")
119122
case "fid":
120-
ut.FID = parts[1]
123+
ut.FID = strings.TrimSuffix(parts[1], ",")
121124
case "content_type":
122-
ut.ContentType = parts[1]
125+
ut.ContentType = strings.TrimSuffix(parts[1], ",")
123126
case "address":
124-
ut.Address = parts[1]
127+
ut.Address = strings.TrimSuffix(parts[1], ",")
125128
case "country":
126-
ut.Country = parts[1]
129+
ut.Country = strings.TrimSuffix(parts[1], ",")
127130
case "referrer":
128-
ut.Referrer = parts[1]
131+
ut.Referrer = strings.TrimSuffix(parts[1], ",")
129132
case "cw":
130-
ut.CW = parts[1]
133+
ut.CW = strings.TrimSuffix(parts[1], ",")
131134
case "ssl_version":
132-
ut.SSLVersion = parts[1]
135+
ut.SSLVersion = strings.TrimSuffix(parts[1], ",")
133136
case "ssl_cipher":
134-
ut.SSLCipher = parts[1]
137+
ut.SSLCipher = strings.TrimSuffix(parts[1], ",")
135138
case "enc":
136-
ut.ENC = parts[1]
139+
ut.ENC = strings.TrimSuffix(parts[1], ",")
137140
default:
138-
if ut.Other == nil {
139-
ut.Other = make(map[string]string)
140-
}
141-
ut.Other[parts[0]] = parts[1]
141+
ut.Unparsed = append(ut.Unparsed, field)
142142
}
143143
}
144144
return &ut, nil

parser/user_traffic_test.go

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ func TestParseUserTrafficPayload(t *testing.T) {
145145
ENC: "-",
146146
CW: "-",
147147
UserAgent: "Mozilla/5.0 (X11; CrOS x86_64 12239.92.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.136 Safari/537.36",
148-
Other: map[string]string{"stuff": "things", "oneother": "\"onething\""},
148+
Unparsed: []string{"stuff=things", "oneother=\"onething\""},
149149
}
150150

151151
ut, err := ParseUserTrafficRecord(genUserTrafficLine(t, defaultValues()))
@@ -179,6 +179,9 @@ func TestParseUserTrafficPayload(t *testing.T) {
179179
assert.Equal(t, failSafeUT.AID, ut.AID)
180180
assert.Equal(t, failSafeUT.DID, ut.DID)
181181
assert.Equal(t, failSafeUT.Address, ut.Address)
182+
assert.Len(t, ut.Unparsed, 2)
183+
assert.Contains(t, ut.Unparsed, "stuff=things")
184+
assert.Contains(t, ut.Unparsed, "oneother=\"onething\"")
182185
}
183186

184187
func TestSidWithComma(t *testing.T) {
@@ -204,11 +207,12 @@ func TestErrOnExtraTimestamp(t *testing.T) {
204207
require.Nil(t, ut)
205208
}
206209

207-
func TestErrOnKeyWithNoValue(t *testing.T) {
210+
func TestKeyWithNoValue(t *testing.T) {
208211
keyMissingValue := "randomKeyWithNoValue " + genUserTrafficLine(t, defaultValues())
209212
ut, err := ParseUserTrafficRecord(keyMissingValue)
210-
require.Error(t, err)
211-
require.Nil(t, ut)
213+
require.NoError(t, err)
214+
require.Len(t, ut.Unparsed, 3)
215+
require.Contains(t, ut.Unparsed, "randomKeyWithNoValue")
212216
}
213217

214218
func TestMalformedTimestamp(t *testing.T) {
@@ -250,3 +254,15 @@ func TestMalformedResponseSize(t *testing.T) {
250254
require.Error(t, err)
251255
require.Nil(t, ut)
252256
}
257+
258+
// TestDuplicateValueFields checks that a record whose country value is
// "US, US" still parses without error: Country is expected to be "US" and the
// Unparsed slice is expected to hold three entries, one of which is a bare "US".
// NOTE(review): defaultValues and genUserTrafficLine are defined elsewhere;
// presumably they contribute the "stuff=things" and "oneother" entries — confirm.
func TestDuplicateValueFields(t *testing.T) {
259+
fields := defaultValues()
260+
// The embedded space is what yields a second, key-less "US" token.
fields["countryField"] = "US, US"
261+
ut, err := ParseUserTrafficRecord(genUserTrafficLine(t, fields))
262+
require.NoError(t, err)
263+
require.Equal(t, "US", ut.Country)
264+
require.Len(t, ut.Unparsed, 3)
265+
require.Contains(t, ut.Unparsed, "stuff=things")
266+
require.Contains(t, ut.Unparsed, "oneother=\"onething\"")
267+
require.Contains(t, ut.Unparsed, "US")
268+
}

0 commit comments

Comments
 (0)