Skip to content

Commit 7c4d76c

Browse files
Drop pipeline intermediates, fix multi-IP grok, restore file.hashes
- is_alert (notice 2004, suricata 2004): string-builder writes directly to `ocsf.is_alert`; grok-parser converts in place. Drops the `_is_alert_str` intermediate.
- DNS answers: stringify directly into `ocsf.answer`; grok extracts `ocsf.answer.rdata` via `a %{data:ocsf.answer.rdata}(,%{data})?` so the comma-separated multi-IP form parses correctly. Drops the `_answers_str` intermediate.
- File Hosting tx/rx hosts: stringify directly into `ocsf.{src,dst}_endpoint`; grok extracts `.ip` via `g %{ip:ocsf.{src,dst}_endpoint.ip}(,%{data})?` for multi-IP. Drops the `_tx_hosts_str`/`_rx_hosts_str` intermediates.
- Connection 4001: arithmetic-processor writes total bytes directly to `ocsf.traffic.bytes`; the schema-processor remapper becomes a self-map. Drops the `_total_bytes` intermediate (matches the earlier _total_packets/_duration_ms cleanup).
- Restore `ocsf.file.hashes`: build `tmp_md5`/`tmp_sha1`/`tmp_sha256` fingerprint objects (algorithm name, integer algorithm_id, value), array-processor append each into `ocsf.file.hashes`, and self-map the array inside the 6006 schema-processor.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent d59cf2a commit 7c4d76c

2 files changed

Lines changed: 167 additions & 34 deletions

File tree

zeek/assets/logs/zeek.yaml

Lines changed: 152 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1698,12 +1698,12 @@ pipeline:
16981698
name: Set is_alert to boolean true
16991699
enabled: true
17001700
template: "true"
1701-
target: _is_alert_str
1701+
target: ocsf.is_alert
17021702
replaceMissing: false
17031703
- type: grok-parser
1704-
name: Convert _is_alert_str to boolean ocsf.is_alert
1704+
name: Convert ocsf.is_alert string to boolean
17051705
enabled: true
1706-
source: _is_alert_str
1706+
source: ocsf.is_alert
17071707
samples:
17081708
- "true"
17091709
grok:
@@ -1913,12 +1913,12 @@ pipeline:
19131913
name: Set is_alert to boolean true
19141914
enabled: true
19151915
template: "true"
1916-
target: _is_alert_str
1916+
target: ocsf.is_alert
19171917
replaceMissing: false
19181918
- type: grok-parser
1919-
name: Convert _is_alert_str to boolean ocsf.is_alert
1919+
name: Convert ocsf.is_alert string to boolean
19201920
enabled: true
1921-
source: _is_alert_str
1921+
source: ocsf.is_alert
19221922
samples:
19231923
- "true"
19241924
grok:
@@ -2163,7 +2163,7 @@ pipeline:
21632163
name: Calculate total bytes
21642164
enabled: true
21652165
expression: (orig_bytes + resp_bytes)
2166-
target: _total_bytes
2166+
target: ocsf.traffic.bytes
21672167
isReplaceMissing: false
21682168
- type: arithmetic-processor
21692169
name: Calculate total packets
@@ -2392,11 +2392,11 @@ pipeline:
23922392
overrideOnConflict: true
23932393
targetFormat: integer
23942394
- type: schema-remapper
2395-
name: Map `_total_bytes` to `ocsf.traffic.bytes`
2395+
name: Map `ocsf.traffic.bytes` to `ocsf.traffic.bytes`
23962396
sources:
2397-
- _total_bytes
2397+
- ocsf.traffic.bytes
23982398
target: ocsf.traffic.bytes
2399-
preserveSource: false
2399+
preserveSource: true
24002400
overrideOnConflict: true
24012401
targetFormat: integer
24022402
- type: schema-remapper
@@ -3115,20 +3115,21 @@ pipeline:
31153115
query: "@_path:(dns OR dns_red)"
31163116
processors:
31173117
- type: string-builder-processor
3118-
name: Stringify answers
3118+
name: Stringify answers into ocsf.answer
31193119
enabled: true
31203120
template: "%{answers}"
3121-
target: _answers_str
3121+
target: ocsf.answer
31223122
replaceMissing: false
31233123
- type: grok-parser
31243124
name: Extract first answer into ocsf.answer.rdata
31253125
enabled: true
3126-
source: _answers_str
3126+
source: ocsf.answer
31273127
samples:
3128-
- '["185.64.148.0"]'
3128+
- "185.64.148.0"
3129+
- "185.64.148.0,8.8.8.8"
31293130
grok:
31303131
supportRules: ""
3131-
matchRules: 'a \[?"?%{notSpace:ocsf.answer.rdata}"?'
3132+
matchRules: 'a %{data:ocsf.answer.rdata}(,%{data})?'
31323133
- type: array-processor
31333134
name: Append ocsf.answer into ocsf.answers array
31343135
enabled: true
@@ -3412,35 +3413,154 @@ pipeline:
34123413
query: "@_path:(files OR files_red)"
34133414
processors:
34143415
- type: string-builder-processor
3415-
name: Stringify tx_hosts
3416+
name: Stringify tx_hosts into ocsf.src_endpoint
34163417
enabled: true
34173418
template: "%{tx_hosts}"
3418-
target: _tx_hosts_str
3419+
target: ocsf.src_endpoint
34193420
replaceMissing: false
34203421
- type: string-builder-processor
3421-
name: Stringify rx_hosts
3422+
name: Stringify rx_hosts into ocsf.dst_endpoint
34223423
enabled: true
34233424
template: "%{rx_hosts}"
3424-
target: _rx_hosts_str
3425+
target: ocsf.dst_endpoint
34253426
replaceMissing: false
34263427
- type: grok-parser
34273428
name: Extract first IP from tx_hosts
34283429
enabled: true
3429-
source: _tx_hosts_str
3430+
source: ocsf.src_endpoint
34303431
samples:
3431-
- '["10.104.10.60"]'
3432+
- "10.104.10.60"
3433+
- "10.104.10.60,10.104.10.61"
34323434
grok:
34333435
supportRules: ""
3434-
matchRules: 'g \[?"?%{ip:ocsf.src_endpoint.ip}"?'
3436+
matchRules: 'g %{ip:ocsf.src_endpoint.ip}(,%{data})?'
34353437
- type: grok-parser
34363438
name: Extract first IP from rx_hosts
34373439
enabled: true
3438-
source: _rx_hosts_str
3440+
source: ocsf.dst_endpoint
3441+
samples:
3442+
- "10.104.10.65"
3443+
- "10.104.10.65,10.104.10.66"
3444+
grok:
3445+
supportRules: ""
3446+
matchRules: 'g %{ip:ocsf.dst_endpoint.ip}(,%{data})?'
3447+
- type: string-builder-processor
3448+
name: Set MD5 algorithm name
3449+
enabled: true
3450+
template: MD5
3451+
target: tmp_md5.algorithm
3452+
replaceMissing: false
3453+
- type: string-builder-processor
3454+
name: Set MD5 algorithm id
3455+
enabled: true
3456+
template: "1"
3457+
target: tmp_md5.algorithm_id
3458+
replaceMissing: false
3459+
- type: grok-parser
3460+
name: Coerce tmp_md5.algorithm_id to integer
3461+
enabled: true
3462+
source: tmp_md5.algorithm_id
34393463
samples:
3440-
- '["10.104.10.65"]'
3464+
- "1"
34413465
grok:
34423466
supportRules: ""
3443-
matchRules: 'g \[?"?%{ip:ocsf.dst_endpoint.ip}"?'
3467+
matchRules: "to_int %{integer:tmp_md5.algorithm_id}"
3468+
- type: attribute-remapper
3469+
name: Map `md5` to `tmp_md5.value`
3470+
enabled: true
3471+
sources:
3472+
- md5
3473+
sourceType: attribute
3474+
target: tmp_md5.value
3475+
targetType: attribute
3476+
preserveSource: true
3477+
overrideOnConflict: false
3478+
- type: array-processor
3479+
name: Append tmp_md5 to ocsf.file.hashes
3480+
enabled: true
3481+
operation:
3482+
source: tmp_md5
3483+
target: ocsf.file.hashes
3484+
preserveSource: false
3485+
type: append
3486+
- type: string-builder-processor
3487+
name: Set SHA1 algorithm name
3488+
enabled: true
3489+
template: SHA-1
3490+
target: tmp_sha1.algorithm
3491+
replaceMissing: false
3492+
- type: string-builder-processor
3493+
name: Set SHA1 algorithm id
3494+
enabled: true
3495+
template: "2"
3496+
target: tmp_sha1.algorithm_id
3497+
replaceMissing: false
3498+
- type: grok-parser
3499+
name: Coerce tmp_sha1.algorithm_id to integer
3500+
enabled: true
3501+
source: tmp_sha1.algorithm_id
3502+
samples:
3503+
- "2"
3504+
grok:
3505+
supportRules: ""
3506+
matchRules: "to_int %{integer:tmp_sha1.algorithm_id}"
3507+
- type: attribute-remapper
3508+
name: Map `sha1` to `tmp_sha1.value`
3509+
enabled: true
3510+
sources:
3511+
- sha1
3512+
sourceType: attribute
3513+
target: tmp_sha1.value
3514+
targetType: attribute
3515+
preserveSource: true
3516+
overrideOnConflict: false
3517+
- type: array-processor
3518+
name: Append tmp_sha1 to ocsf.file.hashes
3519+
enabled: true
3520+
operation:
3521+
source: tmp_sha1
3522+
target: ocsf.file.hashes
3523+
preserveSource: false
3524+
type: append
3525+
- type: string-builder-processor
3526+
name: Set SHA256 algorithm name
3527+
enabled: true
3528+
template: SHA-256
3529+
target: tmp_sha256.algorithm
3530+
replaceMissing: false
3531+
- type: string-builder-processor
3532+
name: Set SHA256 algorithm id
3533+
enabled: true
3534+
template: "3"
3535+
target: tmp_sha256.algorithm_id
3536+
replaceMissing: false
3537+
- type: grok-parser
3538+
name: Coerce tmp_sha256.algorithm_id to integer
3539+
enabled: true
3540+
source: tmp_sha256.algorithm_id
3541+
samples:
3542+
- "3"
3543+
grok:
3544+
supportRules: ""
3545+
matchRules: "to_int %{integer:tmp_sha256.algorithm_id}"
3546+
- type: attribute-remapper
3547+
name: Map `sha256` to `tmp_sha256.value`
3548+
enabled: true
3549+
sources:
3550+
- sha256
3551+
sourceType: attribute
3552+
target: tmp_sha256.value
3553+
targetType: attribute
3554+
preserveSource: true
3555+
overrideOnConflict: false
3556+
- type: array-processor
3557+
name: Append tmp_sha256 to ocsf.file.hashes
3558+
enabled: true
3559+
operation:
3560+
source: tmp_sha256
3561+
target: ocsf.file.hashes
3562+
preserveSource: false
3563+
type: append
34443564
- type: schema-processor
34453565
name: Apply OCSF schema for 6006
34463566
enabled: true
@@ -3477,6 +3597,13 @@ pipeline:
34773597
target: ocsf.dst_endpoint.ip
34783598
preserveSource: true
34793599
overrideOnConflict: true
3600+
- type: schema-remapper
3601+
name: Map `ocsf.file.hashes` to `ocsf.file.hashes`
3602+
sources:
3603+
- ocsf.file.hashes
3604+
target: ocsf.file.hashes
3605+
preserveSource: true
3606+
overrideOnConflict: true
34803607
- type: schema-remapper
34813608
name: Map `mime_type` to `ocsf.file.mime_type`
34823609
sources:

zeek/assets/logs/zeek_tests.yaml

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,7 @@ tests:
311311
"rejected" : false,
312312
"query" : "win2k16-1-159",
313313
"_write_ts" : "2023-12-12T05:52:50.756358Z",
314-
"answers" : [ "185.64.148.0" ],
314+
"answers" : [ "185.64.148.0", "8.8.8.8" ],
315315
"trans_id" : 38706,
316316
"rcode" : 0,
317317
"_path" : "dns",
@@ -344,7 +344,8 @@ tests:
344344
dns:
345345
answer:
346346
name:
347-
- "185.64.148.0"
347+
- 185.64.148.0
348+
- 8.8.8.8
348349
flags:
349350
rcode: "NOERROR"
350351
id: 38706
@@ -420,6 +421,7 @@ tests:
420421
query: win2k16-1-159
421422
answers:
422423
- 185.64.148.0
424+
- 8.8.8.8
423425
trans_id: 38706
424426
rcode_name: NOERROR
425427
proto: udp
@@ -428,15 +430,14 @@ tests:
428430
resp_h: 185.64.148.0
429431
orig_h: 185.64.148.0
430432
resp_p: 5355
431-
_answers_str: 185.64.148.0
432433
message: |-
433434
{
434435
"AA" : false,
435436
"TTLs" : [ 30.0 ],
436437
"rejected" : false,
437438
"query" : "win2k16-1-159",
438439
"_write_ts" : "2023-12-12T05:52:50.756358Z",
439-
"answers" : [ "185.64.148.0" ],
440+
"answers" : [ "185.64.148.0", "8.8.8.8" ],
440441
"trans_id" : 38706,
441442
"rcode" : 0,
442443
"_path" : "dns",
@@ -994,7 +995,6 @@ tests:
994995
resp_h: 185.64.148.0
995996
orig_h: 185.64.148.0
996997
resp_p: 5355
997-
_answers_str: 185.64.148.0
998998
service: dns
999999
message: <134>Dec 12 05:52:50 machine-name {"_path":"dns","_write_ts":"2023-12-12T05:52:50.756358Z","ts":"2023-12-12T05:52:32.763303Z","uid":"CsOSdHqRMu62rNs31","id.orig_h":"185.64.148.0","id.orig_p":58013,"id.resp_h":"185.64.148.0","id.resp_p":5355,"proto":"udp","trans_id":38706,"rcode":0,"rcode_name":"NOERROR","query":"win2k16-1-159","answers":["185.64.148.0"],"TTLs":[30.0],"AA":false,"TC":false,"RD":false,"RA":false,"Z":0,"rejected":false}
10001000
tags:
@@ -1274,7 +1274,6 @@ tests:
12741274
severity:
12751275
name: High
12761276
id: 4
1277-
_is_alert_str: 'true'
12781277
id:
12791278
orig_p: 54321
12801279
resp_h: 192.168.1.1
@@ -1295,7 +1294,6 @@ tests:
12951294
_write_ts: '2026-05-11T17:59:59.359532Z'
12961295
suri_id: SOHaIDWJ5dBe
12971296
_path: suricata_corelight
1298-
_is_alert_str: 'true'
12991297
tx_id: 0
13001298
network:
13011299
destination:
@@ -1411,6 +1409,16 @@ tests:
14111409
mime_type: text/json
14121410
type_id: 1
14131411
name: FOPDsn3PdkiZsljcj2
1412+
hashes:
1413+
- algorithm_id: 1
1414+
value: 6e6ae0ed19f595687684faafae5499e13
1415+
algorithm: MD5
1416+
- algorithm_id: 2
1417+
value: f6578daa6d398c91398888b91a96d4c0e099c79c
1418+
algorithm: SHA-1
1419+
- algorithm_id: 3
1420+
value: a7d5f44561e9707b3faf6ca1cdec4823e6625dd1c3aba2b7395697d65b47dc8f
1421+
algorithm: SHA-256
14141422
status_id: 1
14151423
class_uid: 6006
14161424
activity_id: 2
@@ -1453,8 +1461,6 @@ tests:
14531461
total_bytes: 253109
14541462
seen_bytes: 253109
14551463
missing_bytes: 0
1456-
_tx_hosts_str: 10.104.10.60
1457-
_rx_hosts_str: 10.104.10.65
14581464
service: files
14591465
message: <134>May 11 19:26:26 ndr-dub-stryker-DC-1 {"_path":"files","_system_name":"ndr-dub-stryker-DC-1","_write_ts":"2026-05-11T19:26:26.082433Z","ts":"2026-05-11T19:26:25.875206Z","uid":"CjTuQU17IDvaVa8Nq2","fuid":"FOPDsn3PdkiZsljcj2","tx_hosts":["10.104.10.60"],"rx_hosts":["10.104.10.65"],"conn_uids":["CjTuQU17IDvaGb8Nq2"],"source":"HTTP","depth":0,"analyzers":["SHA1","MD5","SHA256","DATA_EVENT"],"local_orig":true,"is_orig":false,"seen_bytes":253109,"total_bytes":253109,"missing_bytes":0,"overflow_bytes":0,"timedout":false,"duration":0.2072269916534424,"mime_type":"text/json","md5":"6e6ae0ed19f595687684faafae5499e13","sha1":"f6578daa6d398c91398888b91a96d4c0e099c79c","sha256":"a7d5f44561e9707b3faf6ca1cdec4823e6625dd1c3aba2b7395697d65b47dc8f","id.vlan":1010}
14601466
tags:

0 commit comments

Comments
 (0)