Skip to content

Commit 089db67

Browse files
naa0yama and pront authored
feat(enrichment tables): add network CIDR field to lookup results (#24576)
* feat(geoip enrichment table): add network CIDR field to lookup results (#24411) * chore: add changelog fragment for #24411 * docs(geoip enrichment table): add network and Anonymous-IP fields to ENRICHMENT_TABLE_EXPLAINER (#24411) --------- Co-authored-by: Pavlos Rontidis <pavlos.rontidis@gmail.com>
1 parent 1444b2b commit 089db67

5 files changed

Lines changed: 40 additions & 8 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The `geoip` enrichment table now includes a `network` field containing the CIDR network associated with the lookup result, available for all database types (City, ISP/ASN, Connection-Type, Anonymous-IP).
2+
3+
authors: naa0yama

docs/generated/find_enrichment_table_records.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"anchor": "find_enrichment_table_records",
33
"name": "find_enrichment_table_records",
44
"category": "Enrichment",
5-
"description": "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for rows that match the provided condition.\n\nFor `file` enrichment tables, this condition needs to be a VRL object in which\nthe key-value pairs indicate a field to search mapped to a value to search in that field.\nThis function returns the rows that match the provided condition(s). _All_ fields need to\nmatch for rows to be returned; if any fields do not match, then no rows are returned.\n\nThere are three forms of search criteria:\n\n1. **Exact match search**. The given field must match the value exactly. Case sensitivity\n can be specified using the `case_sensitive` argument. An exact match search can use an\n index directly into the dataset, which should make this search fairly \"cheap\" from a\n performance perspective.\n\n2. **Wildcard match search**. The given fields specified by the exact match search may also\n be matched exactly to the value provided to the `wildcard` parameter.\n A wildcard match search can also use an index directly into the dataset.\n\n3. **Date range search**. The given field must be greater than or equal to the `from` date\n and/or less than or equal to the `to` date. A date range search involves\n sequentially scanning through the rows that have been located using any exact match\n criteria. This can be an expensive operation if there are many rows returned by any exact\n match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment\n data set is very small.\n\nFor `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair\nwhose value needs to be a valid IP address. Example: `{\"ip\": .ip }`. If a return field is expected\nand without a value, `null` is used. 
This table can return the following fields:\n\n* ISP databases:\n * `autonomous_system_number`\n * `autonomous_system_organization`\n * `isp`\n * `organization`\n\n* City databases:\n * `city_name`\n * `continent_code`\n * `country_code`\n * `country_name`\n * `region_code`\n * `region_name`\n * `metro_code`\n * `latitude`\n * `longitude`\n * `postal_code`\n * `timezone`\n\n* Connection-Type databases:\n * `connection_type`\n\nTo use this function, you need to update your configuration to\ninclude an\n[`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables)\nparameter.",
5+
"description": "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for rows that match the provided condition.\n\nFor `file` enrichment tables, this condition needs to be a VRL object in which\nthe key-value pairs indicate a field to search mapped to a value to search in that field.\nThis function returns the rows that match the provided condition(s). _All_ fields need to\nmatch for rows to be returned; if any fields do not match, then no rows are returned.\n\nThere are three forms of search criteria:\n\n1. **Exact match search**. The given field must match the value exactly. Case sensitivity\n can be specified using the `case_sensitive` argument. An exact match search can use an\n index directly into the dataset, which should make this search fairly \"cheap\" from a\n performance perspective.\n\n2. **Wildcard match search**. The given fields specified by the exact match search may also\n be matched exactly to the value provided to the `wildcard` parameter.\n A wildcard match search can also use an index directly into the dataset.\n\n3. **Date range search**. The given field must be greater than or equal to the `from` date\n and/or less than or equal to the `to` date. A date range search involves\n sequentially scanning through the rows that have been located using any exact match\n criteria. This can be an expensive operation if there are many rows returned by any exact\n match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment\n data set is very small.\n\nFor `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair\nwhose value needs to be a valid IP address. Example: `{\"ip\": .ip }`. If a return field is expected\nand without a value, `null` is used. 
This table can return the following fields:\n\n* ISP databases:\n * `autonomous_system_number`\n * `autonomous_system_organization`\n * `isp`\n * `organization`\n * `network`\n\n* City databases:\n * `city_name`\n * `continent_code`\n * `country_code`\n * `country_name`\n * `region_code`\n * `region_name`\n * `metro_code`\n * `latitude`\n * `longitude`\n * `postal_code`\n * `timezone`\n * `network`\n\n* Connection-Type databases:\n * `connection_type`\n * `network`\n\n* Anonymous-IP databases:\n * `is_anonymous`\n * `is_anonymous_vpn`\n * `is_hosting_provider`\n * `is_public_proxy`\n * `is_residential_proxy`\n * `is_tor_exit_node`\n * `network`\n\nTo use this function, you need to update your configuration to\ninclude an\n[`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables)\nparameter.",
66
"arguments": [
77
{
88
"name": "table",

docs/generated/get_enrichment_table_record.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"anchor": "get_enrichment_table_record",
33
"name": "get_enrichment_table_record",
44
"category": "Enrichment",
5-
"description": "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for a row that matches the provided condition. A single row must be matched. If no rows are found or more than one row is found, an error is returned.\n\nFor `file` enrichment tables, this condition needs to be a VRL object in which\nthe key-value pairs indicate a field to search mapped to a value to search in that field.\nThis function returns the rows that match the provided condition(s). _All_ fields need to\nmatch for rows to be returned; if any fields do not match, then no rows are returned.\n\nThere are three forms of search criteria:\n\n1. **Exact match search**. The given field must match the value exactly. Case sensitivity\n can be specified using the `case_sensitive` argument. An exact match search can use an\n index directly into the dataset, which should make this search fairly \"cheap\" from a\n performance perspective.\n\n2. **Wildcard match search**. The given fields specified by the exact match search may also\n be matched exactly to the value provided to the `wildcard` parameter.\n A wildcard match search can also use an index directly into the dataset.\n\n3. **Date range search**. The given field must be greater than or equal to the `from` date\n and/or less than or equal to the `to` date. A date range search involves\n sequentially scanning through the rows that have been located using any exact match\n criteria. This can be an expensive operation if there are many rows returned by any exact\n match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment\n data set is very small.\n\nFor `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair\nwhose value needs to be a valid IP address. Example: `{\"ip\": .ip }`. If a return field is expected\nand without a value, `null` is used. 
This table can return the following fields:\n\n* ISP databases:\n * `autonomous_system_number`\n * `autonomous_system_organization`\n * `isp`\n * `organization`\n\n* City databases:\n * `city_name`\n * `continent_code`\n * `country_code`\n * `country_name`\n * `region_code`\n * `region_name`\n * `metro_code`\n * `latitude`\n * `longitude`\n * `postal_code`\n * `timezone`\n\n* Connection-Type databases:\n * `connection_type`\n\nTo use this function, you need to update your configuration to\ninclude an\n[`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables)\nparameter.",
5+
"description": "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for a row that matches the provided condition. A single row must be matched. If no rows are found or more than one row is found, an error is returned.\n\nFor `file` enrichment tables, this condition needs to be a VRL object in which\nthe key-value pairs indicate a field to search mapped to a value to search in that field.\nThis function returns the rows that match the provided condition(s). _All_ fields need to\nmatch for rows to be returned; if any fields do not match, then no rows are returned.\n\nThere are three forms of search criteria:\n\n1. **Exact match search**. The given field must match the value exactly. Case sensitivity\n can be specified using the `case_sensitive` argument. An exact match search can use an\n index directly into the dataset, which should make this search fairly \"cheap\" from a\n performance perspective.\n\n2. **Wildcard match search**. The given fields specified by the exact match search may also\n be matched exactly to the value provided to the `wildcard` parameter.\n A wildcard match search can also use an index directly into the dataset.\n\n3. **Date range search**. The given field must be greater than or equal to the `from` date\n and/or less than or equal to the `to` date. A date range search involves\n sequentially scanning through the rows that have been located using any exact match\n criteria. This can be an expensive operation if there are many rows returned by any exact\n match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment\n data set is very small.\n\nFor `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair\nwhose value needs to be a valid IP address. Example: `{\"ip\": .ip }`. If a return field is expected\nand without a value, `null` is used. 
This table can return the following fields:\n\n* ISP databases:\n * `autonomous_system_number`\n * `autonomous_system_organization`\n * `isp`\n * `organization`\n * `network`\n\n* City databases:\n * `city_name`\n * `continent_code`\n * `country_code`\n * `country_name`\n * `region_code`\n * `region_name`\n * `metro_code`\n * `latitude`\n * `longitude`\n * `postal_code`\n * `timezone`\n * `network`\n\n* Connection-Type databases:\n * `connection_type`\n * `network`\n\n* Anonymous-IP databases:\n * `is_anonymous`\n * `is_anonymous_vpn`\n * `is_hosting_provider`\n * `is_public_proxy`\n * `is_residential_proxy`\n * `is_tor_exit_node`\n * `network`\n\nTo use this function, you need to update your configuration to\ninclude an\n[`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables)\nparameter.",
66
"arguments": [
77
{
88
"name": "table",

lib/vector-vrl/enrichment/src/lib.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ pub(crate) const ENRICHMENT_TABLE_EXPLAINER: &str = indoc! {r#"
185185
* `autonomous_system_organization`
186186
* `isp`
187187
* `organization`
188+
* `network`
188189
189190
* City databases:
190191
* `city_name`
@@ -198,9 +199,20 @@ pub(crate) const ENRICHMENT_TABLE_EXPLAINER: &str = indoc! {r#"
198199
* `longitude`
199200
* `postal_code`
200201
* `timezone`
202+
* `network`
201203
202204
* Connection-Type databases:
203205
* `connection_type`
206+
* `network`
207+
208+
* Anonymous-IP databases:
209+
* `is_anonymous`
210+
* `is_anonymous_vpn`
211+
* `is_hosting_provider`
212+
* `is_public_proxy`
213+
* `is_residential_proxy`
214+
* `is_tor_exit_node`
215+
* `network`
204216
205217
To use this function, you need to update your configuration to
206218
include an

src/enrichment_tables/geoip.rs

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,15 @@ pub struct Geoip {
118118
fn lookup_value<'de, A: Deserialize<'de>>(
119119
dbreader: &'de Reader<Vec<u8>>,
120120
address: IpAddr,
121-
) -> crate::Result<Option<A>> {
121+
) -> crate::Result<Option<(A, String)>> {
122122
let result = dbreader.lookup(address)?;
123-
Ok(result.decode::<A>()?)
123+
match result.decode::<A>()? {
124+
Some(data) => {
125+
let network = result.network()?.to_string();
126+
Ok(Some((data, network)))
127+
}
128+
None => Ok(None),
129+
}
124130
}
125131

126132
impl Geoip {
@@ -174,7 +180,7 @@ impl Geoip {
174180

175181
match self.dbkind {
176182
DatabaseKind::Asn | DatabaseKind::Isp => {
177-
let data = lookup_value::<Isp>(&self.dbreader, ip).ok()??;
183+
let (data, network) = lookup_value::<Isp>(&self.dbreader, ip).ok()??;
178184

179185
add_field!("autonomous_system_number", data.autonomous_system_number);
180186
add_field!(
@@ -183,9 +189,11 @@ impl Geoip {
183189
);
184190
add_field!("isp", data.isp);
185191
add_field!("organization", data.organization);
192+
add_field!("network", Some(network));
186193
}
187194
DatabaseKind::City => {
188-
let data: City = lookup_value::<City>(&self.dbreader, ip).ok()??;
195+
let (data, network): (City, String) =
196+
lookup_value::<City>(&self.dbreader, ip).ok()??;
189197

190198
add_field!("city_name", self.take_translation(&data.city.names));
191199

@@ -223,21 +231,24 @@ impl Geoip {
223231
subdivision.and_then(|subdivision| subdivision.iso_code)
224232
);
225233
add_field!("postal_code", data.postal.code);
234+
add_field!("network", Some(network));
226235
}
227236
DatabaseKind::ConnectionType => {
228-
let data = lookup_value::<ConnectionType>(&self.dbreader, ip).ok()??;
237+
let (data, network) = lookup_value::<ConnectionType>(&self.dbreader, ip).ok()??;
229238

230239
add_field!("connection_type", data.connection_type);
240+
add_field!("network", Some(network));
231241
}
232242
DatabaseKind::AnonymousIp => {
233-
let data = lookup_value::<AnonymousIp>(&self.dbreader, ip).ok()??;
243+
let (data, network) = lookup_value::<AnonymousIp>(&self.dbreader, ip).ok()??;
234244

235245
add_field!("is_anonymous", data.is_anonymous);
236246
add_field!("is_anonymous_vpn", data.is_anonymous_vpn);
237247
add_field!("is_hosting_provider", data.is_hosting_provider);
238248
add_field!("is_public_proxy", data.is_public_proxy);
239249
add_field!("is_residential_proxy", data.is_residential_proxy);
240250
add_field!("is_tor_exit_node", data.is_tor_exit_node);
251+
add_field!("network", Some(network));
241252
}
242253
}
243254

@@ -367,6 +378,7 @@ mod tests {
367378
expected.insert("longitude".into(), Value::from(-1.25));
368379
expected.insert("postal_code".into(), "OX1".into());
369380
expected.insert("metro_code".into(), Value::Null);
381+
expected.insert("network".into(), "2.125.160.216/29".into());
370382

371383
assert_eq!(values, expected);
372384
}
@@ -403,6 +415,7 @@ mod tests {
403415
expected.insert("longitude".into(), Value::from(90.5));
404416
expected.insert("postal_code".into(), Value::Null);
405417
expected.insert("metro_code".into(), Value::Null);
418+
expected.insert("network".into(), "67.43.156.0/24".into());
406419

407420
assert_eq!(values, expected);
408421
}
@@ -426,6 +439,7 @@ mod tests {
426439
);
427440
expected.insert("isp".into(), "Verizon Business".into());
428441
expected.insert("organization".into(), "Verizon Business".into());
442+
expected.insert("network".into(), "208.192.0.0/10".into());
429443

430444
assert_eq!(values, expected);
431445
}
@@ -442,6 +456,7 @@ mod tests {
442456
);
443457
expected.insert("isp".into(), Value::Null);
444458
expected.insert("organization".into(), Value::Null);
459+
expected.insert("network".into(), "2600:7000::/24".into());
445460

446461
assert_eq!(values, expected);
447462
}
@@ -463,6 +478,7 @@ mod tests {
463478

464479
let mut expected = ObjectMap::new();
465480
expected.insert("connection_type".into(), "Corporate".into());
481+
expected.insert("network".into(), "201.243.200.0/24".into());
466482

467483
assert_eq!(values, expected);
468484
}
@@ -494,6 +510,7 @@ mod tests {
494510
expected.insert("is_tor_exit_node".into(), true.into());
495511
expected.insert("is_public_proxy".into(), Value::Null);
496512
expected.insert("is_residential_proxy".into(), Value::Null);
513+
expected.insert("network".into(), "101.99.92.179/32".into());
497514

498515
assert_eq!(values, expected);
499516
}

0 commit comments

Comments
 (0)