Skip to content

Commit fd1b169

Browse files
committed
fix(cli): validate transform expressions without environment
1 parent 3d6b49f commit fd1b169

7 files changed

Lines changed: 207 additions & 81 deletions

File tree

lib/vector-vrl/enrichment/src/tables.rs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,31 @@ impl TableRegistry {
139139
}
140140
}
141141

142+
/// Load compile-only table placeholders into the registry.
143+
///
144+
/// These placeholders are useful when compiling VRL in contexts that must
145+
/// not load external enrichment table data. Compilation only needs table
146+
/// names and index registration; runtime lookups still fail against these
147+
/// placeholders.
148+
pub fn load_compile_time_table_ids<I, S>(&self, table_ids: I)
149+
where
150+
I: IntoIterator<Item = S>,
151+
S: Into<String>,
152+
{
153+
let tables = table_ids
154+
.into_iter()
155+
.map(|table_id| {
156+
let table_id = table_id.into();
157+
(
158+
table_id.clone(),
159+
Box::new(CompileTimeTable::new(table_id)) as Box<dyn Table + Send + Sync>,
160+
)
161+
})
162+
.collect();
163+
164+
self.load(tables);
165+
}
166+
142167
/// Adds an index to the given Enrichment Table.
143168
///
144169
/// If we are in the reading stage, this function will error.
@@ -204,6 +229,68 @@ impl std::fmt::Debug for TableRegistry {
204229
}
205230
}
206231

232+
#[derive(Clone, Debug)]
233+
struct CompileTimeTable {
234+
name: String,
235+
indexes: Vec<(Case, Vec<String>)>,
236+
}
237+
238+
impl CompileTimeTable {
239+
fn new(name: String) -> Self {
240+
Self {
241+
name,
242+
indexes: Vec::new(),
243+
}
244+
}
245+
246+
fn lookup_error(&self) -> Error {
247+
Error::TableNotLoaded {
248+
table: self.name.clone(),
249+
}
250+
}
251+
}
252+
253+
impl Table for CompileTimeTable {
254+
fn find_table_row<'a>(
255+
&self,
256+
_case: Case,
257+
_condition: &'a [Condition<'a>],
258+
_select: Option<&[String]>,
259+
_wildcard: Option<&Value>,
260+
_index: Option<IndexHandle>,
261+
) -> Result<ObjectMap, Error> {
262+
Err(self.lookup_error())
263+
}
264+
265+
fn find_table_rows<'a>(
266+
&self,
267+
_case: Case,
268+
_condition: &'a [Condition<'a>],
269+
_select: Option<&[String]>,
270+
_wildcard: Option<&Value>,
271+
_index: Option<IndexHandle>,
272+
) -> Result<Vec<ObjectMap>, Error> {
273+
Err(self.lookup_error())
274+
}
275+
276+
fn add_index(&mut self, case: Case, fields: &[&str]) -> Result<IndexHandle, Error> {
277+
let handle = IndexHandle(self.indexes.len());
278+
self.indexes.push((
279+
case,
280+
fields.iter().map(|field| (*field).to_owned()).collect(),
281+
));
282+
Ok(handle)
283+
}
284+
285+
fn index_fields(&self) -> Vec<(Case, Vec<String>)> {
286+
self.indexes.clone()
287+
}
288+
289+
fn needs_reload(&self) -> bool {
290+
false
291+
}
292+
}
293+
207294
/// Provides read only access to the enrichment tables via the
208295
/// `vrl::EnrichmentTableSearch` trait. Cloning this object is designed to be
209296
/// cheap. The underlying data will be shared by all clones.

src/conditions/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,14 @@ impl AnyCondition {
210210
AnyCondition::Map(m) => m.build(enrichment_tables, metrics_storage),
211211
}
212212
}
213+
214+
pub fn validate(
215+
&self,
216+
enrichment_tables: &vector_lib::enrichment::TableRegistry,
217+
metrics_storage: &MetricsStorage,
218+
) -> crate::Result<()> {
219+
self.build(enrichment_tables, metrics_storage).map(|_| ())
220+
}
213221
}
214222

215223
impl From<ConditionConfig> for AnyCondition {

src/config/transform.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,12 @@ pub struct TransformContext {
162162
/// (e.g. `aws_ec2_metadata`, `throttle`) clone this and pass it to [`crate::cpu_time::spawn_timed`] so
163163
/// their CPU is attributed to the component alongside the main transform task.
164164
pub cpu_ns: Option<Counter>,
165+
166+
/// Whether transform construction should skip checks that require the running environment.
167+
///
168+
/// This is set by `vector validate --no-environment` when it builds transforms to validate
169+
/// configuration that is normally checked in `build`.
170+
pub skip_environment_checks: bool,
165171
}
166172

167173
impl Default for TransformContext {
@@ -176,6 +182,7 @@ impl Default for TransformContext {
176182
schema: SchemaOptions::default(),
177183
extra_context: Default::default(),
178184
cpu_ns: None,
185+
skip_environment_checks: false,
179186
}
180187
}
181188
}
@@ -228,14 +235,6 @@ pub trait TransformConfig: DynClone + NamedComponent + core::fmt::Debug + Send +
228235
/// returned.
229236
async fn build(&self, globals: &TransformContext) -> crate::Result<Transform>;
230237

231-
/// Whether `build` requires environment-dependent setup.
232-
///
233-
/// This is used by `vector validate --no-environment` to skip transforms that intentionally do
234-
/// network or runtime initialization as part of `build`.
235-
fn build_requires_environment(&self) -> bool {
236-
false
237-
}
238-
239238
/// Gets the input configuration for this transform.
240239
fn input(&self) -> Input;
241240

src/topology/builder.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,7 @@ impl<'a> Builder<'a> {
540540
} else {
541541
None
542542
},
543+
skip_environment_checks: false,
543544
};
544545

545546
let node = TransformNode::from_parts(key.clone(), &context, transform, &input_definitions);

src/transforms/aws_ec2_metadata.rs

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -229,12 +229,14 @@ impl TransformConfig for Ec2Metadata {
229229
tags,
230230
);
231231

232-
// If initial metadata is not required, log and proceed. Otherwise return error.
233-
if let Err(error) = client.refresh_metadata().await {
234-
if required {
235-
return Err(error);
236-
} else {
237-
emit!(AwsEc2MetadataRefreshError { error });
232+
if !context.skip_environment_checks {
233+
// If initial metadata is not required, log and proceed. Otherwise return error.
234+
if let Err(error) = client.refresh_metadata().await {
235+
if required {
236+
return Err(error);
237+
} else {
238+
emit!(AwsEc2MetadataRefreshError { error });
239+
}
238240
}
239241
}
240242

@@ -254,13 +256,6 @@ impl TransformConfig for Ec2Metadata {
254256
Ok(Transform::event_task(Ec2MetadataTransform { state }))
255257
}
256258

257-
fn build_requires_environment(&self) -> bool {
258-
// `build` performs network initialization and starts the refresh task, so
259-
// `validate --no-environment` must skip this transform until #25162 is resolved:
260-
// https://github.com/vectordotdev/vector/issues/25162
261-
true
262-
}
263-
264259
fn input(&self) -> Input {
265260
Input::new(DataType::Metric | DataType::Log)
266261
}

src/validate.rs

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::{collections::HashMap, fmt, fs::remove_dir_all, path::PathBuf};
55
use clap::Parser;
66
use colored::*;
77
use exitcode::ExitCode;
8+
use vector_vrl_metrics::MetricsStorage;
89

910
use crate::{
1011
config::{self, Config, ConfigDiff, TransformContext, loading::ConfigBuilderLoader},
@@ -119,7 +120,7 @@ pub async fn validate(opts: &Opts, color: bool) -> ExitCode {
119120
};
120121

121122
if opts.no_environment {
122-
validated &= validate_transforms_no_environment(opts, &config, &mut fmt).await;
123+
validated &= validate_transforms_no_environment(&config, &mut fmt).await;
123124
} else if let Some(tmp_directory) = create_tmp_directory(&mut config, &mut fmt) {
124125
validated &= validate_environment(opts, &config, &mut fmt).await;
125126
remove_tmp_directory(tmp_directory);
@@ -181,43 +182,60 @@ pub fn validate_config(opts: &Opts, fmt: &mut Formatter) -> Option<Config> {
181182
Some(config)
182183
}
183184

184-
async fn validate_transforms_no_environment(
185-
opts: &Opts,
186-
config: &Config,
187-
fmt: &mut Formatter,
188-
) -> bool {
185+
async fn validate_transforms_no_environment(config: &Config, fmt: &mut Formatter) -> bool {
186+
let enrichment_tables = vector_lib::enrichment::TableRegistry::default();
187+
enrichment_tables.load_compile_time_table_ids(
188+
config
189+
.enrichment_tables()
190+
.map(|(key, _table)| key.to_string()),
191+
);
192+
193+
let metrics_storage = MetricsStorage::default();
189194
let mut definition_cache = HashMap::new();
190-
let mut warnings = Vec::new();
191195
let mut errors = Vec::new();
192196

193197
for (key, transform) in config.transforms() {
194-
if transform.inner.build_requires_environment() {
195-
warnings.push(format!(
196-
"Transform \"{key}\" skipped build validation because it requires environment-dependent setup."
197-
));
198-
continue;
199-
}
200-
201-
let merged_schema_definition = topology::schema::input_definitions(
198+
let input_definitions = topology::schema::input_definitions(
202199
&transform.inputs,
203200
config,
204-
Default::default(),
201+
enrichment_tables.clone(),
205202
&mut definition_cache,
206203
)
207-
.map(|input_definitions| {
208-
input_definitions
209-
.into_iter()
210-
.map(|(_, definition)| definition)
211-
.reduce(Definition::merge)
212-
.unwrap_or_else(Definition::any)
213-
})
214-
.unwrap_or_else(|_| Definition::any());
204+
.unwrap_or_default();
205+
206+
let merged_schema_definition = input_definitions
207+
.iter()
208+
.map(|(_, definition)| definition.clone())
209+
.reduce(Definition::merge)
210+
.unwrap_or_else(Definition::any);
211+
212+
let schema_definitions = transform
213+
.inner
214+
.outputs(
215+
&TransformContext {
216+
enrichment_tables: enrichment_tables.clone(),
217+
metrics_storage: metrics_storage.clone(),
218+
schema: config.schema,
219+
..Default::default()
220+
},
221+
&input_definitions,
222+
)
223+
.into_iter()
224+
.map(|output| {
225+
let definitions = output.schema_definitions(config.schema.enabled);
226+
(output.port, definitions)
227+
})
228+
.collect();
215229

216230
let context = TransformContext {
217231
key: Some(key.clone()),
218232
globals: config.global.clone(),
233+
enrichment_tables: enrichment_tables.clone(),
234+
metrics_storage: metrics_storage.clone(),
235+
schema_definitions,
219236
merged_schema_definition,
220237
schema: config.schema,
238+
skip_environment_checks: true,
221239
..Default::default()
222240
};
223241

@@ -226,16 +244,9 @@ async fn validate_transforms_no_environment(
226244
}
227245
}
228246

229-
if !warnings.is_empty() {
230-
fmt.title("Transform warnings");
231-
fmt.sub_warning(&warnings);
232-
}
233-
234-
if errors.is_empty() && warnings.is_empty() {
247+
if errors.is_empty() {
235248
fmt.success("Transform configuration");
236249
true
237-
} else if errors.is_empty() {
238-
!opts.deny_warnings
239250
} else {
240251
fmt.title("Transform errors");
241252
fmt.sub_error(errors);

0 commit comments

Comments
 (0)