Skip to content

Commit 00e4665

Browse files
committed
feat(bigquery): add label-based table exclusion
Tables matching any configured exclude label (key-value pair) are skipped during extraction. The check runs after table metadata is fetched since labels are part of TableMetadata. Closes #460
1 parent 86065e4 commit 00e4665

File tree

3 files changed

+70
-0
lines changed

3 files changed

+70
-0
lines changed

plugins/extractors/bigquery/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ source:
1717
- dataset_b
1818
tables:
1919
- dataset_c.table_a
20+
labels:
21+
env: staging
2022
max_page_size: 100
2123
include_column_profile: true
2224
build_view_lineage: true
@@ -53,6 +55,7 @@ source:
5355
| `table_pattern` | `string` | `gofood.fact_` | Regex pattern to whitelist tables to extract | _optional_ |
5456
| `exclude.datasets` | `[]string` | `[dataset_a]` | Dataset IDs to exclude | _optional_ |
5557
| `exclude.tables` | `[]string` | `[dataset_c.table_a]` | Table names in `datasetID.tableID` format to exclude | _optional_ |
58+
| `exclude.labels` | `map[string]string` | `{env: staging}` | Tables with any matching label key-value pair are excluded | _optional_ |
5659
| `max_page_size` | `int` | `100` | Page size hint for BigQuery API list calls | _optional_ |
5760
| `dataset_page_size` | `int` | `10` | Page size for listing datasets. Falls back to `max_page_size` | _optional_ |
5861
| `table_page_size` | `int` | `50` | Page size for listing tables. Falls back to `max_page_size` | _optional_ |

plugins/extractors/bigquery/bigquery.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ type Exclude struct {
6262
Datasets []string `json:"datasets" yaml:"datasets" mapstructure:"datasets"`
6363
// list of tableNames in format - datasetID.tableID
6464
Tables []string `json:"tables" yaml:"tables" mapstructure:"tables"`
65+
// list of label key-value pairs; tables matching any label are excluded
66+
Labels map[string]string `json:"labels" yaml:"labels" mapstructure:"labels"`
6567
}
6668

6769
const (
@@ -83,6 +85,8 @@ exclude:
8385
- dataset_b
8486
tables:
8587
- dataset_c.table_a
88+
labels:
89+
env: staging
8690
max_page_size: 100
8791
include_column_profile: true
8892
build_view_lineage: true
@@ -334,6 +338,14 @@ func (e *Extractor) extractTable(ctx context.Context, ds *bigquery.Dataset, emit
334338
e.logger.Error("failed to fetch table metadata", "err", err, "table", tableFQN)
335339
return nil
336340
}
341+
if IsExcludedByLabels(tmd.Labels, e.config.Exclude.Labels) {
342+
e.excludedTableCtr.Add(ctx, 1, metric.WithAttributes(
343+
attribute.String("bq.project_id", e.config.ProjectID),
344+
attribute.String("bq.dataset_id", ds.DatasetID),
345+
))
346+
e.logger.Debug("excluding table by labels", "dataset_id", ds.DatasetID, "table_id", table.TableID)
347+
return nil
348+
}
337349
record, err := e.buildRecord(ctx, table, tmd)
338350
if err != nil {
339351
e.logger.Error("failed to build record", "err", err, "table", tableFQN)
@@ -799,6 +811,21 @@ func IsExcludedTable(datasetID, tableID string, excludedTables []string) bool {
799811
return false
800812
}
801813

814+
// IsExcludedByLabels reports whether a table should be skipped because of its
// labels. It returns true when at least one key-value pair in excludeLabels is
// present in tableLabels with exactly the same value; a single matching pair is
// enough, the table does not have to match all of them.
//
// A key that is absent from tableLabels never matches, even when the configured
// exclude value is the empty string. A nil or empty excludeLabels disables the
// check, so the function returns false.
func IsExcludedByLabels(tableLabels, excludeLabels map[string]string) bool {
	for key, want := range excludeLabels {
		// Use the two-value lookup: a plain tableLabels[key] yields "" for a
		// missing key, which would wrongly match an exclude entry whose value
		// is "".
		if got, ok := tableLabels[key]; ok && got == want {
			return true
		}
	}
	return false
}
828+
802829
// getMaxPageSize returns max_page_size if configured in recipe, otherwise returns default value
803830
func (e *Extractor) getMaxPageSize() int {
804831
if e.config.MaxPageSize > 0 {
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package bigquery
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestIsExcludedByLabels(t *testing.T) {
10+
t.Run("should return false when no exclude labels configured", func(t *testing.T) {
11+
tableLabels := map[string]string{"env": "prod"}
12+
assert.False(t, IsExcludedByLabels(tableLabels, nil))
13+
assert.False(t, IsExcludedByLabels(tableLabels, map[string]string{}))
14+
})
15+
16+
t.Run("should return true when table has a matching label", func(t *testing.T) {
17+
tableLabels := map[string]string{"env": "staging", "team": "data"}
18+
excludeLabels := map[string]string{"env": "staging"}
19+
assert.True(t, IsExcludedByLabels(tableLabels, excludeLabels))
20+
})
21+
22+
t.Run("should return false when label key matches but value differs", func(t *testing.T) {
23+
tableLabels := map[string]string{"env": "prod"}
24+
excludeLabels := map[string]string{"env": "staging"}
25+
assert.False(t, IsExcludedByLabels(tableLabels, excludeLabels))
26+
})
27+
28+
t.Run("should return false when table has no labels", func(t *testing.T) {
29+
excludeLabels := map[string]string{"env": "staging"}
30+
assert.False(t, IsExcludedByLabels(nil, excludeLabels))
31+
assert.False(t, IsExcludedByLabels(map[string]string{}, excludeLabels))
32+
})
33+
34+
t.Run("should match any exclude label not all", func(t *testing.T) {
35+
tableLabels := map[string]string{"env": "staging"}
36+
excludeLabels := map[string]string{"env": "staging", "lifecycle": "ephemeral"}
37+
// Matches "env: staging" even though table doesn't have "lifecycle".
38+
assert.True(t, IsExcludedByLabels(tableLabels, excludeLabels))
39+
})
40+
}

0 commit comments

Comments
 (0)