From 684eb68580b5cbc18754fa36c4fe7005f0121fac Mon Sep 17 00:00:00 2001 From: Uday Patel Date: Tue, 21 Apr 2026 18:28:49 -0700 Subject: [PATCH 1/2] Add clampvalues transform to cap numeric field values within a range - Implements ClampValuesView following petl's Table subclass pattern - Handles None values, floats, negative ranges, and boundary equality - Raises ValueError for invalid ranges (low > high) and missing fields - 10 unit tests covering normal cases and edge cases --- petl/test/transform/test_clamp.py | 96 +++++++++++++++++++++++++++++++ petl/transform/__init__.py | 2 + petl/transform/clamp.py | 77 +++++++++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 petl/test/transform/test_clamp.py create mode 100644 petl/transform/clamp.py diff --git a/petl/test/transform/test_clamp.py b/petl/test/transform/test_clamp.py new file mode 100644 index 00000000..3d14b866 --- /dev/null +++ b/petl/test/transform/test_clamp.py @@ -0,0 +1,96 @@ +from __future__ import absolute_import, print_function, division +import pytest +import petl as etl + + +# --------------------------------------------------------------------------- +# Basic behaviour +# --------------------------------------------------------------------------- + +def test_clampvalues_clamps_below_low(): + """Values below the lower bound should be raised to low.""" + table = [['id', 'score'], [1, -10], [2, 50]] + result = list(etl.clampvalues(table, 'score', 0, 100)) + assert result[1][1] == 0 # -10 clamped to 0 + assert result[2][1] == 50 # 50 unchanged + + +def test_clampvalues_clamps_above_high(): + """Values above the upper bound should be lowered to high.""" + table = [['id', 'score'], [1, 150], [2, 50]] + result = list(etl.clampvalues(table, 'score', 0, 100)) + assert result[1][1] == 100 # 150 clamped to 100 + assert result[2][1] == 50 # 50 unchanged + + +def test_clampvalues_within_range_unchanged(): + """Values already within [low, high] must not be modified.""" + table = [['x'], [0], [50], [100]] + result = list(etl.clampvalues(table, 'x', 0, 100)) + assert result[1][0] == 0 + assert result[2][0] == 50 + assert result[3][0] == 100 + + +def test_clampvalues_none_passthrough(): + """None values should pass through without error.""" + table = [['val'], [None], [5]] + result = list(etl.clampvalues(table, 'val', 0, 10)) + assert result[1][0] is None + assert result[2][0] == 5 + + +def test_clampvalues_header_preserved(): + """The header row must be returned unchanged.""" + table = [['id', 'score'], [1, 50]] + result = list(etl.clampvalues(table, 'score', 0, 100)) + assert list(result[0]) == ['id', 'score'] + + +# --------------------------------------------------------------------------- +# Boundary / edge cases +# --------------------------------------------------------------------------- + +def test_clampvalues_float_values(): + """Should work correctly with floating-point numbers.""" + table = [['v'], [-0.5], [0.5], [1.5]] + result = list(etl.clampvalues(table, 'v', 0.0, 1.0)) + assert result[1][0] == 0.0 + assert result[2][0] == 0.5 + assert result[3][0] == 1.0 + + +def test_clampvalues_low_equals_high(): + """When low == high every value should be clamped to that single value.""" + table = [['v'], [0], [5], [10]] + result = list(etl.clampvalues(table, 'v', 5, 5)) + assert result[1][0] == 5 + assert result[2][0] == 5 + assert result[3][0] == 5 + + +def test_clampvalues_negative_range(): + """Should work correctly with a fully negative range.""" + table = [['v'], [-100], [-50], [-1]] + result = list(etl.clampvalues(table, 'v', -75, -25)) + assert result[1][0] == -75 # -100 clamped to -75 + assert result[2][0] == -50 # -50 unchanged + assert result[3][0] == -25 # -1 clamped to -25 + + +# --------------------------------------------------------------------------- +# Error handling +# --------------------------------------------------------------------------- + +def test_clampvalues_invalid_range_raises(): + """low > high should raise ValueError immediately.""" + table = [['v'], [1]] + with pytest.raises(ValueError): + etl.clampvalues(table, 'v', 10, 0) + + +def test_clampvalues_missing_field_raises(): + """Referencing a field not in the header should raise ValueError.""" + table = [['v'], [1]] + with pytest.raises(ValueError): + list(etl.clampvalues(table, 'nonexistent', 0, 10)) diff --git a/petl/transform/__init__.py b/petl/transform/__init__.py index 7c43275d..6a07b4e0 100644 --- a/petl/transform/__init__.py +++ b/petl/transform/__init__.py @@ -11,6 +11,8 @@ replaceall, update, convertnumbers, format, formatall, interpolate, \ interpolateall +from petl.transform.clamp import clampvalues + from petl.transform.sorts import sort, mergesort, issorted from petl.transform.selects import select, selectop, selectcontains, \ diff --git a/petl/transform/clamp.py b/petl/transform/clamp.py new file mode 100644 index 00000000..d1c9da3c --- /dev/null +++ b/petl/transform/clamp.py @@ -0,0 +1,77 @@ +from __future__ import absolute_import, print_function, division +from petl.util.base import Table + + +def clampvalues(table, field, low, high): + """Clamp numeric values in a field so they never fall below `low` or + exceed `high`. Values already within the range are left unchanged. + Non-numeric values (e.g. ``None``) are passed through as-is. + + For example:: + + >>> import petl as etl + >>> table1 = [['id', 'score'], + ... [1, -5], + ... [2, 42], + ... [3, 105], + ... [4, None]] + >>> table2 = etl.clampvalues(table1, 'score', 0, 100) + >>> table2 + +----+-------+ + | id | score | + +====+=======+ + | 1 | 0 | + +----+-------+ + | 2 | 42 | + +----+-------+ + | 3 | 100 | + +----+-------+ + | 4 | None | + +----+-------+ + + The `low` and `high` parameters are both inclusive bounds. Raises + ``ValueError`` if ``low > high``. + + .. versionadded:: 1.8.0 + + """ + return ClampValuesView(table, field, low, high) + + +class ClampValuesView(Table): + """View returned by :func:`clampvalues`.""" + + def __init__(self, table, field, low, high): + if low > high: + raise ValueError( + 'low (%r) must not be greater than high (%r)' % (low, high) + ) + self.table = table + self.field = field + self.low = low + self.high = high + + def __iter__(self): + it = iter(self.table) + header = next(it) + yield header # pass header row through unchanged + + # find the index of the target field + try: + idx = list(header).index(self.field) + except ValueError: + raise ValueError('field %r not found in header %r' % (self.field, header)) + + low, high = self.low, self.high + for row in it: + row = list(row) + val = row[idx] + if val is not None: + try: + if val < low: + row[idx] = low + elif val > high: + row[idx] = high + except TypeError: + pass # non-comparable type: leave value unchanged + yield tuple(row) From 0994508979ed6a536bc04765ef7557b5b734c27d Mon Sep 17 00:00:00 2001 From: Uday Patel Date: Tue, 21 Apr 2026 18:37:34 -0700 Subject: [PATCH 2/2] Add filldown transform to propagate last non-None value downward - Implements FillDownView following petl's Table subclass pattern - Supports filling multiple fields in a single call - Leading None values remain None until a non-None value is seen - Raises FieldSelectionError for missing fields - 9 unit tests covering normal cases and edge cases --- petl/test/transform/test_filldown.py | 104 +++++++++++++++++++++++++++ petl/transform/__init__.py | 2 + petl/transform/filldown.py | 80 +++++++++++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 petl/test/transform/test_filldown.py create mode 100644 petl/transform/filldown.py diff --git a/petl/test/transform/test_filldown.py b/petl/test/transform/test_filldown.py new file mode 100644 index 00000000..38650fc1 --- /dev/null +++ b/petl/test/transform/test_filldown.py @@ -0,0 +1,104 @@ +from __future__ import absolute_import, print_function, division +import pytest +import petl as etl +from petl.errors import FieldSelectionError + + +# --------------------------------------------------------------------------- +# Basic behaviour +# --------------------------------------------------------------------------- + +def test_filldown_basic(): + """None values should be replaced by the last non-None value above.""" + table = [['site', 'reading'], + ['A', 1.2], + [None, 1.5], + [None, 2.0], + ['B', 0.9], + [None, 1.1]] + result = list(etl.filldown(table, 'site')) + assert result[1][0] == 'A' + assert result[2][0] == 'A' # filled + assert result[3][0] == 'A' # filled + assert result[4][0] == 'B' + assert result[5][0] == 'B' # filled + + +def test_filldown_header_preserved(): + """The header row must be returned unchanged.""" + table = [['a', 'b'], [1, None]] + result = list(etl.filldown(table, 'b')) + assert list(result[0]) == ['a', 'b'] + + +def test_filldown_no_nones(): + """A column with no None values should be completely unchanged.""" + table = [['v'], [1], [2], [3]] + result = list(etl.filldown(table, 'v')) + assert result[1][0] == 1 + assert result[2][0] == 2 + assert result[3][0] == 3 + + +def test_filldown_leading_none_stays_none(): + """If the very first value is None it should remain None.""" + table = [['v'], [None], [None], [5]] + result = list(etl.filldown(table, 'v')) + assert result[1][0] is None + assert result[2][0] is None + assert result[3][0] == 5 + + +def test_filldown_non_target_columns_unchanged(): + """Columns not listed in *fields should not be modified.""" + table = [['a', 'b'], [1, 'x'], [None, None]] + result = list(etl.filldown(table, 'a')) + assert result[2][1] is None # 'b' column untouched + + +# --------------------------------------------------------------------------- +# Multiple fields +# --------------------------------------------------------------------------- + +def test_filldown_multiple_fields(): + """Multiple fields can be filled in a single call.""" + table = [['a', 'b'], + [1, 'x'], + [None, None], + [2, None]] + result = list(etl.filldown(table, 'a', 'b')) + assert result[2][0] == 1 # a filled + assert result[2][1] == 'x' # b filled + assert result[3][0] == 2 # a new value + assert result[3][1] == 'x' # b still filled + + +# --------------------------------------------------------------------------- +# Edge cases +# --------------------------------------------------------------------------- + +def test_filldown_all_none(): + """A column that is entirely None should remain entirely None.""" + table = [['v'], [None], [None], [None]] + result = list(etl.filldown(table, 'v')) + assert result[1][0] is None + assert result[2][0] is None + assert result[3][0] is None + + +def test_filldown_single_row(): + """A table with only one data row should work without error.""" + table = [['v'], [42]] + result = list(etl.filldown(table, 'v')) + assert result[1][0] == 42 + + +# --------------------------------------------------------------------------- +# Error handling +# --------------------------------------------------------------------------- + +def test_filldown_missing_field_raises(): + """Referencing a field not in the header should raise ValueError.""" + table = [['v'], [1]] + with pytest.raises(FieldSelectionError): + list(etl.filldown(table, 'nonexistent')) \ No newline at end of file diff --git a/petl/transform/__init__.py b/petl/transform/__init__.py index 6a07b4e0..e2aea63c 100644 --- a/petl/transform/__init__.py +++ b/petl/transform/__init__.py @@ -12,6 +12,8 @@ interpolateall from petl.transform.clamp import clampvalues + +from petl.transform.filldown import filldown from petl.transform.sorts import sort, mergesort, issorted diff --git a/petl/transform/filldown.py b/petl/transform/filldown.py new file mode 100644 index 00000000..7ddbf497 --- /dev/null +++ b/petl/transform/filldown.py @@ -0,0 +1,80 @@ +from __future__ import absolute_import, print_function, division +from petl.util.base import Table +from petl.errors import FieldSelectionError + + +def filldown(table, *fields): + """Propagate the last observed non-``None`` value downward within one or + more fields. Rows where the field already has a value are left unchanged. + If a field's very first data value is ``None`` it stays ``None`` until a + non-``None`` value is encountered. + + For example:: + + >>> import petl as etl + >>> table1 = [['site', 'reading'], + ... ['A', 1.2], + ... [None, 1.5], + ... [None, 2.0], + ... ['B', 0.9], + ... [None, 1.1]] + >>> table2 = etl.filldown(table1, 'site') + >>> table2 + +------+---------+ + | site | reading | + +======+=========+ + | 'A' | 1.2 | + +------+---------+ + | 'A' | 1.5 | + +------+---------+ + | 'A' | 2.0 | + +------+---------+ + | 'B' | 0.9 | + +------+---------+ + | 'B' | 1.1 | + +------+---------+ + + Multiple fields can be filled in a single call:: + + >>> table3 = [['a', 'b'], + ... [1, 'x'], + ... [None, None], + ... [2, None]] + >>> etl.filldown(table3, 'a', 'b') + + .. versionadded:: 1.8.0 + + """ + return FillDownView(table, fields) + + +class FillDownView(Table): + """View returned by :func:`filldown`.""" + + def __init__(self, table, fields): + self.table = table + self.fields = fields + + def __iter__(self): + it = iter(self.table) + header = next(it) + yield header + + # resolve the index of each requested field + header_list = list(header) + try: + indices = [header_list.index(f) for f in self.fields] + except ValueError as e: + raise FieldSelectionError(str(e)) + + # last seen non-None value for each tracked field + last_seen = {idx: None for idx in indices} + + for row in it: + row = list(row) + for idx in indices: + if row[idx] is None: + row[idx] = last_seen[idx] # fill from above + else: + last_seen[idx] = row[idx] # update running value + yield tuple(row) \ No newline at end of file