Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions petl/test/transform/test_clamp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from __future__ import absolute_import, print_function, division
import pytest
import petl as etl


# ---------------------------------------------------------------------------
# Basic behaviour
# ---------------------------------------------------------------------------

def test_clampvalues_clamps_below_low():
    """A score under the lower bound comes back as the lower bound."""
    source = [['id', 'score'], [1, -10], [2, 50]]
    rows = list(etl.clampvalues(source, 'score', 0, 100))
    clamped_row, untouched_row = rows[1], rows[2]
    assert clamped_row[1] == 0  # -10 is raised to the lower bound
    assert untouched_row[1] == 50  # already in range, left alone


def test_clampvalues_clamps_above_high():
    """A score over the upper bound comes back as the upper bound."""
    source = [['id', 'score'], [1, 150], [2, 50]]
    rows = list(etl.clampvalues(source, 'score', 0, 100))
    clamped_row, untouched_row = rows[1], rows[2]
    assert clamped_row[1] == 100  # 150 is lowered to the upper bound
    assert untouched_row[1] == 50  # already in range, left alone


def test_clampvalues_within_range_unchanged():
    """Values inside [low, high], bounds included, are returned as given."""
    source = [['x'], [0], [50], [100]]
    rows = list(etl.clampvalues(source, 'x', 0, 100))
    # Check each data row against the value it started with.
    for position, expected in ((1, 0), (2, 50), (3, 100)):
        assert rows[position][0] == expected


def test_clampvalues_none_passthrough():
    """A ``None`` cell is returned as ``None`` and raises nothing."""
    source = [['val'], [None], [5]]
    rows = list(etl.clampvalues(source, 'val', 0, 10))
    none_cell = rows[1][0]
    number_cell = rows[2][0]
    assert none_cell is None
    assert number_cell == 5


def test_clampvalues_header_preserved():
    """The first row of the output carries the original field names."""
    source = [['id', 'score'], [1, 50]]
    rows = list(etl.clampvalues(source, 'score', 0, 100))
    header = list(rows[0])
    assert header == ['id', 'score']


# ---------------------------------------------------------------------------
# Boundary / edge cases
# ---------------------------------------------------------------------------

def test_clampvalues_float_values():
    """Float cells are clamped against float bounds."""
    source = [['v'], [-0.5], [0.5], [1.5]]
    rows = list(etl.clampvalues(source, 'v', 0.0, 1.0))
    # (row position, expected value after clamping to [0.0, 1.0])
    for position, expected in ((1, 0.0), (2, 0.5), (3, 1.0)):
        assert rows[position][0] == expected


def test_clampvalues_low_equals_high():
    """With low == high every cell collapses onto that one value."""
    source = [['v'], [0], [5], [10]]
    rows = list(etl.clampvalues(source, 'v', 5, 5))
    for position in (1, 2, 3):
        assert rows[position][0] == 5


def test_clampvalues_negative_range():
    """Clamping works when both bounds are negative."""
    source = [['v'], [-100], [-50], [-1]]
    rows = list(etl.clampvalues(source, 'v', -75, -25))
    below, inside, above = rows[1], rows[2], rows[3]
    assert below[0] == -75  # -100 is raised to the lower bound
    assert inside[0] == -50  # within range, left alone
    assert above[0] == -25  # -1 is lowered to the upper bound


# ---------------------------------------------------------------------------
# Error handling
# ---------------------------------------------------------------------------

def test_clampvalues_invalid_range_raises():
    """Passing low > high raises ValueError from the call itself."""
    source = [['v'], [1]]
    # The result is deliberately not iterated: the call alone must raise.
    with pytest.raises(ValueError):
        etl.clampvalues(source, 'v', 10, 0)


def test_clampvalues_missing_field_raises():
    """Iterating with a field name absent from the header raises ValueError."""
    source = [['v'], [1]]
    with pytest.raises(ValueError):
        # list() forces iteration of the returned view.
        list(etl.clampvalues(source, 'nonexistent', 0, 10))
104 changes: 104 additions & 0 deletions petl/test/transform/test_filldown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from __future__ import absolute_import, print_function, division
import pytest
import petl as etl
from petl.errors import FieldSelectionError


# ---------------------------------------------------------------------------
# Basic behaviour
# ---------------------------------------------------------------------------

def test_filldown_basic():
    """Each ``None`` takes the nearest non-``None`` value above it."""
    source = [
        ['site', 'reading'],
        ['A', 1.2],
        [None, 1.5],
        [None, 2.0],
        ['B', 0.9],
        [None, 1.1],
    ]
    rows = list(etl.filldown(source, 'site'))
    # Expected 'site' cell for data rows 1..5; rows 2, 3 and 5 are filled.
    expected_sites = ['A', 'A', 'A', 'B', 'B']
    for position, site in enumerate(expected_sites, 1):
        assert rows[position][0] == site


def test_filldown_header_preserved():
    """The first row of the output carries the original field names."""
    source = [['a', 'b'], [1, None]]
    rows = list(etl.filldown(source, 'b'))
    header = list(rows[0])
    assert header == ['a', 'b']


def test_filldown_no_nones():
    """A column without any ``None`` comes back exactly as supplied."""
    source = [['v'], [1], [2], [3]]
    rows = list(etl.filldown(source, 'v'))
    for position, expected in ((1, 1), (2, 2), (3, 3)):
        assert rows[position][0] == expected


def test_filldown_leading_none_stays_none():
    """``None`` cells with nothing above them to copy remain ``None``."""
    source = [['v'], [None], [None], [5]]
    rows = list(etl.filldown(source, 'v'))
    first_cell, second_cell, third_cell = rows[1][0], rows[2][0], rows[3][0]
    assert first_cell is None
    assert second_cell is None
    assert third_cell == 5


def test_filldown_non_target_columns_unchanged():
    """Only the requested fields are filled; other columns are left alone."""
    source = [['a', 'b'], [1, 'x'], [None, None]]
    rows = list(etl.filldown(source, 'a'))
    untargeted_cell = rows[2][1]
    assert untargeted_cell is None  # 'b' was not requested, so not filled


# ---------------------------------------------------------------------------
# Multiple fields
# ---------------------------------------------------------------------------

def test_filldown_multiple_fields():
    """Several fields may be filled by one call, each tracked separately."""
    source = [
        ['a', 'b'],
        [1, 'x'],
        [None, None],
        [2, None],
    ]
    rows = list(etl.filldown(source, 'a', 'b'))
    second, third = rows[2], rows[3]
    assert second[0] == 1  # 'a' filled from the row above
    assert second[1] == 'x'  # 'b' filled from the row above
    assert third[0] == 2  # 'a' has its own new value
    assert third[1] == 'x'  # 'b' keeps the earlier fill value


# ---------------------------------------------------------------------------
# Edge cases
# ---------------------------------------------------------------------------

def test_filldown_all_none():
    """With no value to propagate, an all-``None`` column stays ``None``."""
    source = [['v'], [None], [None], [None]]
    rows = list(etl.filldown(source, 'v'))
    for position in (1, 2, 3):
        assert rows[position][0] is None


def test_filldown_single_row():
    """A table holding just one data row is processed without error."""
    source = [['v'], [42]]
    rows = list(etl.filldown(source, 'v'))
    only_cell = rows[1][0]
    assert only_cell == 42


# ---------------------------------------------------------------------------
# Error handling
# ---------------------------------------------------------------------------

def test_filldown_missing_field_raises():
    """A field name absent from the header raises FieldSelectionError."""
    source = [['v'], [1]]
    with pytest.raises(FieldSelectionError):
        # list() forces iteration of the returned table.
        list(etl.filldown(source, 'nonexistent'))
4 changes: 4 additions & 0 deletions petl/transform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
replaceall, update, convertnumbers, format, formatall, interpolate, \
interpolateall

from petl.transform.clamp import clampvalues

from petl.transform.filldown import filldown

Comment on lines +14 to +17
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Filldown export shadowed 🐞 Bug ≡ Correctness

petl.transform.__init__ imports filldown from the new petl/transform/filldown.py, but later
re-imports filldown from petl/transform/fills.py, overwriting the new symbol. As a result, the newly
added filldown implementation is effectively unused and untested via the public API.
Agent Prompt
### Issue description
`petl/transform/__init__.py` imports `filldown` twice, and the later import from `petl.transform.fills` overwrites the new `petl.transform.filldown.filldown`. This makes the newly added module effectively unreachable from `import petl as etl`.

### Issue Context
There is already an existing `filldown` implementation in `petl/transform/fills.py` (including `Table.filldown = filldown`). The PR adds a second implementation under the same name.

### Fix Focus Areas
- petl/transform/__init__.py[14-37]
- petl/transform/fills.py[10-103]
- petl/transform/filldown.py[1-80]

### Suggested resolution paths
- **Preferred:** Delete `petl/transform/filldown.py` and remove `from petl.transform.filldown import filldown` from `petl/transform/__init__.py` (keep tests if they are intended to validate the existing `fills.filldown`).
- **Alternative (if you truly want a new implementation):** Stop importing `filldown` from `petl.transform.fills` (or rename one of the functions), then ensure the exported one matches petl conventions (supports `missing=`, supports filling all fields when none specified, uses `petl.compat.next`, and registers `Table.filldown`).

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

from petl.transform.sorts import sort, mergesort, issorted

from petl.transform.selects import select, selectop, selectcontains, \
Expand Down
77 changes: 77 additions & 0 deletions petl/transform/clamp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from __future__ import absolute_import, print_function, division
from petl.util.base import Table


def clampvalues(table, field, low, high):
    """Restrict the values of one field to the inclusive range
    ``[low, high]``.

    A value smaller than `low` is replaced by `low`, a value larger than
    `high` is replaced by `high`, and anything already inside the range is
    returned as-is. ``None`` is passed through untouched, as is any value
    that cannot be compared with the bounds.

    E.g.::

        >>> import petl as etl
        >>> table1 = [['id', 'score'],
        ...           [1, -5],
        ...           [2, 42],
        ...           [3, 105],
        ...           [4, None]]
        >>> table2 = etl.clampvalues(table1, 'score', 0, 100)
        >>> table2
        +----+-------+
        | id | score |
        +====+=======+
        |  1 |     0 |
        +----+-------+
        |  2 |    42 |
        +----+-------+
        |  3 |   100 |
        +----+-------+
        |  4 | None  |
        +----+-------+

    A ``ValueError`` is raised if ``low > high``.

    .. versionadded:: 1.8.0

    """
    view = ClampValuesView(table, field, low, high)
    return view


class ClampValuesView(Table):
    """View returned by :func:`clampvalues`.

    The bounds are validated when the view is constructed. The source table
    is only read, and the clamping only applied, when the view is iterated.
    """

    def __init__(self, table, field, low, high):
        """Store the source table and clamp settings.

        Raises ``ValueError`` if ``low > high``.
        """
        # Validate eagerly so a bad range is reported by the constructing
        # call rather than on first iteration.
        if low > high:
            raise ValueError(
                'low (%r) must not be greater than high (%r)' % (low, high)
            )
        self.table = table
        self.field = field
        self.low = low
        self.high = high

    def __iter__(self):
        """Yield the header unchanged, then each data row as a tuple with
        the target field clamped to ``[low, high]``.

        Raises ``ValueError`` (after the header has been yielded) if the
        target field is not present in the header.
        """
        it = iter(self.table)
        try:
            header = next(it)
        except StopIteration:
            # Empty source table: yield nothing. Without this guard the
            # StopIteration would escape the generator, which PEP 479
            # turns into a RuntimeError.
            return
        yield header  # pass header row through unchanged

        # find the index of the target field
        try:
            idx = list(header).index(self.field)
        except ValueError:
            raise ValueError(
                'field %r not found in header %r' % (self.field, header)
            )

        low, high = self.low, self.high
        for row in it:
            row = list(row)
            # A row shorter than the header has no cell for the target
            # field; pass it through instead of raising IndexError.
            if idx < len(row):
                val = row[idx]
                if val is not None:
                    try:
                        if val < low:
                            row[idx] = low
                        elif val > high:
                            row[idx] = high
                    except TypeError:
                        pass  # non-comparable type: leave value unchanged
            yield tuple(row)
80 changes: 80 additions & 0 deletions petl/transform/filldown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from __future__ import absolute_import, print_function, division
from petl.util.base import Table
from petl.errors import FieldSelectionError


def filldown(table, *fields):
    """Fill ``None`` cells in the given fields with the most recent
    non-``None`` value seen higher up in the same field.

    Cells that already hold a value are not altered. While no non-``None``
    value has yet been seen in a field, its ``None`` cells remain ``None``.

    E.g.::

        >>> import petl as etl
        >>> table1 = [['site', 'reading'],
        ...           ['A', 1.2],
        ...           [None, 1.5],
        ...           [None, 2.0],
        ...           ['B', 0.9],
        ...           [None, 1.1]]
        >>> table2 = etl.filldown(table1, 'site')
        >>> table2
        +------+---------+
        | site | reading |
        +======+=========+
        | 'A'  |     1.2 |
        +------+---------+
        | 'A'  |     1.5 |
        +------+---------+
        | 'A'  |     2.0 |
        +------+---------+
        | 'B'  |     0.9 |
        +------+---------+
        | 'B'  |     1.1 |
        +------+---------+

    Any number of fields may be named in one call; each is tracked
    independently::

        >>> table3 = [['a', 'b'],
        ...           [1, 'x'],
        ...           [None, None],
        ...           [2, None]]
        >>> table4 = etl.filldown(table3, 'a', 'b')

    .. versionadded:: 1.8.0

    """
    view = FillDownView(table, fields)
    return view


class FillDownView(Table):
    """View returned by :func:`filldown`.

    The source table is only read, and the filling only applied, when the
    view is iterated.
    """

    def __init__(self, table, fields):
        """Store the source table and the sequence of field names to fill."""
        self.table = table
        self.fields = fields

    def __iter__(self):
        """Yield the header unchanged, then each data row as a tuple with
        ``None`` cells in the tracked fields replaced by the last
        non-``None`` value seen above them.

        Raises ``FieldSelectionError`` (after the header has been yielded)
        if a requested field is not present in the header.
        """
        it = iter(self.table)
        try:
            header = next(it)
        except StopIteration:
            # Empty source table: yield nothing. Without this guard the
            # StopIteration would escape the generator, which PEP 479
            # turns into a RuntimeError.
            return
        yield header

        # resolve the index of each requested field
        header_list = list(header)
        indices = []
        for f in self.fields:
            try:
                indices.append(header_list.index(f))
            except ValueError:
                # Report the offending field itself rather than the text of
                # the list.index() error.
                # NOTE(review): assumes FieldSelectionError accepts the bad
                # selection as its single argument -- confirm in petl.errors.
                raise FieldSelectionError(f)

        # last seen non-None value for each tracked field
        last_seen = {idx: None for idx in indices}

        for row in it:
            row = list(row)
            width = len(row)
            for idx in indices:
                if idx >= width:
                    # Row is shorter than the header and has no cell for
                    # this field; leave it alone instead of raising
                    # IndexError.
                    continue
                if row[idx] is None:
                    row[idx] = last_seen[idx]  # fill from above
                else:
                    last_seen[idx] = row[idx]  # update running value
            yield tuple(row)
Loading