-
Notifications
You must be signed in to change notification settings - Fork 201
Add clampvalues and filldown transforms #692
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
UdayPate
wants to merge
2
commits into
petl-developers:master
Choose a base branch
from
UdayPate:feature/clamp-transform
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| from __future__ import absolute_import, print_function, division | ||
| import pytest | ||
| import petl as etl | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Basic behaviour | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
def test_clampvalues_clamps_below_low():
    """A score under the lower bound comes back as the lower bound."""
    source = [['id', 'score'], [1, -10], [2, 50]]
    rows = list(etl.clampvalues(source, 'score', 0, 100))
    # first data row was out of range, second was already inside it
    assert rows[1][1] == 0
    assert rows[2][1] == 50
|
|
||
|
|
||
def test_clampvalues_clamps_above_high():
    """A score over the upper bound comes back as the upper bound."""
    source = [['id', 'score'], [1, 150], [2, 50]]
    rows = list(etl.clampvalues(source, 'score', 0, 100))
    # first data row was out of range, second was already inside it
    assert rows[1][1] == 100
    assert rows[2][1] == 50
|
|
||
|
|
||
def test_clampvalues_within_range_unchanged():
    """Values inside [low, high], bounds included, are returned as given."""
    rows = list(etl.clampvalues([['x'], [0], [50], [100]], 'x', 0, 100))
    # data rows start at position 1; position 0 is the header
    for position, expected in enumerate((0, 50, 100), 1):
        assert rows[position][0] == expected
|
|
||
|
|
||
def test_clampvalues_none_passthrough():
    """A None cell is emitted as None rather than raising or being clamped."""
    rows = list(etl.clampvalues([['val'], [None], [5]], 'val', 0, 10))
    assert rows[1][0] is None
    assert rows[2][0] == 5
|
|
||
|
|
||
def test_clampvalues_header_preserved():
    """The first row of the output carries the same field names as the input."""
    source = [['id', 'score'], [1, 50]]
    rows = list(etl.clampvalues(source, 'score', 0, 100))
    header = list(rows[0])
    assert header == ['id', 'score']
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Boundary / edge cases | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
def test_clampvalues_float_values():
    """Floating-point values and bounds are clamped like integers."""
    rows = list(etl.clampvalues([['v'], [-0.5], [0.5], [1.5]], 'v', 0.0, 1.0))
    # data rows start at position 1; position 0 is the header
    for position, expected in enumerate((0.0, 0.5, 1.0), 1):
        assert rows[position][0] == expected
|
|
||
|
|
||
def test_clampvalues_low_equals_high():
    """With low == high, every value collapses to that one bound."""
    rows = list(etl.clampvalues([['v'], [0], [5], [10]], 'v', 5, 5))
    for position in (1, 2, 3):
        assert rows[position][0] == 5
|
|
||
|
|
||
def test_clampvalues_negative_range():
    """Bounds that are both negative clamp the same way as positive ones."""
    rows = list(etl.clampvalues([['v'], [-100], [-50], [-1]], 'v', -75, -25))
    # -100 is raised to -75, -50 is kept, -1 is lowered to -25
    for position, expected in enumerate((-75, -50, -25), 1):
        assert rows[position][0] == expected
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Error handling | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
def test_clampvalues_invalid_range_raises():
    """Passing low > high fails with ValueError before any iteration."""
    source = [['v'], [1]]
    # no list(...) around the call: the error must come from construction
    with pytest.raises(ValueError):
        etl.clampvalues(source, 'v', 10, 0)
|
|
||
|
|
||
def test_clampvalues_missing_field_raises():
    """Iterating with a field name absent from the header raises ValueError."""
    source = [['v'], [1]]
    with pytest.raises(ValueError):
        list(etl.clampvalues(source, 'nonexistent', 0, 10))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| from __future__ import absolute_import, print_function, division | ||
| import pytest | ||
| import petl as etl | ||
| from petl.errors import FieldSelectionError | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Basic behaviour | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
def test_filldown_basic():
    """Each None in the field takes the nearest non-None value above it."""
    source = [
        ['site', 'reading'],
        ['A', 1.2],
        [None, 1.5],
        [None, 2.0],
        ['B', 0.9],
        [None, 1.1],
    ]
    rows = list(etl.filldown(source, 'site'))
    # rows 2, 3 and 5 started out as None and must have been filled
    for position, expected in enumerate(('A', 'A', 'A', 'B', 'B'), 1):
        assert rows[position][0] == expected
|
|
||
|
|
||
def test_filldown_header_preserved():
    """The first row of the output carries the same field names as the input."""
    rows = list(etl.filldown([['a', 'b'], [1, None]], 'b'))
    header = list(rows[0])
    assert header == ['a', 'b']
|
|
||
|
|
||
def test_filldown_no_nones():
    """When the field has no None at all, every value is returned as given."""
    rows = list(etl.filldown([['v'], [1], [2], [3]], 'v'))
    for position, expected in enumerate((1, 2, 3), 1):
        assert rows[position][0] == expected
|
|
||
|
|
||
def test_filldown_leading_none_stays_none():
    """None values that precede the first real value have nothing to inherit."""
    rows = list(etl.filldown([['v'], [None], [None], [5]], 'v'))
    for position in (1, 2):
        assert rows[position][0] is None
    assert rows[3][0] == 5
|
|
||
|
|
||
def test_filldown_non_target_columns_unchanged():
    """Only the requested fields are filled; other columns keep their None."""
    source = [['a', 'b'], [1, 'x'], [None, None]]
    rows = list(etl.filldown(source, 'a'))
    last_row = rows[2]
    assert last_row[1] is None  # 'b' was not requested, so it stays None
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Multiple fields | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
def test_filldown_multiple_fields():
    """Several fields passed in one call are each filled independently."""
    source = [
        ['a', 'b'],
        [1, 'x'],
        [None, None],
        [2, None],
    ]
    rows = list(etl.filldown(source, 'a', 'b'))
    second, third = rows[2], rows[3]
    assert second[0] == 1  # 'a' inherited from the row above
    assert second[1] == 'x'  # 'b' inherited from the row above
    assert third[0] == 2  # 'a' has its own value here
    assert third[1] == 'x'  # 'b' is still carried down
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Edge cases | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
def test_filldown_all_none():
    """With no non-None value to propagate, the whole column stays None."""
    rows = list(etl.filldown([['v'], [None], [None], [None]], 'v'))
    for position in (1, 2, 3):
        assert rows[position][0] is None
|
|
||
|
|
||
def test_filldown_single_row():
    """A table holding a single data row is processed without error."""
    rows = list(etl.filldown([['v'], [42]], 'v'))
    only_row = rows[1]
    assert only_row[0] == 42
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Error handling | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
def test_filldown_missing_field_raises():
    """Iterating with a field name absent from the header raises
    FieldSelectionError."""
    source = [['v'], [1]]
    with pytest.raises(FieldSelectionError):
        list(etl.filldown(source, 'nonexistent'))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| from __future__ import absolute_import, print_function, division | ||
| from petl.util.base import Table | ||
|
|
||
|
|
||
def clampvalues(table, field, low, high):
    """Return a view of `table` in which the values of `field` are limited to
    the inclusive range [`low`, `high`].

    A value below `low` is replaced by `low`, a value above `high` is
    replaced by `high`, and a value already inside the range is left alone.
    ``None`` and values that cannot be ordered against the bounds are passed
    through as-is. E.g.::

        >>> import petl as etl
        >>> table1 = [['id', 'score'],
        ...           [1, -5],
        ...           [2, 42],
        ...           [3, 105],
        ...           [4, None]]
        >>> table2 = etl.clampvalues(table1, 'score', 0, 100)
        >>> table2
        +----+-------+
        | id | score |
        +====+=======+
        |  1 |     0 |
        +----+-------+
        |  2 |    42 |
        +----+-------+
        |  3 |   100 |
        +----+-------+
        |  4 | None  |
        +----+-------+

    A ``ValueError`` is raised straight away, before any row is read, if
    ``low > high``.

    .. versionadded:: 1.8.0

    """
    view = ClampValuesView(table, field, low, high)
    return view
|
|
||
|
|
||
class ClampValuesView(Table):
    """Lazy table view returned by :func:`clampvalues`.

    Iterating the view yields the header row of the wrapped table unchanged,
    followed by every data row as a tuple in which the value of `field` has
    been limited to the inclusive range [`low`, `high`].

    """

    def __init__(self, table, field, low, high):
        """Store the clamp settings.

        Raises ``ValueError`` if ``low > high``, so that an impossible range
        is reported at construction time rather than on first iteration.

        """
        if low > high:
            raise ValueError(
                'low (%r) must not be greater than high (%r)' % (low, high)
            )
        self.table = table
        self.field = field
        self.low = low
        self.high = high

    def __iter__(self):
        """Yield the header, then each data row with `field` clamped.

        Raises ``ValueError`` (after the header has been yielded) if `field`
        is not one of the header values.

        """
        it = iter(self.table)
        try:
            header = next(it)
        except StopIteration:
            # An empty source table gives an empty view. Letting
            # StopIteration escape a generator body is turned into a
            # RuntimeError by Python 3.7+ (PEP 479).
            return
        yield header  # pass header row through unchanged

        # Locate the target column. The membership test is done up front so
        # the error is raised outside any ``except`` block and carries no
        # chained "during handling of the above exception" traceback.
        fields = list(header)
        if self.field not in fields:
            raise ValueError(
                'field %r not found in header %r' % (self.field, header)
            )
        idx = fields.index(self.field)

        low, high = self.low, self.high
        for row in it:
            row = list(row)
            # A ragged row that is too short to contain the target column
            # has nothing to clamp, so it is passed through as-is.
            val = row[idx] if idx < len(row) else None
            if val is not None:
                try:
                    if val < low:
                        row[idx] = low
                    elif val > high:
                        row[idx] = high
                except TypeError:
                    pass  # non-comparable type: leave value unchanged
            yield tuple(row)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| from __future__ import absolute_import, print_function, division | ||
| from petl.util.base import Table | ||
| from petl.errors import FieldSelectionError | ||
|
|
||
|
|
||
def filldown(table, *fields):
    """Return a view of `table` in which each ``None`` in the given fields is
    replaced by the most recent non-``None`` value seen above it in the same
    field.

    Cells that already hold a value are not modified. ``None`` values at the
    top of a field, before any non-``None`` value has been seen, remain
    ``None``. E.g.::

        >>> import petl as etl
        >>> table1 = [['site', 'reading'],
        ...           ['A', 1.2],
        ...           [None, 1.5],
        ...           [None, 2.0],
        ...           ['B', 0.9],
        ...           [None, 1.1]]
        >>> table2 = etl.filldown(table1, 'site')
        >>> table2
        +------+---------+
        | site | reading |
        +======+=========+
        | 'A'  |     1.2 |
        +------+---------+
        | 'A'  |     1.5 |
        +------+---------+
        | 'A'  |     2.0 |
        +------+---------+
        | 'B'  |     0.9 |
        +------+---------+
        | 'B'  |     1.1 |
        +------+---------+

    More than one field may be given, in which case each is filled
    independently::

        >>> table3 = [['a', 'b'],
        ...           [1, 'x'],
        ...           [None, None],
        ...           [2, None]]
        >>> etl.filldown(table3, 'a', 'b')
        +---+-----+
        | a | b   |
        +===+=====+
        | 1 | 'x' |
        +---+-----+
        | 1 | 'x' |
        +---+-----+
        | 2 | 'x' |
        +---+-----+

    .. versionadded:: 1.8.0

    """
    view = FillDownView(table, fields)
    return view
|
|
||
|
|
||
class FillDownView(Table):
    """Lazy table view returned by :func:`filldown`.

    Iterating the view yields the header row of the wrapped table unchanged,
    followed by every data row as a tuple in which each ``None`` in one of
    the tracked fields has been replaced by the last non-``None`` value seen
    in that field.

    """

    def __init__(self, table, fields):
        """Store the wrapped table and the sequence of field names to fill."""
        self.table = table
        self.fields = fields

    def __iter__(self):
        """Yield the header, then each data row with tracked fields filled.

        Raises ``FieldSelectionError`` (after the header has been yielded) if
        any requested field is not one of the header values.

        """
        it = iter(self.table)
        try:
            header = next(it)
        except StopIteration:
            # An empty source table gives an empty view. Letting
            # StopIteration escape a generator body is turned into a
            # RuntimeError by Python 3.7+ (PEP 479).
            return
        yield header

        # Resolve the index of each requested field. The offending field
        # itself is handed to FieldSelectionError so the error names the bad
        # selection rather than list.index()'s internal message.
        header_list = list(header)
        indices = []
        for f in self.fields:
            if f not in header_list:
                raise FieldSelectionError(f)
            indices.append(header_list.index(f))

        # last seen non-None value for each tracked field
        last_seen = dict.fromkeys(indices)

        for row in it:
            row = list(row)
            width = len(row)
            for idx in indices:
                if idx >= width:
                    # ragged row without this column: nothing to fill and
                    # nothing to remember
                    continue
                if row[idx] is None:
                    row[idx] = last_seen[idx]  # fill from above
                else:
                    last_seen[idx] = row[idx]  # update running value
            yield tuple(row)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
1. Filldown export shadowed
🐞 Bug ≡ Correctness
Agent Prompt
ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools