Skip to content

Commit 79a0557

Browse files
authored
Remove E501 from ruff ignore list (#2949)
<!-- Thanks for opening a pull request! --> <!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. --> <!-- Closes #${GITHUB_ISSUE_ID} --> # Rationale for this change Closes #2700 (last one!) This removes the exception for E501 from Ruff. https://docs.astral.sh/ruff/rules/line-too-long/ Refactor only ## Are these changes tested? ## Are there any user-facing changes? <!-- In the case of user-facing changes, please add the changelog label. -->
1 parent af17de8 commit 79a0557

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+454
-205
lines changed

pyiceberg/catalog/glue.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,8 @@ def _update_glue_table(self, database_name: str, table_name: str, table_input: "
406406
raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name} (Glue table version {version_id})") from e
407407
except self.glue.exceptions.ConcurrentModificationException as e:
408408
raise CommitFailedException(
409-
f"Cannot commit {database_name}.{table_name} because Glue detected concurrent update to table version {version_id}"
409+
f"Cannot commit {database_name}.{table_name} because Glue detected concurrent update "
410+
f"to table version {version_id}"
410411
) from e
411412

412413
def _get_glue_table(self, database_name: str, table_name: str) -> "TableTypeDef":

pyiceberg/catalog/rest/auth.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,8 @@ def _refresh_token(self) -> None:
181181
expires_in = result.get("expires_in", self.expires_in)
182182
if expires_in is None:
183183
raise ValueError(
184-
"The expiration time of the Token must be provided by the Server in the Access Token Response in `expires_in` field, or by the PyIceberg Client."
184+
"The expiration time of the Token must be provided by the Server in the Access Token Response "
185+
"in `expires_in` field, or by the PyIceberg Client."
185186
)
186187
self._expires_at = time.monotonic() + expires_in - self.refresh_margin
187188

@@ -249,8 +250,9 @@ def auth_header(self) -> str:
249250

250251

251252
class AuthManagerAdapter(AuthBase):
252-
"""A `requests.auth.AuthBase` adapter that integrates an `AuthManager` into a `requests.Session` to automatically attach the appropriate Authorization header to every request.
253+
"""A `requests.auth.AuthBase` adapter for integrating an `AuthManager` into a `requests.Session`.
253254
255+
This adapter automatically attaches the appropriate Authorization header to every request.
254256
This adapter is useful when working with `requests.Session.auth`
255257
and allows reuse of authentication strategies defined by `AuthManager`.
256258
This AuthManagerAdapter is only intended to be used against the REST Catalog

pyiceberg/catalog/sql.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,8 @@ class SqlCatalog(MetastoreCatalog):
109109
And you can have as many levels as you want, but you need at least one. The `SqlCatalog` honors the same convention.
110110
111111
In the `JDBCCatalog` implementation, a `TableIdentifier` is composed of an optional `Namespace` and a table name.
112-
When a `Namespace` is present, the full name will be `'ns1.ns2.ns3.table'`. A valid `TableIdentifier` could be `'name'` (no namespace).
112+
When a `Namespace` is present, the full name will be `'ns1.ns2.ns3.table'`.
113+
A valid `TableIdentifier` could be `'name'` (no namespace).
113114
The `SqlCatalog` has a different convention where a `TableIdentifier` requires a `Namespace`.
114115
"""
115116

pyiceberg/conversions.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,9 @@ def to_bytes(
188188
) -> bytes:
189189
"""Convert a built-in python value to bytes.
190190
191-
This conversion follows the serialization scheme for storing single values as individual binary values defined in the Iceberg specification that
192-
can be found at https://iceberg.apache.org/spec/#appendix-d-single-value-serialization
191+
This conversion follows the serialization scheme for storing single values as individual binary values
192+
defined in the Iceberg specification that can be found at
193+
https://iceberg.apache.org/spec/#appendix-d-single-value-serialization
193194
194195
Args:
195196
primitive_type (PrimitiveType): An implementation of the PrimitiveType base class.

pyiceberg/expressions/__init__.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -690,12 +690,14 @@ def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundSetPredicate
690690
def __str__(self) -> str:
691691
"""Return the string representation of the SetPredicate class."""
692692
# Sort to make it deterministic
693-
return f"{str(self.__class__.__name__)}({str(self.term)}, {{{', '.join(sorted([str(literal) for literal in self.literals]))}}})"
693+
literals_str = ", ".join(sorted([str(literal) for literal in self.literals]))
694+
return f"{str(self.__class__.__name__)}({str(self.term)}, {{{literals_str}}})"
694695

695696
def __repr__(self) -> str:
696697
"""Return the string representation of the SetPredicate class."""
697698
# Sort to make it deterministic
698-
return f"{str(self.__class__.__name__)}({repr(self.term)}, {{{', '.join(sorted([repr(literal) for literal in self.literals]))}}})"
699+
literals_repr = ", ".join(sorted([repr(literal) for literal in self.literals]))
700+
return f"{str(self.__class__.__name__)}({repr(self.term)}, {{{literals_repr}}})"
699701

700702
def __eq__(self, other: Any) -> bool:
701703
"""Return the equality of two instances of the SetPredicate class."""
@@ -725,12 +727,14 @@ def value_set(self) -> set[Any]:
725727
def __str__(self) -> str:
726728
"""Return the string representation of the BoundSetPredicate class."""
727729
# Sort to make it deterministic
728-
return f"{str(self.__class__.__name__)}({str(self.term)}, {{{', '.join(sorted([str(literal) for literal in self.literals]))}}})"
730+
literals_str = ", ".join(sorted([str(literal) for literal in self.literals]))
731+
return f"{str(self.__class__.__name__)}({str(self.term)}, {{{literals_str}}})"
729732

730733
def __repr__(self) -> str:
731734
"""Return the string representation of the BoundSetPredicate class."""
732735
# Sort to make it deterministic
733-
return f"{str(self.__class__.__name__)}({repr(self.term)}, {{{', '.join(sorted([repr(literal) for literal in self.literals]))}}})"
736+
literals_repr = ", ".join(sorted([repr(literal) for literal in self.literals]))
737+
return f"{str(self.__class__.__name__)}({repr(self.term)}, {{{literals_repr}}})"
734738

735739
def __eq__(self, other: Any) -> bool:
736740
"""Return the equality of two instances of the BoundSetPredicate class."""

pyiceberg/expressions/visitors.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ def visit(obj: BooleanExpression, visitor: BooleanExpressionVisitor[T]) -> T:
139139
140140
Args:
141141
obj (BooleanExpression): An instance of a BooleanExpression.
142-
visitor (BooleanExpressionVisitor[T]): An instance of an implementation of the generic BooleanExpressionVisitor base class.
142+
visitor (BooleanExpressionVisitor[T]): An instance of an implementation of the generic
143+
BooleanExpressionVisitor base class.
143144
144145
Raises:
145146
NotImplementedError: If attempting to visit an unsupported expression.

pyiceberg/io/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,5 +368,6 @@ def load_file_io(properties: Properties = EMPTY_DICT, location: str | None = Non
368368
return PyArrowFileIO(properties)
369369
except ModuleNotFoundError as e:
370370
raise ModuleNotFoundError(
371-
'Could not load a FileIO, please consider installing one: pip3 install "pyiceberg[pyarrow]", for more options refer to the docs.'
371+
"Could not load a FileIO, please consider installing one: "
372+
'pip3 install "pyiceberg[pyarrow]", for more options refer to the docs.'
372373
) from e

pyiceberg/io/pyarrow.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,9 @@ def open_output_stream(self, path: str, *args: Any, **kwargs: Any) -> pyarrow.Na
248248

249249

250250
class PyArrowFile(InputFile, OutputFile):
251-
"""A combined InputFile and OutputFile implementation that uses a pyarrow filesystem to generate pyarrow.lib.NativeFile instances.
251+
"""A combined InputFile and OutputFile implementation using pyarrow filesystem.
252+
253+
This class generates pyarrow.lib.NativeFile instances.
252254
253255
Args:
254256
location (str): A URI or a path to a local file.
@@ -645,8 +647,9 @@ def delete(self, location: str | InputFile | OutputFile) -> None:
645647
"""Delete the file at the given location.
646648
647649
Args:
648-
location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or an OutputFile instance is provided,
649-
the location attribute for that instance is used as the location to delete.
650+
location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or
651+
an OutputFile instance is provided, the location attribute for that instance is used as
652+
the location to delete.
650653
651654
Raises:
652655
FileNotFoundError: When the file at the provided location does not exist.
@@ -1014,7 +1017,10 @@ def collect(
10141017
self,
10151018
expr: BooleanExpression,
10161019
) -> None:
1017-
"""Collect the bound references categorized by having at least one is_null or is_not_null in the expr and the remaining."""
1020+
"""Collect bound references categorized by null predicates.
1021+
1022+
Categorizes by having at least one is_null or is_not_null in the expr and the remaining.
1023+
"""
10181024
boolean_expression_visit(expr, self)
10191025

10201026

@@ -1035,7 +1041,8 @@ def expression_to_pyarrow(expr: BooleanExpression, schema: Schema | None = None)
10351041
def _expression_to_complementary_pyarrow(expr: BooleanExpression, schema: Schema | None = None) -> pc.Expression:
10361042
"""Complementary filter conversion function of expression_to_pyarrow.
10371043
1038-
Could not use expression_to_pyarrow(Not(expr)) to achieve this complementary effect because ~ in pyarrow.compute.Expression does not handle null.
1044+
Could not use expression_to_pyarrow(Not(expr)) to achieve this complementary effect because
1045+
~ in pyarrow.compute.Expression does not handle null.
10391046
"""
10401047
collector = _NullNaNUnmentionedTermsCollector()
10411048
collector.collect(expr)
@@ -1417,7 +1424,9 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
14171424
return TimestampNanoType()
14181425
else:
14191426
raise TypeError(
1420-
"Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write.",
1427+
"Iceberg does not yet support 'ns' timestamp precision. "
1428+
"Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically "
1429+
"downcast 'ns' to 'us' on write.",
14211430
)
14221431
else:
14231432
raise TypeError(f"Unsupported precision for timestamp type: {primitive.unit}")
@@ -1580,7 +1589,8 @@ def _task_to_record_batches(
15801589
fragment = arrow_format.make_fragment(fin)
15811590
physical_schema = fragment.physical_schema
15821591

1583-
# For V1 and V2, we only support Timestamp 'us' in Iceberg Schema, therefore it is reasonable to always cast 'ns' timestamp to 'us' on read.
1592+
# For V1 and V2, we only support Timestamp 'us' in Iceberg Schema,
1593+
# therefore it is reasonable to always cast 'ns' timestamp to 'us' on read.
15841594
# For V3 this has to set explicitly to avoid nanosecond timestamp to be down-casted by default
15851595
downcast_ns_timestamp_to_us = (
15861596
downcast_ns_timestamp_to_us if downcast_ns_timestamp_to_us is not None else format_version <= 2
@@ -2450,7 +2460,8 @@ def _partition_value(self, partition_field: PartitionField, schema: Schema) -> A
24502460

24512461
if not iceberg_transform.preserves_order:
24522462
raise ValueError(
2453-
f"Cannot infer partition value from parquet metadata for a non-linear Partition Field: {partition_field.name} with transform {partition_field.transform}"
2463+
f"Cannot infer partition value from parquet metadata for a non-linear Partition Field: "
2464+
f"{partition_field.name} with transform {partition_field.transform}"
24542465
)
24552466

24562467
transform_func = iceberg_transform.transform(source_field.field_type)
@@ -2471,7 +2482,8 @@ def _partition_value(self, partition_field: PartitionField, schema: Schema) -> A
24712482
)
24722483
if lower_value != upper_value:
24732484
raise ValueError(
2474-
f"Cannot infer partition value from parquet metadata as there are more than one partition values for Partition Field: {partition_field.name}. {lower_value=}, {upper_value=}"
2485+
f"Cannot infer partition value from parquet metadata as there are more than one partition values "
2486+
f"for Partition Field: {partition_field.name}. {lower_value=}, {upper_value=}"
24752487
)
24762488

24772489
return lower_value
@@ -2738,7 +2750,8 @@ def _check_pyarrow_schema_compatible(
27382750
)
27392751
additional_names = set(provided_schema._name_to_id.keys()) - set(requested_schema._name_to_id.keys())
27402752
raise ValueError(
2741-
f"PyArrow table contains more columns: {', '.join(sorted(additional_names))}. Update the schema first (hint, use union_by_name)."
2753+
f"PyArrow table contains more columns: {', '.join(sorted(additional_names))}. "
2754+
"Update the schema first (hint, use union_by_name)."
27422755
) from e
27432756
_check_schema_compatible(requested_schema, provided_schema)
27442757

pyiceberg/manifest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1319,7 +1319,8 @@ def prepare_manifest(self, manifest_file: ManifestFile) -> ManifestFile:
13191319
# To validate this, check that the snapshot id matches the current commit
13201320
if self._commit_snapshot_id != wrapped_manifest_file.added_snapshot_id:
13211321
raise ValueError(
1322-
f"Found unassigned sequence number for a manifest from snapshot: {self._commit_snapshot_id} != {wrapped_manifest_file.added_snapshot_id}"
1322+
f"Found unassigned sequence number for a manifest from snapshot: "
1323+
f"{self._commit_snapshot_id} != {wrapped_manifest_file.added_snapshot_id}"
13231324
)
13241325
wrapped_manifest_file.sequence_number = self._sequence_number
13251326

pyiceberg/schema.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ def __str__(self) -> str:
104104

105105
def __repr__(self) -> str:
106106
"""Return the string representation of the Schema class."""
107-
return f"Schema({', '.join(repr(column) for column in self.columns)}, schema_id={self.schema_id}, identifier_field_ids={self.identifier_field_ids})"
107+
columns_repr = ", ".join(repr(column) for column in self.columns)
108+
return f"Schema({columns_repr}, schema_id={self.schema_id}, identifier_field_ids={self.identifier_field_ids})"
108109

109110
def __len__(self) -> int:
110111
"""Return the length of an instance of the Literal class."""
@@ -374,7 +375,8 @@ def check_format_version_compatibility(self, format_version: int) -> None:
374375
for field in self._lazy_id_to_field.values():
375376
if format_version < field.field_type.minimum_format_version():
376377
raise ValueError(
377-
f"{field.field_type} is only supported in {field.field_type.minimum_format_version()} or higher. Current format version is: {format_version}"
378+
f"{field.field_type} is only supported in {field.field_type.minimum_format_version()} or higher. "
379+
f"Current format version is: {format_version}"
378380
)
379381

380382

@@ -1530,7 +1532,8 @@ def field(self, field: NestedField, field_result: IcebergType | None) -> Iceberg
15301532
else:
15311533
if not field.field_type.is_primitive:
15321534
raise ValueError(
1533-
f"Cannot explicitly project List or Map types, {field.field_id}:{field.name} of type {field.field_type} was selected"
1535+
f"Cannot explicitly project List or Map types, "
1536+
f"{field.field_id}:{field.name} of type {field.field_type} was selected"
15341537
)
15351538
# Selected non-struct field
15361539
return field.field_type
@@ -1550,7 +1553,8 @@ def list(self, list_type: ListType, element_result: IcebergType | None) -> Icebe
15501553
else:
15511554
if not list_type.element_type.is_primitive:
15521555
raise ValueError(
1553-
f"Cannot explicitly project List or Map types, {list_type.element_id} of type {list_type.element_type} was selected"
1556+
f"Cannot explicitly project List or Map types, "
1557+
f"{list_type.element_id} of type {list_type.element_type} was selected"
15541558
)
15551559
return list_type
15561560
elif element_result is not None:
@@ -1567,7 +1571,8 @@ def map(self, map_type: MapType, key_result: IcebergType | None, value_result: I
15671571
return self._project_map(map_type, projected_struct)
15681572
if not map_type.value_type.is_primitive:
15691573
raise ValueError(
1570-
f"Cannot explicitly project List or Map types, Map value {map_type.value_id} of type {map_type.value_type} was selected"
1574+
f"Cannot explicitly project List or Map types, "
1575+
f"Map value {map_type.value_id} of type {map_type.value_type} was selected"
15711576
)
15721577
return map_type
15731578
elif value_result is not None:
@@ -1764,9 +1769,17 @@ def _is_field_compatible(self, lhs: NestedField) -> bool:
17641769
# UnknownType can only be promoted to Primitive types
17651770
if isinstance(rhs.field_type, UnknownType):
17661771
if not isinstance(lhs.field_type, PrimitiveType):
1767-
error_msg = f"Null type (UnknownType) cannot be promoted to non-primitive type {lhs.field_type}. UnknownType can only be promoted to primitive types (string, int, boolean, etc.) in V3+ tables."
1772+
error_msg = (
1773+
f"Null type (UnknownType) cannot be promoted to non-primitive type {lhs.field_type}. "
1774+
"UnknownType can only be promoted to primitive types (string, int, boolean, etc.) "
1775+
"in V3+ tables."
1776+
)
17681777
else:
1769-
error_msg = f"Null type (UnknownType) cannot be promoted to {lhs.field_type}. This may be due to table format version limitations (V1/V2 tables don't support UnknownType promotion)."
1778+
error_msg = (
1779+
f"Null type (UnknownType) cannot be promoted to {lhs.field_type}. "
1780+
"This may be due to table format version limitations "
1781+
"(V1/V2 tables don't support UnknownType promotion)."
1782+
)
17701783
self.rich_table.add_row("❌", str(lhs), f"{str(rhs)} - {error_msg}")
17711784
else:
17721785
self.rich_table.add_row("❌", str(lhs), str(rhs))

0 commit comments

Comments
 (0)