-
Notifications
You must be signed in to change notification settings - Fork 324
feat: support timestamp_precision in table schema #2333
Changes from 27 commits
2e1daa8
253ac1f
dc3c498
97f0251
234a3fd
b20159b
a1bc2cb
bc6dcda
518a12c
a8d5f5c
0567adf
1268c45
8603973
cb9f818
696dfff
d24df7d
873bff6
6a93c26
9a4f72f
c146e39
fc08533
0b743f3
2a81ef9
e131b6d
7693537
5d2fbf0
c7c2b47
f87b618
c0e4595
04c5f59
255b87a
657dd84
4cd3df4
e6a3f8b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -480,3 +480,18 @@ class SourceColumnMatch(str, enum.Enum): | |
| NAME = "NAME" | ||
| """Matches by name. This reads the header row as column names and reorders | ||
| columns to match the field names in the schema.""" | ||
|
|
||
|
|
||
| class TimestampPrecision(object): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking at the code again after our talk, I still think this should be turned into a true enum. With the current code, TimestampPrecision is just a shortcut to creating int or None values. The argument type can't be TimeStampPrecision. This means all the docstrings have to describe all the values as If we change this to actually be an enum, we will have to convert it back to it's raw value before sending it to the backend. But it should make typing a lot cleaner
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But if you want to leave it as-is, that's fine. We'd just need to update the typing and docstrings to be accurate
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As we have discussed offline, I have updated to use enum.Enum, and took the liberty to support None to ensure backward compatibility regarding string representation |
||
| """Precision (maximum number of total digits in base 10) for seconds of | ||
| TIMESTAMP type.""" | ||
|
|
||
| MICROSECOND = None | ||
|
daniel-sanche marked this conversation as resolved.
|
||
| """ | ||
| Default, for TIMESTAMP type with microsecond precision. | ||
| """ | ||
|
|
||
| PICOSECOND = 12 | ||
| """ | ||
| For TIMESTAMP type with picosecond precision. | ||
| """ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -196,6 +196,14 @@ class SchemaField(object): | |
|
|
||
| Only valid for top-level schema fields (not nested fields). | ||
| If the type is FOREIGN, this field is required. | ||
|
|
||
| timestamp_precision: Union[enums.TimestampPrecision, int, None] | ||
| Precision (maximum number of total digits in base 10) for seconds | ||
| of TIMESTAMP type. | ||
|
|
||
| Defaults to `enums.TimestampPrecision.MICROSECOND` (`None`) for | ||
| microsecond precision. Use `enums.TimestampPrecision.PICOSECOND` | ||
| (`12`) for picosecond precision. | ||
| """ | ||
|
|
||
| def __init__( | ||
|
|
@@ -213,6 +221,7 @@ def __init__( | |
| range_element_type: Union[FieldElementType, str, None] = None, | ||
| rounding_mode: Union[enums.RoundingMode, str, None] = None, | ||
| foreign_type_definition: Optional[str] = None, | ||
| timestamp_precision: Union[enums.TimestampPrecision, int, None] = None, | ||
|
daniel-sanche marked this conversation as resolved.
Outdated
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This type annotation isn't actually right with the current code, since it won't ever be a TimestampPrecision instance
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This annotation is for the type accepted, so I think it is accurate?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated to reflect the new types accepted: None and enum. The reason why None is accepted is to ensure an empty api representation when there is no user input, so the string representation remains the same if timestamp_precision is not set |
||
| ): | ||
| self._properties: Dict[str, Any] = { | ||
| "name": name, | ||
|
|
@@ -237,6 +246,8 @@ def __init__( | |
| if isinstance(policy_tags, PolicyTagList) | ||
| else None | ||
| ) | ||
| if timestamp_precision is not None: | ||
| self._properties["timestampPrecision"] = timestamp_precision | ||
|
daniel-sanche marked this conversation as resolved.
Outdated
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you make TimestampPrecision into an enum, you can save this as a primitive here, and then rebuild in the property |
||
| if isinstance(range_element_type, str): | ||
| self._properties["rangeElementType"] = {"type": range_element_type} | ||
| if isinstance(range_element_type, FieldElementType): | ||
|
|
@@ -374,6 +385,13 @@ def policy_tags(self): | |
| resource = self._properties.get("policyTags") | ||
| return PolicyTagList.from_api_repr(resource) if resource is not None else None | ||
|
|
||
| @property | ||
| def timestamp_precision(self): | ||
| """Union[enums.TimestampPrecision, int, None]: Precision (maximum number | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like I mentioned in a different comment, we should make sure it's stored in a single format, to make things easier when we go back to read it. I'd recommend making this just return the enum type
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done, when accessing the property, the user will always get an enum, while the api representation is int or None. We also limited the acceptable inputs to enum or None.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, this doctring doesn't seem to match the format of the others. Shouldn't it be in a returns block? And I'd suggest adding the type as an annotation, so it can be caught by mypy
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. both docstring and type annotation done |
||
| of total digits in base 10) for seconds of TIMESTAMP type. | ||
| """ | ||
| return _helpers._int_or_none(self._properties.get("timestampPrecision")) | ||
|
|
||
| def to_api_repr(self) -> dict: | ||
| """Return a dictionary representing this schema field. | ||
|
|
||
|
|
@@ -417,6 +435,7 @@ def _key(self): | |
| self.description, | ||
| self.fields, | ||
| policy_tags, | ||
| self.timestamp_precision, | ||
|
daniel-sanche marked this conversation as resolved.
Outdated
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you decide to do an Enum subclass, this could look like: self.timestamp_precision.value |
||
| ) | ||
|
|
||
| def to_standard_sql(self) -> standard_sql.StandardSqlField: | ||
|
|
@@ -467,10 +486,9 @@ def __hash__(self): | |
| return hash(self._key()) | ||
|
|
||
| def __repr__(self): | ||
| key = self._key() | ||
| policy_tags = key[-1] | ||
| *initial_tags, policy_tags, timestamp_precision_tag = self._key() | ||
| policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags) | ||
| adjusted_key = key[:-1] + (policy_tags_inst,) | ||
| adjusted_key = (*initial_tags, policy_tags_inst, timestamp_precision_tag) | ||
| return f"{self.__class__.__name__}{adjusted_key}" | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,6 +74,16 @@ | |
| bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), | ||
| bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), | ||
| ] | ||
| SCHEMA_PICOSECOND = [ | ||
| bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), | ||
| bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), | ||
| bigquery.SchemaField( | ||
| "time_pico", | ||
| "TIMESTAMP", | ||
| mode="REQUIRED", | ||
| timestamp_precision=enums.TimestampPrecision.PICOSECOND, | ||
| ), | ||
| ] | ||
| CLUSTERING_SCHEMA = [ | ||
| bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), | ||
| bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), | ||
|
|
@@ -631,6 +641,19 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): | |
| self.assertEqual(time_partitioning.field, "transaction_time") | ||
| self.assertEqual(table.clustering_fields, ["user_email", "store_code"]) | ||
|
|
||
| def test_create_table_w_picosecond_timestamp(self): | ||
| dataset = self.temp_dataset(_make_dataset_id("create_table")) | ||
| table_id = "test_table" | ||
| table_arg = Table(dataset.table(table_id), schema=SCHEMA_PICOSECOND) | ||
| self.assertFalse(_table_exists(table_arg)) | ||
|
|
||
| table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) | ||
| self.to_delete.insert(0, table) | ||
|
|
||
| self.assertTrue(_table_exists(table)) | ||
| self.assertEqual(table.table_id, table_id) | ||
| self.assertEqual(table.schema, SCHEMA_PICOSECOND) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we have a test that reads back a timestamp, and makes sure its in the expected range? Or am I misunderstanding?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This PR only involves creating and reading table schema that has picosecond timestamp. I think we can add the tests in the PR supporting writing to and reading from the table. |
||
|
|
||
| def test_delete_dataset_with_string(self): | ||
| dataset_id = _make_dataset_id("delete_table_true_with_string") | ||
| project = Config.CLIENT.project | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.