@@ -146,6 +146,8 @@ catalog.create_table(
146146)
147147` ` `
148148
149+ When the table is created, all IDs in the schema are re-assigned to ensure uniqueness.
150+
149151To create a table using a pyarrow schema :
150152
151153` ` ` python
@@ -278,7 +280,7 @@ tbl.overwrite(df)
278280
279281The data is written to the table, and when the table is read using `tbl.scan().to_arrow()` :
280282
281- ` ` `
283+ ` ` ` python
282284pyarrow.Table
283285city: string
284286lat: double
@@ -301,7 +303,7 @@ tbl.append(df)
301303
302304When reading the table `tbl.scan().to_arrow()` you can see that `Groningen` is now also part of the table :
303305
304- ` ` `
306+ ` ` ` python
305307pyarrow.Table
306308city: string
307309lat: double
@@ -340,7 +342,7 @@ tbl.delete(delete_filter="city == 'Paris'")
340342In the above example, any records where the city field value equals `Paris` will be deleted.
341343Running `tbl.scan().to_arrow()` will now yield :
342344
343- ` ` `
345+ ` ` ` python
344346pyarrow.Table
345347city: string
346348lat: double
@@ -360,7 +362,6 @@ To explore the table metadata, tables can be inspected.
360362!!! tip "Time Travel"
361363 To inspect a table's metadata with the time travel feature, call the inspect table method with the `snapshot_id` argument.
362364 Time travel is supported on all metadata tables except `snapshots` and `refs`.
363-
364365 ` ` ` python
365366 table.inspect.entries(snapshot_id=805611270568163028)
366367 ` ` `
@@ -375,7 +376,7 @@ Inspect the snapshots of the table:
375376table.inspect.snapshots()
376377` ` `
377378
378- ```
379+ ` ` ` python
379380pyarrow.Table
380381committed_at: timestamp[ms] not null
381382snapshot_id: int64 not null
@@ -403,7 +404,7 @@ Inspect the partitions of the table:
403404table.inspect.partitions()
404405` ` `
405406
406- ```
407+ ` ` ` python
407408pyarrow.Table
408409partition: struct<dt_month: int32, dt_day: date32[day]> not null
409410 child 0, dt_month: int32
@@ -444,7 +445,7 @@ To show all the table's current manifest entries for both data and delete files.
444445table.inspect.entries()
445446` ` `
446447
447- ```
448+ ` ` ` python
448449pyarrow.Table
449450status: int8 not null
450451snapshot_id: int64 not null
@@ -602,7 +603,7 @@ To show a table's known snapshot references:
602603table.inspect.refs()
603604` ` `
604605
605- ```
606+ ` ` ` python
606607pyarrow.Table
607608name: string not null
608609type: string not null
@@ -627,7 +628,7 @@ To show a table's current file manifests:
627628table.inspect.manifests()
628629` ` `
629630
630- ```
631+ ` ` ` python
631632pyarrow.Table
632633content: int8 not null
633634path: string not null
@@ -677,7 +678,7 @@ To show table metadata log entries:
677678table.inspect.metadata_log_entries()
678679` ` `
679680
680- ```
681+ ` ` ` python
681682pyarrow.Table
682683timestamp: timestamp[ms] not null
683684file: string not null
@@ -700,7 +701,7 @@ To show a table's history:
700701table.inspect.history()
701702` ` `
702703
703- ```
704+ ` ` ` python
704705pyarrow.Table
705706made_current_at: timestamp[ms] not null
706707snapshot_id: int64 not null
@@ -721,7 +722,7 @@ Inspect the data files in the current snapshot of the table:
721722table.inspect.files()
722723` ` `
723724
724- ```
725+ ` ` ` python
725726pyarrow.Table
726727content: int8 not null
727728file_path: string not null
@@ -861,7 +862,7 @@ To show only data files or delete files in the current snapshot, use `table.insp
861862
862863Expert Iceberg users may choose to commit existing parquet files to the Iceberg table as data files, without rewriting them.
863864
864- ```
865+ ` ` ` python
865866# Given that these parquet files have schema consistent with the Iceberg table
866867
867868file_paths = [
@@ -941,7 +942,7 @@ with table.update_schema() as update:
941942
942943Now the table has the union of the two schemas `print(table.schema())` :
943944
944- ```
945+ ` ` ` python
945946table {
946947 1: city: optional string
947948 2: lat: optional double
@@ -1191,7 +1192,7 @@ table.scan(
11911192
11921193This will return a PyArrow table:
11931194
1194- ```
1195+ ``` python
11951196pyarrow.Table
11961197VendorID: int64
11971198tpep_pickup_datetime: timestamp[us, tz=+ 00 :00 ]
@@ -1233,7 +1234,7 @@ table.scan(
12331234
12341235This will return a Pandas dataframe:
12351236
1236- ```
1237+ ``` python
12371238 VendorID tpep_pickup_datetime tpep_dropoff_datetime
123812390 2 2021 - 04 - 01 00 :28 :05 + 00 :00 2021 - 04 - 01 00 :47 :59 + 00 :00
123912401 1 2021 - 04 - 01 00 :39 :01 + 00 :00 2021 - 04 - 01 00 :57 :39 + 00 :00
@@ -1306,7 +1307,7 @@ ray_dataset = table.scan(
13061307
13071308This will return a Ray dataset:
13081309
1309- ```
1310+ ``` python
13101311Dataset(
13111312 num_blocks = 1 ,
13121313 num_rows = 1168798 ,
@@ -1357,7 +1358,7 @@ df = df.select("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime")
13571358
13581359This returns a Daft DataFrame which is lazily materialized. Printing `df` will display the schema:
13591360
1360- ```
1361+ ``` python
13611362╭──────────┬───────────────────────────────┬───────────────────────────────╮
13621363│ VendorID ┆ tpep_pickup_datetime ┆ tpep_dropoff_datetime │
13631364│ -- - ┆ -- - ┆ -- - │
@@ -1375,7 +1376,7 @@ This is correctly optimized to take advantage of Iceberg features such as hidden
13751376df.show(2 )
13761377```
13771378
1378- ```
1379+ ``` python
13791380╭──────────┬───────────────────────────────┬───────────────────────────────╮
13801381│ VendorID ┆ tpep_pickup_datetime ┆ tpep_dropoff_datetime │
13811382│ -- - ┆ -- - ┆ -- - │
0 commit comments