Skip to content

TypeError when operation field is missing in summary. #1106

@questsul

Description

@questsul

Apache Iceberg version

0.6.0

Please describe the bug 🐞

When attempting to read the metadata.json file, which contains a list of snapshots where some snapshot summaries lack the operation field, PyIceberg encounters the following error:

TypeError: Summary.__init__() missing 1 required positional argument: 'operation'.

Interestingly, the Iceberg Java library parses the same metadata file without any issues.

Full stack trace:

  File "reader.py", line 91, in _get_iceberg_table
    return StaticTable.from_metadata(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pyiceberg/table/__init__.py", line 1101, in from_metadata
    metadata = FromInputFile.table_metadata(file)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pyiceberg/serializers.py", line 113, in table_metadata
    return FromByteStream.table_metadata(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pyiceberg/serializers.py", line 94, in table_metadata
    return TableMetadataUtil.parse_raw(metadata)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pyiceberg/table/metadata.py", line 461, in parse_raw
    return TableMetadataWrapper.model_validate_json(data).root
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pydantic/main.py", line 580, in model_validate_json
    return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Summary.__init__() missing 1 required positional argument: 'operation'

Metadata.json example:

{
  "format-version" : 2,
  "table-uuid" : "9996bcdf-3277-48f4-9e76-9e81766c9e0e",
  "location" : "file://t/some_table/",
  "last-sequence-number" : 45,
  "last-updated-ms" : 1724611070351,
  "last-column-id" : 79,
  "current-schema-id" : 0,
  "schemas" : [ {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "DATA",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 2,
      "name" : "COLUMN_NAME",
      "required" : false,
      "type" : "string"
    }]
  } ],
  "default-spec-id" : 0,
  "partition-specs" : [ {
    "spec-id" : 0,
    "fields" : [ ]
  } ],
  "last-partition-id" : 999,
  "default-sort-order-id" : 0,
  "sort-orders" : [ {
    "order-id" : 0,
    "fields" : [ ]
  } ],
  "properties" : {
    "format-version" : "2"
  },
  "current-snapshot-id" : 1724611070351000000,
  "snapshots" : [ {
    "sequence-number" : 44,
    "snapshot-id" : 1724610129117000000,
    "timestamp-ms" : 1724610129117,
    "manifest-list" : "file://t/some_table/metadata/snap-1724610129117000000-d9b50309-0dff-472d-8711-86ca70021ffb.avro",
    "schema-id" : 0,
    "summary" : {
      "manifests-created" : "8",
      "total-records" : "26508666891",
      "added-files-size" : "3927895626752",
      "manifests-kept" : "0",
      "total-files-size" : "3927895626752",
      "added-records" : "26508666891",
      "added-data-files" : "231513",
      "manifests-replaced" : "0",
      "total-data-files" : "231513"
    }
  }, {
    "sequence-number" : 43,
    "snapshot-id" : 1724006578422000000,
    "timestamp-ms" : 1724006578422,
    "manifest-list" : "file://t/some_table/metadata/snap-1724006578422000000-289566b5-78fe-4b60-9ffa-ab25dee1edde.avro",
    "schema-id" : 0,
    "summary" : {
      "total-files-size" : "3888310341632",
      "added-records" : "26224534820",
      "added-data-files" : "225313",
      "manifests-replaced" : "0",
      "total-data-files" : "225313",
      "manifests-created" : "56",
      "total-records" : "26224534820",
      "added-files-size" : "3888310341632",
      "manifests-kept" : "0"
    }
  }, {
    "sequence-number" : 45,
    "snapshot-id" : 1724611070351000000,
    "timestamp-ms" : 1724611070351,
    "manifest-list" : "file://t/some_table/metadata/snap-1724611070351000000-6a307203-7148-467f-88eb-f932b32dd7f4.avro",
    "schema-id" : 0,
    "summary" : {
      "added-files-size" : "3929709293568",
      "total-records" : "26508666891",
      "manifests-created" : "8",
      "total-data-files" : "227581",
      "manifests-replaced" : "0",
      "added-data-files" : "227581",
      "added-records" : "26508666891",
      "total-files-size" : "3929709293568",
      "operation" : "append",
      "manifests-kept" : "0"
    }
  } ],
  "snapshot-log" : [ {
    "snapshot-id" : 1724006578422000000,
    "timestamp-ms" : 1724006578422
  }, {
    "snapshot-id" : 1724610129117000000,
    "timestamp-ms" : 1724610129117
  }, {
    "snapshot-id" : 1724611070351000000,
    "timestamp-ms" : 1724611070351
  } ]
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions