Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions cognite/client/_api/raw/rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

if TYPE_CHECKING:
import pandas as pd
import polars as pl

from cognite.client import AsyncCogniteClient
from cognite.client.config import ClientConfig
Expand Down Expand Up @@ -517,6 +518,51 @@ async def retrieve_dataframe(
cols = [r.columns for r in rows]
return pd.DataFrame(cols, index=idx, dtype=object if not infer_dtypes else None)

async def retrieve_polars_dataframe(
self,
db_name: str,
table_name: str,
min_last_updated_time: int | None = None,
max_last_updated_time: int | None = None,
columns: list[str] | None = None,
limit: int | None = DEFAULT_LIMIT_READ,
partitions: int | None = None,
include_last_updated_time: bool = False,
) -> pl.DataFrame:
"""Retrieve rows in a table as a polars dataframe.

Rowkeys are used as the index.

Args:
db_name (str): Name of the database.
table_name (str): Name of the table.
min_last_updated_time (int | None): Rows must have been last updated after this time (exclusive). Milliseconds since epoch.
max_last_updated_time (int | None): Rows must have been last updated before this time (inclusive). Milliseconds since epoch.
columns (list[str] | None): List of column keys. Set to `None` to retrieving all, use empty list, [], to retrieve only row keys.
limit (int | None): The number of rows to retrieve. Defaults to 25. Set to -1, float("inf") or None to return all items.
partitions (int | None): Retrieve rows in parallel using this number of workers. Can be used together with a (large) finite limit.
include_last_updated_time (bool): Whether to include the last_updated_time in the dataframe. If True, 'last_updated_time' will be added to the dataframe as a column. Defaults to False.
Returns:
pl.DataFrame: The requested rows in a polars dataframe.

Examples:
Get dataframe:

>>> from cognite.client import CogniteClient, AsyncCogniteClient
>>> client = CogniteClient()
>>> # async_client = AsyncCogniteClient() # another option
>>> df = client.raw.rows.retrieve_polars_dataframe("db1", "t1", limit=5)
"""
pl = local_import("polars")
rows = await self.list(
db_name, table_name, min_last_updated_time, max_last_updated_time, columns, limit, partitions
)
if include_last_updated_time:
data = [{"key": r.key, **(r.columns or {}), "last_updated_time": r.last_updated_time} for r in rows]
else:
data = [{"key": r.key, **(r.columns or {})} for r in rows]
return pl.DataFrame(data)
Comment thread
christoffer-hk marked this conversation as resolved.

async def _get_parallel_cursors(
self,
db_name: str,
Expand Down
54 changes: 53 additions & 1 deletion cognite/client/_sync_api/raw/rows.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions cognite/client/data_classes/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

if TYPE_CHECKING:
import pandas
import polars

from cognite.client import AsyncCogniteClient

Expand Down Expand Up @@ -188,6 +189,14 @@ def _repr_html_(self) -> str:
def _maybe_set_client_ref(self, client: AsyncCogniteClient) -> Self:
return self # Base resource has no client ref set

def to_polars(self) -> polars.DataFrame:
"""Convert the instance into a polars DataFrame.

Returns:
polars.DataFrame: The polars DataFrame representing the instance.
"""
raise NotImplementedError(f"to_polars is not implemented for {type(self).__name__}")


class UnknownCogniteResource(CogniteResource):
def __init__(self, data: dict[str, Any]) -> None:
Expand Down
22 changes: 22 additions & 0 deletions cognite/client/data_classes/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

if TYPE_CHECKING:
import pandas
import polars


class RowCore(WriteableCogniteResource["RowWrite"], ABC):
Expand Down Expand Up @@ -66,6 +67,15 @@ def to_pandas(self) -> pandas.DataFrame: # type: ignore[override]
pd = local_import("pandas")
return pd.DataFrame([self.columns], [self.key])

def to_polars(self) -> polars.DataFrame:
"""Convert the instance into a polars DataFrame.

Returns:
polars.DataFrame: The polars DataFrame representing this instance.
"""
pl = local_import("polars")
return pl.DataFrame(data={"key": self.key, **(self.columns or {})})


T_Row = TypeVar("T_Row", bound=RowCore)

Expand Down Expand Up @@ -133,6 +143,18 @@ def to_pandas(self) -> pandas.DataFrame: # type: ignore[override]
index, data = zip(*((row.key, row.columns) for row in self))
return pd.DataFrame.from_records(data, index=index)

def to_polars(self) -> polars.DataFrame:
"""Convert the instance into a polars DataFrame.

Returns:
polars.DataFrame: The polars DataFrame representing this instance.
"""
pl = local_import("polars")
if not self:
return pl.DataFrame(data=[])
data = ({"key": row.key, **(row.columns or {})} for row in self)
return pl.DataFrame(data=data)
Comment thread
christoffer-hk marked this conversation as resolved.


class RowWriteList(RowListCore[RowWrite]):
_RESOURCE = RowWrite
Expand Down
85 changes: 84 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ dev = [
"types-authlib (>=1.6.2.20250825, <2)",
"anyio (>=4.11.0, <5)",
"testbook (>=0.4.2, <0.5)",
"ipykernel (>=7.2.0, <8)"
"ipykernel (>=7.2.0, <8)",
"polars[rtcompat] (>=1.41.2,<2.0.0)",
]
docs = [
"sphinx ==8.1.*", # Increase when Python 3.10 support is dropped
Expand Down
15 changes: 14 additions & 1 deletion scripts/sync_client_codegen/codegen_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@
from cognite.client._api_client import APIClient
from cognite.client.config import ClientConfig
from cognite.client.credentials import Token
from scripts.sync_client_codegen.constants import ASYNC_METHODS_TO_KEEP, MAYBE_IMPORTS, SYNC_METHODS_TO_KEEP
from scripts.sync_client_codegen.constants import (
ASYNC_METHODS_TO_KEEP,
FOUR_SPACES,
MAYBE_IMPORTS,
SYNC_METHODS_TO_KEEP,
)


def get_api_class_by_attribute(cls_: object, parent_name: tuple[str, ...] = ()) -> dict[str, type[APIClient]]:
Expand Down Expand Up @@ -332,3 +337,11 @@ def get_canonical_source(source: Path | str) -> str:

tree = ast.parse(source)
return ast.unparse(tree)


def create_type_checking_imports_block(type_checking_imports: str) -> str:
"""Creates a block of code for type checking imports. If there are no type checking imports, it returns an empty string."""
if not type_checking_imports.strip():
return ""
parsed_imports = "\n".join([f"{FOUR_SPACES}{line}" for line in type_checking_imports.split("\n")])
return f"if TYPE_CHECKING:\n{parsed_imports}"
5 changes: 3 additions & 2 deletions scripts/sync_client_codegen/create_sync_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from scripts.sync_client_codegen.codegen_utils import (
clean_up_files,
create_type_checking_imports_block,
ensure_parent_dir,
file_has_changed,
find_api_class_name,
Expand Down Expand Up @@ -150,15 +151,15 @@ def _generate_code_for_single_sync_api(
api_names, nested_apis = find_self_assignments(class_def)
all_imports = fix_imports_for_sync_apis(all_imports, api_names)
module_constants = get_module_level_constants(tree)

type_checking_imports_block = create_type_checking_imports_block(type_checking_imports)
# Combine everything 🤞
return (
textwrap.dedent(
SYNC_API_TEMPLATE.format(
file_hash=file_hash,
class_name=foolish_cls_name_rewrite(class_name),
existing_imports=all_imports,
type_checking_imports=type_checking_imports,
type_checking_imports=type_checking_imports_block,
nested_apis_init="\n ".join(nested_apis),
module_constants=module_constants,
)
Expand Down
4 changes: 1 addition & 3 deletions scripts/sync_client_codegen/sync_api_template.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@ from cognite.client.utils._concurrency import _get_event_loop_executor
from typing import Any, Iterator, TYPE_CHECKING, overload
from collections.abc import Coroutine

if TYPE_CHECKING:
import pandas as pd
{type_checking_imports}
{type_checking_imports}

{module_constants}

Expand Down
Loading
Loading