Skip to content

Commit a7401da

Browse files
committed
First pass at near-complete dataframe compatibility
1 parent a344b97 commit a7401da

8 files changed

Lines changed: 912 additions & 87 deletions

File tree

datamatrix/_datamatrix/_basecolumn.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@
2020
---
2121
"""
2222

23-
from datamatrix import utils
24-
from datamatrix._datamatrix._index import Index
25-
from datamatrix._ordered_state import OrderedState
26-
from datamatrix._datamatrix._callable_values import CallableFloat
27-
from datamatrix._datamatrix._sort import sortable, fastnumbers
28-
import collections
23+
from .. import utils
24+
from .._ordered_state import OrderedState
25+
from ._index import Index
26+
from ._dataframe_compat_mixin import SeriesCompatMixin
27+
from ._callable_values import CallableFloat
28+
from ._sort import sortable, fastnumbers
2929
import numbers
3030
import operator
3131
import math
@@ -60,7 +60,7 @@
6060
Ellipsis = None # was introduced in Python 3.10
6161

6262

63-
class BaseColumn(OrderedState):
63+
class BaseColumn(OrderedState, SeriesCompatMixin):
6464

6565
"""
6666
desc:
@@ -69,6 +69,7 @@ class BaseColumn(OrderedState):
6969
"""
7070

7171
default_value = u''
72+
dtype = object
7273
ndim = 1
7374

7475
def __init__(self, datamatrix, rowid=None, seq=None, metadata=None):
@@ -1042,3 +1043,4 @@ def __rpow__(self, other):
10421043
return self._operate(other, operator.pow, flip=True)
10431044
def __matmul__(self, other):
10441045
return self._map(other)
1046+
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
"""Implements various functionality for compatibility with pandas. By executing
2+
this module as a script, the _dataframe_compat_mixin module is generated. This
3+
needs to be done only when there are changes in the pandas API.
4+
"""
5+
6+
def _to_pandas(obj):
    """Translate datamatrix objects into their pandas counterparts.

    A `DataMatrix` is passed through `convert.to_dataframe()` and a
    `BaseColumn` through `convert.to_series()`. Lists and dicts are rebuilt
    with every element (or dict value) translated recursively; dict keys are
    left untouched. Any other object, including a tuple, is handed back
    unchanged.

    Args:
        obj: The object to translate.

    Returns:
        The converted object, a new list/dict holding translated elements, or
        `obj` itself when no translation applies.
    """
    # These imports are deliberately local: `_basecolumn` imports the
    # generated mixin module, which in turn imports this module, so importing
    # them at module level would be circular.
    from ._datamatrix import DataMatrix
    from ._basecolumn import BaseColumn
    from datamatrix import convert as cnv

    if isinstance(obj, DataMatrix):
        converted = cnv.to_dataframe(obj)
    elif isinstance(obj, BaseColumn):
        converted = cnv.to_series(obj)
    elif isinstance(obj, list):
        converted = list(map(_to_pandas, obj))
    elif isinstance(obj, dict):
        converted = {}
        for key, val in obj.items():
            converted[key] = _to_pandas(val)
    else:
        converted = obj
    return converted
33+
34+
def _from_pandas(obj):
35+
"""
36+
Convert a pandas DataFrame or Series to a DataMatrix or BaseColumn object.
37+
38+
This function checks the type of the input object and converts it to the corresponding
39+
DataMatrix or BaseColumn object if it is an instance of pandas DataFrame or Series.
40+
If the input is a list or dictionary, it recursively converts each element.
41+
Otherwise, it returns the object as-is.
42+
43+
Args:
44+
obj: The object to be converted. Can be a pandas DataFrame, Series, list, dictionary, or any other type.
45+
46+
Returns:
47+
The converted DataMatrix, BaseColumn, or the original object if no conversion is needed.
48+
49+
Raises:
50+
ImportError: If pandas is not installed.
51+
"""
52+
try:
53+
import pandas as pd
54+
except ImportError:
55+
raise ImportError('Trying to emulate pandas API, but pandas is not installed')
56+
from datamatrix import convert as cnv
57+
58+
if isinstance(obj, pd.DataFrame):
59+
return cnv.from_dataframe(obj)
60+
if isinstance(obj, list):
61+
return [_from_pandas(val) for val in obj]
62+
if isinstance(obj, dict):
63+
return {key: _from_pandas(val) for key, val in obj.items()}
64+
return obj
65+
66+
def _inner(self, function_name, *args, **kwargs):
67+
"""
68+
Internal function to handle method calls on DataMatrix objects using pandas API.
69+
70+
This function checks if the method exists in the pandas API and calls it with the provided arguments.
71+
It also handles the conversion of the input and output objects between DataMatrix and pandas DataFrame.
72+
73+
Args:
74+
self: The DataMatrix object on which the method is called.
75+
function_name (str): The name of the pandas method to be called.
76+
*args: Positional arguments to be passed to the pandas method.
77+
**kwargs: Keyword arguments to be passed to the pandas method.
78+
79+
Returns:
80+
The result of the pandas method call, converted back to a DataMatrix object if necessary.
81+
82+
Raises:
83+
ValueError: If inplace=True is provided as a keyword argument.
84+
NotImplementedError: If the specified method is not found in the pandas API.
85+
"""
86+
if kwargs.get('inplace', False):
87+
raise ValueError('inplace=True is not supported')
88+
try:
89+
fnc = getattr(_to_pandas(self), function_name)
90+
except AttributeError:
91+
raise NotImplementedError(f'{function_name} not found in pandas API')
92+
if callable(fnc):
93+
result = fnc(*_to_pandas(args), **_to_pandas(kwargs))
94+
else:
95+
result = fnc
96+
return _from_pandas(result)
97+
98+
def df_compat_function(function_name):
99+
"""
100+
Decorator to create a function that can be called on a DataMatrix object using the pandas API.
101+
102+
This decorator wraps a function so that it can be called on a DataMatrix object and internally
103+
uses the pandas API to perform the operation.
104+
105+
Args:
106+
function_name (str): The name of the pandas function to be called.
107+
108+
Returns:
109+
A function that can be called on a DataMatrix object.
110+
"""
111+
def inner(self, *args, **kwargs):
112+
return _inner(self, function_name, *args, **kwargs)
113+
return inner
114+
115+
def df_compat_property(function_name):
116+
"""
117+
Decorator to create a property that can be accessed on a DataMatrix object using the pandas API.
118+
119+
This decorator wraps a property so that it can be accessed on a DataMatrix object and internally
120+
uses the pandas API to perform the operation.
121+
122+
Args:
123+
function_name (str): The name of the pandas property to be accessed.
124+
125+
Returns:
126+
A property that can be accessed on a DataMatrix object.
127+
"""
128+
@property
129+
def inner(self, *args, **kwargs):
130+
return _inner(self, function_name, *args, **kwargs)
131+
return inner
132+
133+
def df_compat_staticmethod(function_name):
134+
"""
135+
Decorator to create a static method that can be called on a DataMatrix object using the pandas API.
136+
137+
This decorator wraps a static method so that it can be called on a DataMatrix object and internally
138+
uses the pandas API to perform the operation.
139+
140+
Args:
141+
function_name (str): The name of the pandas static method to be called.
142+
143+
Returns:
144+
A static method that can be called on a DataMatrix object.
145+
"""
146+
def inner(*args, **kwargs):
147+
from datamatrix import convert as cnv
148+
import pandas as pd
149+
try:
150+
fnc = getattr(pd.DataFrame, function_name)
151+
except AttributeError:
152+
raise NotImplementedError(f'{function_name} not found in pandas API')
153+
result = fnc(*args, **kwargs)
154+
if isinstance(result, pd.DataFrame):
155+
return cnv.from_dataframe(result)
156+
return result
157+
return inner
158+
159+
160+
if __name__ == '__main__':
161+
162+
import sys
163+
from pathlib import Path
164+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
165+
from datamatrix import DataMatrix
166+
from datamatrix._datamatrix._basecolumn import BaseColumn
167+
import pandas as pd
168+
169+
def get_attribute_type(attr):
170+
"""Determine the type of an attribute (property, function, staticmethod, etc.)."""
171+
if isinstance(attr, property):
172+
return "property"
173+
elif isinstance(attr, staticmethod):
174+
return "staticmethod"
175+
elif isinstance(attr, classmethod):
176+
return "classmethod"
177+
elif callable(attr):
178+
return "function"
179+
else:
180+
return None
181+
182+
def get_class_attributes(cls):
183+
"""Get all relevant attributes of a class, excluding magic methods."""
184+
attributes = {}
185+
for attr_name in dir(cls):
186+
# Skip magic methods
187+
if not attr_name.startswith('_'):
188+
try:
189+
attr = getattr(cls, attr_name)
190+
attr_type = get_attribute_type(attr)
191+
if attr_type:
192+
attributes[attr_name] = attr_type
193+
except:
194+
# Skip attributes that can't be accessed
195+
continue
196+
return attributes
197+
198+
def compare_classes(cls1, cls2):
199+
"""Compare two classes and find attributes in cls2 not present in cls1."""
200+
cls1_attrs = get_class_attributes(cls1)
201+
cls2_attrs = get_class_attributes(cls2)
202+
203+
missing_attributes = {}
204+
205+
for attr_name, attr_type in cls2_attrs.items():
206+
if attr_name not in cls1_attrs:
207+
missing_attributes[attr_name] = attr_type
208+
209+
return missing_attributes
210+
211+
212+
with open('datamatrix/_datamatrix/_dataframe_compat_mixin.py', 'w') as file:
213+
file.write(f'''"""Auto-generated mixins for pandas compatibility.
214+
Based on pandas {pd.__version__}.
215+
"""
216+
from ._dataframe_compat import df_compat_function, df_compat_property, df_compat_staticmethod
217+
218+
class DataFrameCompatMixin:
219+
''')
220+
result = compare_classes(DataMatrix, pd.DataFrame)
221+
result['iterrows'] = 'function'
222+
result['__dataframe__'] = 'function'
223+
for attr, attr_type in result.items():
224+
file.write(f" {attr} = df_compat_{attr_type}('{attr}')\n")
225+
file.write(f'''
226+
class SeriesCompatMixin:
227+
''')
228+
result = compare_classes(BaseColumn, pd.Series)
229+
for attr, attr_type in result.items():
230+
file.write(f" {attr} = df_compat_{attr_type}('{attr}')\n")

0 commit comments

Comments
 (0)