Skip to content

Commit 4130e4b

Browse files
author
Hoyt Koepke
committed
Flex types always converted correctly in cy_variant
Updated the logic in cy_variant.pyx to ensure that flexible types are converted correctly. Before, some types that should have been treated as flexible types were converted to variant types instead. This PR ensures that such types are indeed converted correctly. Tests included.
1 parent fbcab89 commit 4130e4b

3 files changed

Lines changed: 112 additions & 29 deletions

File tree

oss_src/unity/python/sframe/cython/cy_flexible_type.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1270,7 +1270,7 @@ cdef flexible_type _ft_translate(object v, int tr_code) except *:
12701270

12711271
# These are optimized by the cython compiler into a big switch statement.
12721272
if tr_code == FT_INT_TYPE:
1273-
ret.set_int(<long>v)
1273+
ret.set_int(<flex_int>v)
12741274
return ret
12751275
elif tr_code == FT_FLOAT_TYPE:
12761276
ret.set_double(<double>v)

oss_src/unity/python/sframe/cython/cy_variant.pyx

Lines changed: 88 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@ from .cy_model cimport UnityModel
2222
from .cy_sframe cimport UnitySFrameProxy
2323
from .cy_sarray cimport UnitySArrayProxy
2424

25-
from .cy_flexible_type cimport flexible_type, flex_list, flex_dict
25+
from .cy_flexible_type cimport flexible_type, flex_list, flex_dict, flex_int
2626
from .cy_flexible_type cimport flexible_type_from_pyobject
2727
from .cy_flexible_type cimport pyobject_from_flexible_type
2828
from .cy_flexible_type cimport check_list_to_vector_translation
29+
from .cy_flexible_type cimport flex_type_enum, UNDEFINED
2930

3031
from .cy_dataframe cimport gl_dataframe
3132
from .cy_dataframe cimport is_pandas_dataframe
@@ -53,10 +54,10 @@ from libcpp.map cimport map
5354

5455
# Fast track simple flexible types
5556
DEF VAR_TR_FT_INT = 0
56-
DEF VAR_TR_FT_LONG = 1
57-
DEF VAR_TR_FT_FLOAT = 2
58-
DEF VAR_TR_FT_STR = 3
59-
DEF VAR_TR_FT_UNICODE = 4
57+
DEF VAR_TR_FT_FLOAT = 1
58+
DEF VAR_TR_FT_STR = 2
59+
DEF VAR_TR_FT_UNICODE = 3
60+
DEF VAR_TR_FT_NONE = 4
6061

6162
# Nested types
6263
DEF VAR_TR_DICT = 5
@@ -139,16 +140,18 @@ cdef int _get_tr_code_by_type_string(object v) except -1:
139140
if not internal_classes_set:
140141
import_internal_classes()
141142

142-
if type(v) is int:
143+
if type(v) is int or type(v) is long:
143144
ret = VAR_TR_FT_INT
144-
elif type(v) is long:
145-
ret = VAR_TR_FT_LONG
146145
elif type(v) is float:
147146
ret = VAR_TR_FT_FLOAT
148147
elif type(v) is str:
149148
ret = VAR_TR_FT_STR
150149
elif type(v) is unicode_type:
151150
ret = VAR_TR_FT_UNICODE
151+
elif type(v) is bool:
152+
ret = VAR_TR_FT_INT
153+
elif v is None:
154+
ret = VAR_TR_FT_NONE
152155
elif type(v) is list:
153156
ret = VAR_TR_LIST
154157
elif type(v) is tuple:
@@ -306,10 +309,7 @@ cdef bint _var_set_listlike_internal(variant_vector_type& ret_as_vv,
306309
element_stored_in_flex_list = False
307310

308311
if tr_code == VAR_TR_FT_INT:
309-
ret_as_fl[i].set_int(<int>x)
310-
element_stored_in_flex_list = True
311-
elif tr_code == VAR_TR_FT_LONG:
312-
ret_as_fl[i].set_int(<long>x)
312+
ret_as_fl[i].set_int(<flex_int>x)
313313
element_stored_in_flex_list = True
314314
elif tr_code == VAR_TR_FT_FLOAT:
315315
ret_as_fl[i].set_double(<double>x)
@@ -320,6 +320,9 @@ cdef bint _var_set_listlike_internal(variant_vector_type& ret_as_vv,
320320
elif tr_code == VAR_TR_FT_UNICODE:
321321
ret_as_fl[i].set_string((<unicode>x))
322322
element_stored_in_flex_list = True
323+
elif tr_code == VAR_TR_FT_NONE:
324+
ret_as_fl[i] = flexible_type(UNDEFINED)
325+
element_stored_in_flex_list = True
323326
elif tr_code == VAR_TR_LIST or tr_code == VAR_TR_TUPLE:
324327
ret_as_fl[i].set_list(flex_list())
325328

@@ -406,8 +409,10 @@ cdef inline bint _var_set_dict_internal(variant_map_type& ret_as_vm, flex_dict&
406409

407410
cdef long pos
408411
cdef int tr_code
409-
cdef bint writing_to_flex_dict
410-
412+
cdef bint writing_to_flex_dict, sub_is_flex_type
413+
cdef variant_vector_type sub_vv = variant_vector_type()
414+
cdef variant_map_type sub_vm = variant_map_type()
415+
411416
if output_can_be_varmap:
412417
# First see if it can be translated as flexible type, which
413418
# would be great.
@@ -426,17 +431,65 @@ cdef inline bint _var_set_dict_internal(variant_map_type& ret_as_vm, flex_dict&
426431
tr_code = get_var_tr_code(v)
427432

428433
if tr_code == VAR_TR_FT_INT:
429-
ret_as_fd[pos].second.set_int(<int>v)
430-
elif tr_code == VAR_TR_FT_LONG:
431-
ret_as_fd[pos].second.set_int(<long>v)
434+
ret_as_fd[pos].second.set_int(<flex_int>v)
432435
elif tr_code == VAR_TR_FT_FLOAT:
433436
ret_as_fd[pos].second.set_double(<double>v)
434437
elif tr_code == VAR_TR_FT_STR:
435438
ret_as_fd[pos].second.set_string(<str>v)
436439
elif tr_code == VAR_TR_FT_UNICODE:
437440
ret_as_fd[pos].second.set_string((<unicode>v))
441+
elif tr_code == VAR_TR_FT_NONE:
442+
ret_as_fd[pos].second = flexible_type(UNDEFINED)
438443
elif tr_code == VAR_TR_ATTEMPT_OTHER_FLEXIBLE_TYPE:
439444
ret_as_fd[pos].second = _translate_to_flexible_type(v)
445+
elif tr_code == VAR_TR_LIST or tr_code == VAR_TR_TUPLE:
446+
ret_as_fd[pos].second.set_list(flex_list())
447+
448+
if tr_code == VAR_TR_LIST:
449+
sub_is_flex_type = _var_set_listlike_internal(
450+
sub_vv, ret_as_fd[pos].second.get_list_m(), <list>v, False)
451+
else:
452+
sub_is_flex_type = _var_set_listlike_internal(
453+
sub_vv, ret_as_fd[pos].second.get_list_m(), <tuple>v, False)
454+
455+
if sub_is_flex_type:
456+
check_list_to_vector_translation(ret_as_fd[pos].second)
457+
458+
if not writing_to_flex_dict:
459+
variant_set_flexible_type(ret_as_vm[ret_as_fd[pos].first.get_string()],
460+
ret_as_fd[pos].second)
461+
else:
462+
if writing_to_flex_dict:
463+
ret_as_vm.clear()
464+
for i in range(pos):
465+
variant_set_flexible_type(ret_as_vm[ret_as_fd[i].first.get_string()],
466+
ret_as_fd[i].second)
467+
468+
writing_to_flex_dict = False
469+
470+
471+
variant_set_variant_vector(ret_as_vm[ret_as_fd[pos].first.get_string()], sub_vv)
472+
473+
elif tr_code == VAR_TR_DICT:
474+
ret_as_fd[pos].second.set_dict(flex_dict())
475+
476+
sub_is_flex_type = _var_set_dict_internal(sub_vm, ret_as_fd[pos].second.get_dict_m(), <dict>v, False)
477+
478+
if sub_is_flex_type:
479+
if not writing_to_flex_dict:
480+
variant_set_flexible_type(ret_as_vm[ret_as_fd[pos].first.get_string()],
481+
ret_as_fd[pos].second)
482+
else:
483+
if writing_to_flex_dict:
484+
ret_as_vm.clear()
485+
for i in range(pos):
486+
variant_set_flexible_type(ret_as_vm[ret_as_fd[i].first.get_string()],
487+
ret_as_fd[i].second)
488+
489+
writing_to_flex_dict = False
490+
491+
variant_set_variant_map(ret_as_vm[ret_as_fd[pos].first.get_string()], sub_vm)
492+
440493
else:
441494
# That didn't work -- translate the rest as
442495
# variant type if it can be a varmap.
@@ -555,20 +608,19 @@ cdef _convert_to_variant_type(variant_type& ret, object v, int tr_code):
555608

556609
# Fast-tracked flexible type versions
557610
if tr_code == VAR_TR_FT_INT:
558-
ft.set_int(<int>v)
559-
variant_set_flexible_type(ret, ft)
560-
elif tr_code == VAR_TR_FT_LONG:
561-
ft.set_int(<long>v)
611+
ft.set_int(<flex_int>v)
562612
variant_set_flexible_type(ret, ft)
563613
elif tr_code == VAR_TR_FT_FLOAT:
564-
ft.set_double(<float>v)
614+
ft.set_double(<double>v)
565615
variant_set_flexible_type(ret, ft)
566616
elif tr_code == VAR_TR_FT_STR:
567617
ft.set_string(<str>v)
568618
variant_set_flexible_type(ret, ft)
569619
elif tr_code == VAR_TR_FT_UNICODE:
570620
ft.set_string(<unicode>v)
571621
variant_set_flexible_type(ret, ft)
622+
elif tr_code == VAR_TR_FT_NONE:
623+
variant_set_flexible_type(ret, flexible_type(UNDEFINED))
572624

573625
# Nested container types
574626
elif tr_code == VAR_TR_DICT:
@@ -611,17 +663,16 @@ cdef _convert_to_variant_type(variant_type& ret, object v, int tr_code):
611663
variant_set_closure(ret, make_function_closure_info(v))
612664

613665
# Flexible type -- this is actually the last resort since we
614-
# assume the aboves are exact matches and do not include the
615-
# flexible type stuff.
666+
# assume the above types are exact matches and do not include the
667+
# flexible type stuff, save for the fast paths for common types.
616668
elif tr_code == VAR_TR_ATTEMPT_OTHER_FLEXIBLE_TYPE:
617669
try:
618670
variant_set_flexible_type(ret, flexible_type_from_pyobject(v))
619671
except TypeError:
620672
raise_translation_error(v)
621673

622674
else:
623-
assert False
624-
675+
assert False
625676

626677
################################################################################
627678
# The main translation function
@@ -693,3 +744,14 @@ cdef to_value(PyCommClient cli, variant_type& v):
693744
return to_vector(cli, variant_get_variant_vector(v))
694745
else:
695746
raise TypeError("Unsupported variant type.")
747+
748+
################################################################################
749+
# Routines to assist with debugging.
750+
751+
def _debug_is_flexible_type_encoded(object obj):
752+
"""
753+
Checks to make sure that if an object can be encoded as a flexible
754+
type, then it is.
755+
"""
756+
cdef variant_type vt = from_value(obj)
757+
return (vt.which() == VAR_TYPE_FLEXIBLE_TYPE)

oss_src/unity/python/sframe/test/test_extensions.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from ..data_structures.sframe import SFrame
1111
import random
1212

13+
from ..cython.cy_variant import _debug_is_flexible_type_encoded
14+
1315

1416
class VariantCheckTest(unittest.TestCase):
1517

@@ -67,6 +69,10 @@ def test_stress(self):
6769

6870
random.seed(0)
6971

72+
73+
class A: pass
74+
A.flextype_encodable = True
75+
7076
def _make(depth):
7177

7278
if depth == 0:
@@ -79,8 +85,10 @@ def _make(depth):
7985
elif s == 1:
8086
return random.randint(0,100000)
8187
elif s == 2:
88+
A.flextype_encodable = False
8289
return SArray([random.randint(0,100000) for i in range(2)])
8390
elif s == 3:
91+
A.flextype_encodable = False
8492
return SFrame({'a' : [random.randint(0,100000) for i in range(2)],
8593
'b' : [str(random.randint(0,100000)) for i in range(2)]})
8694

@@ -93,5 +101,18 @@ def _make(depth):
93101
return {str(random.randint(0, 100)) : _make(depth - 1)
94102
for i in range(length)}
95103

96-
for i in range(10):
97-
self.variant_turnaround(_make(5 + i))
104+
for depth in [2,3,4,5,10]:
105+
for i in range(10):
106+
107+
A.flextype_encodable = True
108+
109+
obj = _make(depth)
110+
111+
# Test that it's losslessly encoded and decoded
112+
self.variant_turnaround(obj)
113+
114+
# Test that if it can be
115+
if _debug_is_flexible_type_encoded(obj):
116+
self.assertTrue(A.flextype_encodable)
117+
else:
118+
self.assertFalse(A.flextype_encodable)

0 commit comments

Comments
 (0)