Skip to content

Commit a51efc9

Browse files
author
Yucheng Low
committed
Merge pull request #158 from hoytak/more-hypervariant-flextype-recording-power
Flex types always converted correctly in cy_variant
2 parents 4896f8b + 4130e4b commit a51efc9

3 files changed

Lines changed: 112 additions & 29 deletions

File tree

oss_src/unity/python/sframe/cython/cy_flexible_type.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1270,7 +1270,7 @@ cdef flexible_type _ft_translate(object v, int tr_code) except *:
12701270

12711271
# These are optimized by the cython compiler into a big switch statement.
12721272
if tr_code == FT_INT_TYPE:
1273-
ret.set_int(<long>v)
1273+
ret.set_int(<flex_int>v)
12741274
return ret
12751275
elif tr_code == FT_FLOAT_TYPE:
12761276
ret.set_double(<double>v)

oss_src/unity/python/sframe/cython/cy_variant.pyx

Lines changed: 88 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@ from .cy_model cimport UnityModel
2222
from .cy_sframe cimport UnitySFrameProxy
2323
from .cy_sarray cimport UnitySArrayProxy
2424

25-
from .cy_flexible_type cimport flexible_type, flex_list, flex_dict
25+
from .cy_flexible_type cimport flexible_type, flex_list, flex_dict, flex_int
2626
from .cy_flexible_type cimport flexible_type_from_pyobject
2727
from .cy_flexible_type cimport pyobject_from_flexible_type
2828
from .cy_flexible_type cimport check_list_to_vector_translation
29+
from .cy_flexible_type cimport flex_type_enum, UNDEFINED
2930

3031
from .cy_dataframe cimport gl_dataframe
3132
from .cy_dataframe cimport is_pandas_dataframe
@@ -53,10 +54,10 @@ from libcpp.map cimport map
5354

5455
# Fast track simple flexible types
5556
DEF VAR_TR_FT_INT = 0
56-
DEF VAR_TR_FT_LONG = 1
57-
DEF VAR_TR_FT_FLOAT = 2
58-
DEF VAR_TR_FT_STR = 3
59-
DEF VAR_TR_FT_UNICODE = 4
57+
DEF VAR_TR_FT_FLOAT = 1
58+
DEF VAR_TR_FT_STR = 2
59+
DEF VAR_TR_FT_UNICODE = 3
60+
DEF VAR_TR_FT_NONE = 4
6061

6162
# Nested types
6263
DEF VAR_TR_DICT = 5
@@ -139,16 +140,18 @@ cdef int _get_tr_code_by_type_string(object v) except -1:
139140
if not internal_classes_set:
140141
import_internal_classes()
141142

142-
if type(v) is int:
143+
if type(v) is int or type(v) is long:
143144
ret = VAR_TR_FT_INT
144-
elif type(v) is long:
145-
ret = VAR_TR_FT_LONG
146145
elif type(v) is float:
147146
ret = VAR_TR_FT_FLOAT
148147
elif type(v) is str:
149148
ret = VAR_TR_FT_STR
150149
elif type(v) is unicode_type:
151150
ret = VAR_TR_FT_UNICODE
151+
elif type(v) is bool:
152+
ret = VAR_TR_FT_INT
153+
elif v is None:
154+
ret = VAR_TR_FT_NONE
152155
elif type(v) is list:
153156
ret = VAR_TR_LIST
154157
elif type(v) is tuple:
@@ -306,10 +309,7 @@ cdef bint _var_set_listlike_internal(variant_vector_type& ret_as_vv,
306309
element_stored_in_flex_list = False
307310

308311
if tr_code == VAR_TR_FT_INT:
309-
ret_as_fl[i].set_int(<int>x)
310-
element_stored_in_flex_list = True
311-
elif tr_code == VAR_TR_FT_LONG:
312-
ret_as_fl[i].set_int(<long>x)
312+
ret_as_fl[i].set_int(<flex_int>x)
313313
element_stored_in_flex_list = True
314314
elif tr_code == VAR_TR_FT_FLOAT:
315315
ret_as_fl[i].set_double(<double>x)
@@ -320,6 +320,9 @@ cdef bint _var_set_listlike_internal(variant_vector_type& ret_as_vv,
320320
elif tr_code == VAR_TR_FT_UNICODE:
321321
ret_as_fl[i].set_string((<unicode>x))
322322
element_stored_in_flex_list = True
323+
elif tr_code == VAR_TR_FT_NONE:
324+
ret_as_fl[i] = flexible_type(UNDEFINED)
325+
element_stored_in_flex_list = True
323326
elif tr_code == VAR_TR_LIST or tr_code == VAR_TR_TUPLE:
324327
ret_as_fl[i].set_list(flex_list())
325328

@@ -406,8 +409,10 @@ cdef inline bint _var_set_dict_internal(variant_map_type& ret_as_vm, flex_dict&
406409

407410
cdef long pos
408411
cdef int tr_code
409-
cdef bint writing_to_flex_dict
410-
412+
cdef bint writing_to_flex_dict, sub_is_flex_type
413+
cdef variant_vector_type sub_vv = variant_vector_type()
414+
cdef variant_map_type sub_vm = variant_map_type()
415+
411416
if output_can_be_varmap:
412417
# First see if it can be translated as flexible type, which
413418
# would be great.
@@ -426,17 +431,65 @@ cdef inline bint _var_set_dict_internal(variant_map_type& ret_as_vm, flex_dict&
426431
tr_code = get_var_tr_code(v)
427432

428433
if tr_code == VAR_TR_FT_INT:
429-
ret_as_fd[pos].second.set_int(<int>v)
430-
elif tr_code == VAR_TR_FT_LONG:
431-
ret_as_fd[pos].second.set_int(<long>v)
434+
ret_as_fd[pos].second.set_int(<flex_int>v)
432435
elif tr_code == VAR_TR_FT_FLOAT:
433436
ret_as_fd[pos].second.set_double(<double>v)
434437
elif tr_code == VAR_TR_FT_STR:
435438
ret_as_fd[pos].second.set_string(<str>v)
436439
elif tr_code == VAR_TR_FT_UNICODE:
437440
ret_as_fd[pos].second.set_string((<unicode>v))
441+
elif tr_code == VAR_TR_FT_NONE:
442+
ret_as_fd[pos].second = flexible_type(UNDEFINED)
438443
elif tr_code == VAR_TR_ATTEMPT_OTHER_FLEXIBLE_TYPE:
439444
ret_as_fd[pos].second = _translate_to_flexible_type(v)
445+
elif tr_code == VAR_TR_LIST or tr_code == VAR_TR_TUPLE:
446+
ret_as_fd[pos].second.set_list(flex_list())
447+
448+
if tr_code == VAR_TR_LIST:
449+
sub_is_flex_type = _var_set_listlike_internal(
450+
sub_vv, ret_as_fd[pos].second.get_list_m(), <list>v, False)
451+
else:
452+
sub_is_flex_type = _var_set_listlike_internal(
453+
sub_vv, ret_as_fd[pos].second.get_list_m(), <tuple>v, False)
454+
455+
if sub_is_flex_type:
456+
check_list_to_vector_translation(ret_as_fd[pos].second)
457+
458+
if not writing_to_flex_dict:
459+
variant_set_flexible_type(ret_as_vm[ret_as_fd[pos].first.get_string()],
460+
ret_as_fd[pos].second)
461+
else:
462+
if writing_to_flex_dict:
463+
ret_as_vm.clear()
464+
for i in range(pos):
465+
variant_set_flexible_type(ret_as_vm[ret_as_fd[i].first.get_string()],
466+
ret_as_fd[i].second)
467+
468+
writing_to_flex_dict = False
469+
470+
471+
variant_set_variant_vector(ret_as_vm[ret_as_fd[pos].first.get_string()], sub_vv)
472+
473+
elif tr_code == VAR_TR_DICT:
474+
ret_as_fd[pos].second.set_dict(flex_dict())
475+
476+
sub_is_flex_type = _var_set_dict_internal(sub_vm, ret_as_fd[pos].second.get_dict_m(), <dict>v, False)
477+
478+
if sub_is_flex_type:
479+
if not writing_to_flex_dict:
480+
variant_set_flexible_type(ret_as_vm[ret_as_fd[pos].first.get_string()],
481+
ret_as_fd[pos].second)
482+
else:
483+
if writing_to_flex_dict:
484+
ret_as_vm.clear()
485+
for i in range(pos):
486+
variant_set_flexible_type(ret_as_vm[ret_as_fd[i].first.get_string()],
487+
ret_as_fd[i].second)
488+
489+
writing_to_flex_dict = False
490+
491+
variant_set_variant_map(ret_as_vm[ret_as_fd[pos].first.get_string()], sub_vm)
492+
440493
else:
441494
# That didn't work -- translate the rest as
442495
# variant type if it can be a varmap.
@@ -555,20 +608,19 @@ cdef _convert_to_variant_type(variant_type& ret, object v, int tr_code):
555608

556609
# Fast-tracked flexible type versions
557610
if tr_code == VAR_TR_FT_INT:
558-
ft.set_int(<int>v)
559-
variant_set_flexible_type(ret, ft)
560-
elif tr_code == VAR_TR_FT_LONG:
561-
ft.set_int(<long>v)
611+
ft.set_int(<flex_int>v)
562612
variant_set_flexible_type(ret, ft)
563613
elif tr_code == VAR_TR_FT_FLOAT:
564-
ft.set_double(<float>v)
614+
ft.set_double(<double>v)
565615
variant_set_flexible_type(ret, ft)
566616
elif tr_code == VAR_TR_FT_STR:
567617
ft.set_string(<str>v)
568618
variant_set_flexible_type(ret, ft)
569619
elif tr_code == VAR_TR_FT_UNICODE:
570620
ft.set_string(<unicode>v)
571621
variant_set_flexible_type(ret, ft)
622+
elif tr_code == VAR_TR_FT_NONE:
623+
variant_set_flexible_type(ret, flexible_type(UNDEFINED))
572624

573625
# Nested container types
574626
elif tr_code == VAR_TR_DICT:
@@ -611,17 +663,16 @@ cdef _convert_to_variant_type(variant_type& ret, object v, int tr_code):
611663
variant_set_closure(ret, make_function_closure_info(v))
612664

613665
# Flexible type -- this is actually the last resort since we
614-
# assume the aboves are exact matches and do not include the
615-
# flexible type stuff.
666+
# assume the above types are exact matches and do not include the
667+
# flexible type stuff, save for the fast paths for common types.
616668
elif tr_code == VAR_TR_ATTEMPT_OTHER_FLEXIBLE_TYPE:
617669
try:
618670
variant_set_flexible_type(ret, flexible_type_from_pyobject(v))
619671
except TypeError:
620672
raise_translation_error(v)
621673

622674
else:
623-
assert False
624-
675+
assert False
625676

626677
################################################################################
627678
# The main translation function
@@ -693,3 +744,14 @@ cdef to_value(PyCommClient cli, variant_type& v):
693744
return to_vector(cli, variant_get_variant_vector(v))
694745
else:
695746
raise TypeError("Unsupported variant type.")
747+
748+
################################################################################
749+
# Routines to assist with debugging.
750+
751+
def _debug_is_flexible_type_encoded(object obj):
752+
"""
753+
Checks to make sure that if an object can be encoded as a flexible
754+
type, then it is.
755+
"""
756+
cdef variant_type vt = from_value(obj)
757+
return (vt.which() == VAR_TYPE_FLEXIBLE_TYPE)

oss_src/unity/python/sframe/test/test_extensions.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from ..data_structures.sframe import SFrame
1111
import random
1212

13+
from ..cython.cy_variant import _debug_is_flexible_type_encoded
14+
1315

1416
class VariantCheckTest(unittest.TestCase):
1517

@@ -67,6 +69,10 @@ def test_stress(self):
6769

6870
random.seed(0)
6971

72+
73+
class A: pass
74+
A.flextype_encodable = True
75+
7076
def _make(depth):
7177

7278
if depth == 0:
@@ -79,8 +85,10 @@ def _make(depth):
7985
elif s == 1:
8086
return random.randint(0,100000)
8187
elif s == 2:
88+
A.flextype_encodable = False
8289
return SArray([random.randint(0,100000) for i in range(2)])
8390
elif s == 3:
91+
A.flextype_encodable = False
8492
return SFrame({'a' : [random.randint(0,100000) for i in range(2)],
8593
'b' : [str(random.randint(0,100000)) for i in range(2)]})
8694

@@ -93,5 +101,18 @@ def _make(depth):
93101
return {str(random.randint(0, 100)) : _make(depth - 1)
94102
for i in range(length)}
95103

96-
for i in range(10):
97-
self.variant_turnaround(_make(5 + i))
104+
for depth in [2,3,4,5,10]:
105+
for i in range(10):
106+
107+
A.flextype_encodable = True
108+
109+
obj = _make(depth)
110+
111+
# Test that it's losslessly encoded and decoded
112+
self.variant_turnaround(obj)
113+
114+
# Test that if it can be
115+
if _debug_is_flexible_type_encoded(obj):
116+
self.assertTrue(A.flextype_encodable)
117+
else:
118+
self.assertFalse(A.flextype_encodable)

0 commit comments

Comments
 (0)