Skip to content

Commit fbcab89

Browse files
author
Yucheng Low
committed
Merge pull request #147 from esamanas/dbapi2-support-PRBRANCH
Add DBAPI2 support for SFrame
2 parents 343bba6 + 517f4c8 commit fbcab89

25 files changed

Lines changed: 1834 additions & 11 deletions

oss_src/flexible_type/flexible_type_base_types.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,27 @@ struct has_direct_conversion_to_flexible_type {
768768
flex_type_enum::UNDEFINED;
769769
};
770770

771+
/**
772+
* Given a set of types, choose a common type that all types in the set can be
773+
* converted to and preserves the most data. Not designed to be passed a set
774+
* with UNDEFINED in it.
775+
*/
776+
inline flex_type_enum get_common_type(const std::set<flex_type_enum> &types) {
777+
if (types.size() == 0) return flex_type_enum::FLOAT;
778+
else if (types.size() == 1) return *(types.begin());
779+
else if (types.size() == 2) {
780+
if (types.count(flex_type_enum::INTEGER) && types.count(flex_type_enum::FLOAT)) {
781+
return flex_type_enum::FLOAT;
782+
}
783+
if (types.count(flex_type_enum::LIST) && types.count(flex_type_enum::VECTOR)) {
784+
return flex_type_enum::LIST;
785+
}
786+
} else {
787+
throw std::string("Could not find a common type to convert all values.");
788+
}
789+
790+
return flex_type_enum::UNDEFINED;
791+
}
771792

772793
} // namespace graphlab
773794

oss_src/unity/lib/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ make_library(unity_core
2727
unity_odbc_connection.cpp
2828
image_util.cpp
2929
../extensions/additional_sframe_utilities.cpp
30+
unity_sarray_builder.cpp
31+
unity_sframe_builder.cpp
3032
REQUIRES
3133
flexible_type
3234
pylambda table_printer
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/**
2+
* Copyright (C) 2015 Dato, Inc.
3+
* All rights reserved.
4+
*
5+
* This software may be modified and distributed under the terms
6+
* of the BSD license. See the LICENSE file for details.
7+
*/
8+
#ifndef GRAPHLAB_UNITY_SARRAY_BUILDER_INTERFACE_HPP
9+
#define GRAPHLAB_UNITY_SARRAY_BUILDER_INTERFACE_HPP
10+
#include <vector>
11+
#include <flexible_type/flexible_type.hpp>
12+
#include <cppipc/magic_macros.hpp>
13+
14+
namespace graphlab {
15+
16+
class unity_sarray_base;
17+
18+
/**
 * Declares the unity_sarray_builder_base interface together with its
 * unity_sarray_builder_proxy counterpart.
 *
 * Each entry below is (return type, method name, (argument types)...):
 *  - init(size_t, size_t, flex_type_enum)
 *  - append(const flexible_type&, size_t)
 *  - append_multiple(const std::vector<flexible_type>&, size_t)
 *  - get_type()
 *  - read_history(size_t, size_t)
 *  - close()
 */
GENERATE_INTERFACE_AND_PROXY(unity_sarray_builder_base, unity_sarray_builder_proxy,
    (void, init, (size_t)(size_t)(flex_type_enum))
    (void, append, (const flexible_type&)(size_t))
    (void, append_multiple, (const std::vector<flexible_type>&)(size_t))
    (flex_type_enum, get_type, )
    (std::vector<flexible_type>, read_history, (size_t)(size_t))
    (std::shared_ptr<unity_sarray_base>, close, )
)
26+
27+
} // namespace graphlab
28+
29+
#endif //GRAPHLAB_UNITY_SARRAY_BUILDER_INTERFACE_HPP
30+
#include <unity/lib/api/unity_sarray_interface.hpp>
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#ifndef GRAPHLAB_UNITY_SFRAME_BUILDER_INTERFACE_HPP
2+
#define GRAPHLAB_UNITY_SFRAME_BUILDER_INTERFACE_HPP
3+
#include <vector>
4+
#include <string>
5+
#include <flexible_type/flexible_type.hpp>
6+
#include <cppipc/magic_macros.hpp>
7+
8+
namespace graphlab {
9+
10+
class unity_sframe_base;
11+
12+
/**
 * Declares the unity_sframe_builder_base interface together with its
 * unity_sframe_builder_proxy counterpart.
 *
 * Each entry below is (return type, method name, (argument types)...).
 *
 * NOTE(review): the meaning of the individual init arguments is not visible
 * from this header -- presumably segment count, history size, column names,
 * column types and a string option; confirm against the implementing class.
 */
GENERATE_INTERFACE_AND_PROXY(unity_sframe_builder_base, unity_sframe_builder_proxy,
    (void, init, (size_t)(size_t)(std::vector<std::string>)(std::vector<flex_type_enum>)(std::string))
    (void, append, (const std::vector<flexible_type>&)(size_t))
    (void, append_multiple, (const std::vector<std::vector<flexible_type>>&)(size_t))
    (std::vector<std::string>, column_names, )
    (std::vector<flex_type_enum>, column_types, )
    (std::vector<std::vector<flexible_type>>, read_history, (size_t)(size_t))
    (std::shared_ptr<unity_sframe_base>, close, )
)
21+
22+
} // namespace graphlab
23+
24+
#endif //GRAPHLAB_UNITY_SFRAME_BUILDER_INTERFACE_HPP
25+
#include <unity/lib/api/unity_sframe_interface.hpp>

oss_src/unity/lib/gl_sarray.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,12 @@ flex_type_enum infer_type_of_list(const std::vector<flexible_type>& vec) {
3939
last_type = val.get_type();
4040
}
4141
}
42-
if (types.size() == 0) return flex_type_enum::FLOAT;
43-
else if (types.size() == 1) return *(types.begin());
44-
else if (types.size() == 2) {
45-
if (types.count(flex_type_enum::INTEGER) && types.count(flex_type_enum::FLOAT)) {
46-
return flex_type_enum::FLOAT;
47-
}
48-
if (types.count(flex_type_enum::LIST) && types.count(flex_type_enum::VECTOR)) {
49-
return flex_type_enum::LIST;
50-
}
42+
43+
try {
44+
return get_common_type(types);
45+
} catch(std::string &e) {
46+
throw std::string("Cannot infer Array type. Not all elements of array are the same type.");
5147
}
52-
throw std::string("Cannot infer Array type. Not all elements of array are the same type.");
5348
}
5449

5550
/**
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/**
2+
* Copyright (C) 2015 Dato, Inc.
3+
* All rights reserved.
4+
*
5+
* This software may be modified and distributed under the terms
6+
* of the BSD license. See the LICENSE file for details.
7+
*/
8+
9+
#include <unity/lib/unity_sarray_builder.hpp>
10+
#include <unity/lib/unity_sarray.hpp>
11+
12+
namespace graphlab {
13+
14+
void unity_sarray_builder::init(size_t num_segments, size_t history_size, flex_type_enum dtype) {
15+
if(m_inited)
16+
log_and_throw("This sarray_builder has already been initialized!");
17+
18+
m_sarray = std::make_shared<sarray<flexible_type>>();
19+
m_sarray->open_for_write(num_segments);
20+
m_out_iters.resize(num_segments);
21+
m_history.resize(num_segments);
22+
for(size_t i = 0; i < num_segments; ++i) {
23+
m_out_iters[i] = m_sarray->get_output_iterator(i);
24+
m_history[i] = std::make_shared<boost::circular_buffer<flexible_type>>(history_size);
25+
}
26+
m_given_dtype = dtype;
27+
if(dtype == flex_type_enum::UNDEFINED)
28+
log_and_throw("Must specify a valid type.");
29+
m_sarray->set_type(m_given_dtype);
30+
31+
m_inited = true;
32+
}
33+
34+
void unity_sarray_builder::append(const flexible_type &val, size_t segment) {
35+
if(!m_inited)
36+
log_and_throw("Must call 'init' first!");
37+
38+
if(m_closed)
39+
log_and_throw("Cannot append values when closed.");
40+
41+
if(segment >= m_out_iters.size()) {
42+
log_and_throw("Invalid segment number!");
43+
}
44+
45+
m_history[segment]->push_back(val);
46+
47+
auto in_type = val.get_type();
48+
if(in_type != flex_type_enum::UNDEFINED && in_type != m_given_dtype) {
49+
log_and_throw(std::string("Append failed: ") +
50+
flex_type_enum_to_name(in_type) + std::string(" type is "
51+
"incompatible with type of future SArray."));
52+
}
53+
54+
*(m_out_iters[segment]) = val;
55+
}
56+
57+
void unity_sarray_builder::append_multiple(const std::vector<flexible_type> &vals, size_t segment) {
58+
for(const auto &i : vals) {
59+
this->append(i, segment);
60+
}
61+
}
62+
63+
/**
 * Report the element type supplied to init (UNDEFINED until init succeeds).
 */
flex_type_enum unity_sarray_builder::get_type() {
  return this->m_given_dtype;
}
66+
67+
std::vector<flexible_type> unity_sarray_builder::read_history(size_t num_elems, size_t segment) {
68+
if(!m_inited)
69+
log_and_throw("Must call 'init' first!");
70+
71+
if(m_closed)
72+
log_and_throw("History is invalid when closed.");
73+
74+
if(segment >= m_history.size())
75+
log_and_throw("Invalid segment.");
76+
77+
auto history = m_history[segment];
78+
79+
if(num_elems > history->size())
80+
num_elems = history->size();
81+
if(num_elems == size_t(-1))
82+
num_elems = history->size();
83+
84+
std::vector<flexible_type> ret_vec(num_elems);
85+
86+
if(num_elems == 0)
87+
return ret_vec;
88+
89+
std::copy_n(history->rbegin(), num_elems, ret_vec.rbegin());
90+
91+
return ret_vec;
92+
}
93+
94+
std::shared_ptr<unity_sarray_base> unity_sarray_builder::close() {
95+
if(!m_inited)
96+
log_and_throw("Must call 'init' first!");
97+
98+
if(m_closed)
99+
log_and_throw("Already closed.");
100+
101+
m_sarray->close();
102+
m_closed = true;
103+
auto ret = std::make_shared<unity_sarray>();
104+
ret->construct_from_sarray(m_sarray);
105+
return ret;
106+
}
107+
108+
} // namespace graphlab
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/**
2+
* Copyright (C) 2015 Dato, Inc.
3+
* All rights reserved.
4+
*
5+
* This software may be modified and distributed under the terms
6+
* of the BSD license. See the LICENSE file for details.
7+
*/
8+
#ifndef GRAPHLAB_UNITY_SARRAY_BUILDER_HPP
9+
#define GRAPHLAB_UNITY_SARRAY_BUILDER_HPP
10+
11+
#include <vector>
12+
#include <sframe/sarray.hpp>
13+
#include <boost/circular_buffer.hpp>
14+
#include <unity/lib/api/unity_sarray_builder_interface.hpp>
15+
16+
namespace graphlab {
17+
18+
// forward declarations
19+
template <typename T>
20+
class sarray;
21+
22+
/**
23+
* Provides a Python interface to incrementally build an SArray.
24+
*
25+
* Unlike most other unity objects, this is not a wrapper of another
26+
* "sarray_builder" class, but provides the implementation. This is because it
27+
* is a slightly embellished wrapper around the SArray's output iterator, so
28+
* there is no further functionality that needs to be available for the C++
29+
* side.
30+
*
31+
* The unity_sarray_builder is designed to append values until \ref close is
32+
* called, which returns the SArray. No "reopening" is allowed, and no
33+
* operations in that instance of unity_sarray_builder will work after close is
34+
* called.
35+
*/
36+
class unity_sarray_builder: public unity_sarray_builder_base {
37+
public:
38+
/**
39+
* Default constructor. Does nothing
40+
*/
41+
unity_sarray_builder() {}
42+
43+
/**
44+
* Initialize the unity_sarray_buidler.
45+
*
46+
* This essentially opens the output iterator for writing.
47+
*
48+
*/
49+
void init(size_t num_segments, size_t history_size, flex_type_enum dtype);
50+
51+
/**
52+
* Add a single flexible_type value to the SArray.
53+
*
54+
* The segment number allows the user to use the parallel interface provided
55+
* by the underlying output_iterator.
56+
*
57+
* Throws if:
58+
* - init hasn't been called or close has been called
59+
* - segment number is invalid
60+
* - the type of \p val differs from the type given in \ref init
61+
*
62+
*/
63+
void append(const flexible_type &val, size_t segment);
64+
65+
/**
66+
* A wrapper around \ref append which adds multiple flexible_types to SArray.
67+
*
68+
* Throws if:
69+
* - init hasn't been called or close has been called
70+
* - segment number is invalid
71+
* - the type of any values in \p vals differs from
72+
* the type given in \ref init
73+
*/
74+
void append_multiple(const std::vector<flexible_type> &vals, size_t segment);
75+
76+
/**
77+
* Return the current type of the SArray.
78+
*/
79+
flex_type_enum get_type();
80+
81+
/**
82+
* Return the last \p num_elems elements appended.
83+
*/
84+
std::vector<flexible_type> read_history(size_t num_elems, size_t segment);
85+
86+
/**
87+
* Finalize SArray and return it.
88+
*/
89+
std::shared_ptr<unity_sarray_base> close();
90+
91+
unity_sarray_builder(const unity_sarray_builder&) = delete;
92+
unity_sarray_builder& operator=(const unity_sarray_builder&) = delete;
93+
private:
94+
/// Methods
95+
96+
/// Variables
97+
bool m_inited = false;
98+
bool m_closed = false;
99+
std::shared_ptr<sarray<flexible_type>> m_sarray;
100+
std::vector<sarray<flexible_type>::iterator> m_out_iters;
101+
flex_type_enum m_given_dtype = flex_type_enum::UNDEFINED;
102+
std::set<flex_type_enum> m_types_inserted;
103+
104+
std::vector<std::shared_ptr<boost::circular_buffer<flexible_type>>> m_history;
105+
};
106+
107+
} // namespace graphlab
108+
#endif // GRAPHLAB_UNITY_SARRAY_BUILDER_HPP

0 commit comments

Comments
 (0)