Skip to content

Commit 3b59a47

Browse files
authored
[Feat][C++] Support Date and Timestamp data type (#398)
--------- Signed-off-by: acezen <qiaozi.zwb@alibaba-inc.com>
1 parent b61678a commit 3b59a47

7 files changed

Lines changed: 277 additions & 39 deletions

File tree

cpp/include/gar/fwd.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,10 @@ const std::shared_ptr<DataType>& float32();
189189
const std::shared_ptr<DataType>& float64();
190190
/// @brief Return a string DataType instance
191191
const std::shared_ptr<DataType>& string();
192+
/// @brief Return a date DataType instance
193+
const std::shared_ptr<DataType>& date();
194+
/// @brief Return a timestamp DataType instance
195+
const std::shared_ptr<DataType>& timestamp();
192196
/**
193197
* @brief Return a list DataType instance
194198
*

cpp/include/gar/util/convert_to_arrow_type.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ CONVERT_TO_ARROW_TYPE(Type::DOUBLE, double, arrow::DoubleType,
6969
CONVERT_TO_ARROW_TYPE(Type::STRING, std::string, arrow::LargeStringType,
7070
arrow::LargeStringArray, arrow::LargeStringBuilder,
7171
arrow::large_utf8(), "string")
72+
CONVERT_TO_ARROW_TYPE(Type::TIMESTAMP, Timestamp, arrow::TimestampType,
73+
arrow::TimestampArray, arrow::TimestampBuilder,
74+
arrow::timestamp(arrow::TimeUnit::MILLI), "timestamp")
75+
CONVERT_TO_ARROW_TYPE(Type::DATE, Date, arrow::Date32Type, arrow::Date32Array,
76+
arrow::Date32Builder, arrow::date32(), "date")
7277

7378
} // namespace GAR_NAMESPACE_INTERNAL
7479

cpp/include/gar/util/data_type.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ enum class Type {
5454
/** List of some logical data type */
5555
LIST,
5656

57+
/** int32_t days since the UNIX epoch */
58+
DATE,
59+
60+
/** Exact timestamp encoded with int64 since UNIX epoch in milliseconds */
61+
TIMESTAMP,
62+
5763
/** User-defined data type */
5864
USER_DEFINED,
5965

@@ -125,6 +131,31 @@ class DataType {
125131
std::shared_ptr<DataType> child_;
126132
std::string user_defined_type_name_;
127133
}; // struct DataType
134+
135+
/**
 * @brief Value wrapper representing a timestamp property value.
 *
 * Stores a single c_type (int64_t) value. The wrapper itself does not
 * interpret the value; it only stores and returns it.
 *
 * The constructor and accessor are constexpr/noexcept, and the comparison
 * operators compare the wrapped values, so instances can be used in constant
 * expressions and compared or sorted directly.
 */
class Timestamp {
 public:
  /// Underlying storage type of the timestamp value.
  using c_type = int64_t;

  /// Wrap a raw value. Explicit, so a bare integer never converts silently.
  constexpr explicit Timestamp(c_type value) noexcept : value_(value) {}

  /// @return The raw value passed at construction.
  constexpr c_type value() const noexcept { return value_; }

  friend constexpr bool operator==(const Timestamp& lhs,
                                   const Timestamp& rhs) noexcept {
    return lhs.value_ == rhs.value_;
  }
  friend constexpr bool operator!=(const Timestamp& lhs,
                                   const Timestamp& rhs) noexcept {
    return !(lhs == rhs);
  }
  friend constexpr bool operator<(const Timestamp& lhs,
                                  const Timestamp& rhs) noexcept {
    return lhs.value_ < rhs.value_;
  }
  friend constexpr bool operator>(const Timestamp& lhs,
                                  const Timestamp& rhs) noexcept {
    return rhs < lhs;
  }
  friend constexpr bool operator<=(const Timestamp& lhs,
                                   const Timestamp& rhs) noexcept {
    return !(rhs < lhs);
  }
  friend constexpr bool operator>=(const Timestamp& lhs,
                                   const Timestamp& rhs) noexcept {
    return !(lhs < rhs);
  }

 private:
  c_type value_;
};
146+
147+
/**
 * @brief Value wrapper representing a date property value.
 *
 * Stores a single c_type (int32_t) value. The wrapper itself does not
 * interpret the value; it only stores and returns it.
 *
 * The constructor and accessor are constexpr/noexcept, and the comparison
 * operators compare the wrapped values, so instances can be used in constant
 * expressions and compared or sorted directly.
 */
class Date {
 public:
  /// Underlying storage type of the date value.
  using c_type = int32_t;

  /// Wrap a raw value. Explicit, so a bare integer never converts silently.
  constexpr explicit Date(c_type value) noexcept : value_(value) {}

  /// @return The raw value passed at construction.
  constexpr c_type value() const noexcept { return value_; }

  friend constexpr bool operator==(const Date& lhs, const Date& rhs) noexcept {
    return lhs.value_ == rhs.value_;
  }
  friend constexpr bool operator!=(const Date& lhs, const Date& rhs) noexcept {
    return !(lhs == rhs);
  }
  friend constexpr bool operator<(const Date& lhs, const Date& rhs) noexcept {
    return lhs.value_ < rhs.value_;
  }
  friend constexpr bool operator>(const Date& lhs, const Date& rhs) noexcept {
    return rhs < lhs;
  }
  friend constexpr bool operator<=(const Date& lhs, const Date& rhs) noexcept {
    return !(rhs < lhs);
  }
  friend constexpr bool operator>=(const Date& lhs, const Date& rhs) noexcept {
    return !(lhs < rhs);
  }

 private:
  c_type value_;
};
158+
128159
} // namespace GAR_NAMESPACE_INTERNAL
129160

130161
#endif // GAR_UTIL_DATA_TYPE_H_

cpp/src/data_type.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ std::shared_ptr<arrow::DataType> DataType::DataTypeToArrowDataType(
3939
return arrow::float64();
4040
case Type::STRING:
4141
return arrow::large_utf8();
42+
case Type::DATE:
43+
return arrow::date32();
44+
case Type::TIMESTAMP:
45+
return arrow::timestamp(arrow::TimeUnit::MILLI);
4246
case Type::LIST:
4347
return arrow::list(DataTypeToArrowDataType(type->child_));
4448
default:
@@ -65,6 +69,12 @@ std::shared_ptr<DataType> DataType::ArrowDataTypeToDataType(
6569
return string();
6670
case arrow::Type::LARGE_STRING:
6771
return string();
72+
case arrow::Type::DATE32:
73+
return date();
74+
case arrow::Type::TIMESTAMP:
75+
case arrow::Type::DATE64: // Date64 of Arrow is used to represent timestamp
76+
// milliseconds
77+
return timestamp();
6878
case arrow::Type::LIST:
6979
return list(ArrowDataTypeToDataType(type->field(0)->type()));
7080
default:
@@ -89,6 +99,8 @@ std::string DataType::ToTypeName() const {
8999
TO_STRING_CASE(FLOAT)
90100
TO_STRING_CASE(DOUBLE)
91101
TO_STRING_CASE(STRING)
102+
TO_STRING_CASE(DATE)
103+
TO_STRING_CASE(TIMESTAMP)
92104

93105
#undef TO_STRING_CASE
94106
case Type::USER_DEFINED:
@@ -113,6 +125,10 @@ std::shared_ptr<DataType> DataType::TypeNameToDataType(const std::string& str) {
113125
return float64();
114126
} else if (str == "string") {
115127
return string();
128+
} else if (str == "date") {
129+
return date();
130+
} else if (str == "timestamp") {
131+
return timestamp();
116132
} else if (str == "list<int32>") {
117133
return list(int32());
118134
} else if (str == "list<int64>") {
@@ -141,6 +157,8 @@ TYPE_FACTORY(int64, Type::INT64)
141157
TYPE_FACTORY(float32, Type::FLOAT)
142158
TYPE_FACTORY(float64, Type::DOUBLE)
143159
TYPE_FACTORY(string, Type::STRING)
160+
TYPE_FACTORY(date, Type::DATE)
161+
TYPE_FACTORY(timestamp, Type::TIMESTAMP)
144162

145163
std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type) {
146164
return std::make_shared<DataType>(Type::LIST, value_type);

cpp/src/edges_builder.cc

Lines changed: 79 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,20 @@ Status EdgesBuilder::validate(const Edge& e,
152152
invalid_type = true;
153153
}
154154
break;
155+
case Type::DATE:
156+
// date is stored as int32_t
157+
if (property.second.type() !=
158+
typeid(typename TypeToArrowType<Type::DATE>::CType::c_type)) {
159+
invalid_type = true;
160+
}
161+
break;
162+
case Type::TIMESTAMP:
163+
// timestamp is stored as int64_t
164+
if (property.second.type() !=
165+
typeid(typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type)) {
166+
invalid_type = true;
167+
}
168+
break;
155169
default:
156170
return Status::TypeError("Unsupported property type.");
157171
}
@@ -165,6 +179,67 @@ Status EdgesBuilder::validate(const Edge& e,
165179
return Status::OK();
166180
}
167181

182+
template <Type type>
183+
Status EdgesBuilder::tryToAppend(
184+
const std::string& property_name,
185+
std::shared_ptr<arrow::Array>& array, // NOLINT
186+
const std::vector<Edge>& edges) {
187+
using CType = typename TypeToArrowType<type>::CType;
188+
arrow::MemoryPool* pool = arrow::default_memory_pool();
189+
typename TypeToArrowType<type>::BuilderType builder(pool);
190+
for (const auto& e : edges) {
191+
if (e.Empty() || (!e.ContainProperty(property_name))) {
192+
RETURN_NOT_ARROW_OK(builder.AppendNull());
193+
} else {
194+
RETURN_NOT_ARROW_OK(
195+
builder.Append(std::any_cast<CType>(e.GetProperty(property_name))));
196+
}
197+
}
198+
array = builder.Finish().ValueOrDie();
199+
return Status::OK();
200+
}
201+
202+
template <>
203+
Status EdgesBuilder::tryToAppend<Type::TIMESTAMP>(
204+
const std::string& property_name,
205+
std::shared_ptr<arrow::Array>& array, // NOLINT
206+
const std::vector<Edge>& edges) {
207+
using CType = typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type;
208+
arrow::MemoryPool* pool = arrow::default_memory_pool();
209+
typename TypeToArrowType<Type::TIMESTAMP>::BuilderType builder(
210+
arrow::timestamp(arrow::TimeUnit::MILLI), pool);
211+
for (const auto& e : edges) {
212+
if (e.Empty() || (!e.ContainProperty(property_name))) {
213+
RETURN_NOT_ARROW_OK(builder.AppendNull());
214+
} else {
215+
RETURN_NOT_ARROW_OK(
216+
builder.Append(std::any_cast<CType>(e.GetProperty(property_name))));
217+
}
218+
}
219+
array = builder.Finish().ValueOrDie();
220+
return Status::OK();
221+
}
222+
223+
template <>
224+
Status EdgesBuilder::tryToAppend<Type::DATE>(
225+
const std::string& property_name,
226+
std::shared_ptr<arrow::Array>& array, // NOLINT
227+
const std::vector<Edge>& edges) {
228+
using CType = typename TypeToArrowType<Type::DATE>::CType::c_type;
229+
arrow::MemoryPool* pool = arrow::default_memory_pool();
230+
typename TypeToArrowType<Type::DATE>::BuilderType builder(pool);
231+
for (const auto& e : edges) {
232+
if (e.Empty() || (!e.ContainProperty(property_name))) {
233+
RETURN_NOT_ARROW_OK(builder.AppendNull());
234+
} else {
235+
RETURN_NOT_ARROW_OK(
236+
builder.Append(std::any_cast<CType>(e.GetProperty(property_name))));
237+
}
238+
}
239+
array = builder.Finish().ValueOrDie();
240+
return Status::OK();
241+
}
242+
168243
Status EdgesBuilder::appendToArray(
169244
const std::shared_ptr<DataType>& type, const std::string& property_name,
170245
std::shared_ptr<arrow::Array>& array, // NOLINT
@@ -182,32 +257,16 @@ Status EdgesBuilder::appendToArray(
182257
return tryToAppend<Type::DOUBLE>(property_name, array, edges);
183258
case Type::STRING:
184259
return tryToAppend<Type::STRING>(property_name, array, edges);
260+
case Type::DATE:
261+
return tryToAppend<Type::DATE>(property_name, array, edges);
262+
case Type::TIMESTAMP:
263+
return tryToAppend<Type::TIMESTAMP>(property_name, array, edges);
185264
default:
186265
return Status::TypeError("Unsupported property type.");
187266
}
188267
return Status::OK();
189268
}
190269

191-
template <Type type>
192-
Status EdgesBuilder::tryToAppend(
193-
const std::string& property_name,
194-
std::shared_ptr<arrow::Array>& array, // NOLINT
195-
const std::vector<Edge>& edges) {
196-
using CType = typename TypeToArrowType<type>::CType;
197-
arrow::MemoryPool* pool = arrow::default_memory_pool();
198-
typename TypeToArrowType<type>::BuilderType builder(pool);
199-
for (const auto& e : edges) {
200-
if (e.Empty() || (!e.ContainProperty(property_name))) {
201-
RETURN_NOT_ARROW_OK(builder.AppendNull());
202-
} else {
203-
RETURN_NOT_ARROW_OK(
204-
builder.Append(std::any_cast<CType>(e.GetProperty(property_name))));
205-
}
206-
}
207-
array = builder.Finish().ValueOrDie();
208-
return Status::OK();
209-
}
210-
211270
Status EdgesBuilder::tryToAppend(
212271
int src_or_dest,
213272
std::shared_ptr<arrow::Array>& array, // NOLINT

cpp/src/graph.cc

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ Status TryToCastToAny(const std::shared_ptr<DataType>& type,
5454
return CastToAny<Type::DOUBLE>(array, any);
5555
case Type::STRING:
5656
return CastToAny<Type::STRING>(array, any);
57+
case Type::DATE:
58+
return CastToAny<Type::DATE>(array, any);
59+
case Type::TIMESTAMP:
60+
return CastToAny<Type::TIMESTAMP>(array, any);
5761
default:
5862
return Status::TypeError("Unsupported type.");
5963
}
@@ -111,6 +115,36 @@ Result<T> Vertex::property(const std::string& property) const {
111115
}
112116
}
113117

118+
template <>
119+
Result<Date> Vertex::property(const std::string& property) const {
120+
if (properties_.find(property) == properties_.end()) {
121+
return Status::KeyError("Property with name ", property,
122+
" does not exist in the vertex.");
123+
}
124+
try {
125+
Date ret(std::any_cast<Date::c_type>(properties_.at(property)));
126+
return ret;
127+
} catch (const std::bad_any_cast& e) {
128+
return Status::TypeError("Any cast failed, the property type of ", property,
129+
" is not matched ", e.what());
130+
}
131+
}
132+
133+
template <>
134+
Result<Timestamp> Vertex::property(const std::string& property) const {
135+
if (properties_.find(property) == properties_.end()) {
136+
return Status::KeyError("Property with name ", property,
137+
" does not exist in the vertex.");
138+
}
139+
try {
140+
Timestamp ret(std::any_cast<Timestamp::c_type>(properties_.at(property)));
141+
return ret;
142+
} catch (const std::bad_any_cast& e) {
143+
return Status::TypeError("Any cast failed, the property type of ", property,
144+
" is not matched ", e.what());
145+
}
146+
}
147+
114148
template <>
115149
Result<StringArray> Vertex::property(const std::string& property) const {
116150
auto it = list_properties_.find(property);
@@ -182,6 +216,36 @@ Result<T> Edge::property(const std::string& property) const {
182216
}
183217
}
184218

219+
template <>
220+
Result<Date> Edge::property(const std::string& property) const {
221+
if (properties_.find(property) == properties_.end()) {
222+
return Status::KeyError("Property with name ", property,
223+
" does not exist in the edge.");
224+
}
225+
try {
226+
Date ret(std::any_cast<Date::c_type>(properties_.at(property)));
227+
return ret;
228+
} catch (const std::bad_any_cast& e) {
229+
return Status::TypeError("Any cast failed, the property type of ", property,
230+
" is not matched ", e.what());
231+
}
232+
}
233+
234+
template <>
235+
Result<Timestamp> Edge::property(const std::string& property) const {
236+
if (properties_.find(property) == properties_.end()) {
237+
return Status::KeyError("Property with name ", property,
238+
" does not exist in the edge.");
239+
}
240+
try {
241+
Timestamp ret(std::any_cast<Timestamp::c_type>(properties_.at(property)));
242+
return ret;
243+
} catch (const std::bad_any_cast& e) {
244+
return Status::TypeError("Any cast failed, the property type of ", property,
245+
" is not matched ", e.what());
246+
}
247+
}
248+
185249
template <>
186250
Result<StringArray> Edge::property(const std::string& property) const {
187251
auto it = list_properties_.find(property);

0 commit comments

Comments
 (0)