4141#include < simdjson.h>
4242#include < tipb/expression.pb.h>
4343
44+ #include < algorithm>
4445#include < ext/range.h>
46+ #include < limits>
4547#include < magic_enum.hpp>
4648#include < string_view>
4749#include < type_traits>
@@ -63,6 +65,8 @@ namespace DB
6365
// Error codes referenced from this file. They are only declared here (`extern`);
// the definitions live in another translation unit.
6466namespace ErrorCodes
6567{
68+ extern const int ARGUMENT_OUT_OF_BOUND;
69+ extern const int BAD_ARGUMENTS;
6670extern const int ILLEGAL_COLUMN;
6771extern const int UNKNOWN_TYPE;
6872} // namespace ErrorCodes
@@ -976,6 +980,208 @@ class FunctionJsonArray : public IFunction
976980};
977981
978982
983+ class FunctionJsonObject : public IFunction
984+ {
985+ public:
986+ static constexpr auto name = " json_object" ;
987+ static FunctionPtr create (const Context &) { return std::make_shared<FunctionJsonObject>(); }
988+
989+ String getName () const override { return name; }
990+
991+ size_t getNumberOfArguments () const override { return 0 ; }
992+
993+ bool isVariadic () const override { return true ; }
994+
995+ bool useDefaultImplementationForNulls () const override { return false ; }
996+ bool useDefaultImplementationForConstants () const override { return true ; }
997+ DataTypePtr getReturnTypeImpl (const DataTypes & arguments) const override
998+ {
999+ if (unlikely (arguments.size () % 2 != 0 ))
1000+ {
1001+ throw Exception (
1002+ fmt::format (" Incorrect parameter count in the call to native function '{}'" , getName ()),
1003+ ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1004+ }
1005+ for (const auto arg_idx : ext::range (0 , arguments.size ()))
1006+ {
1007+ if (arg_idx % 2 == 0 && arguments[arg_idx]->onlyNull ())
1008+ throw Exception (" JSON documents may not contain NULL member names." , ErrorCodes::BAD_ARGUMENTS);
1009+
1010+ if (!arguments[arg_idx]->onlyNull ())
1011+ {
1012+ const auto * arg = removeNullable (arguments[arg_idx]).get ();
1013+ if (!arg->isStringOrFixedString ())
1014+ throw Exception (
1015+ ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
1016+ " Illegal type {} of argument {} of function {}" ,
1017+ arg->getName (),
1018+ arg_idx + 1 ,
1019+ getName ());
1020+ }
1021+ }
1022+ return std::make_shared<DataTypeString>();
1023+ }
1024+
1025+ void executeImpl (Block & block, const ColumnNumbers & arguments, size_t result) const override
1026+ {
1027+ if (arguments.empty ())
1028+ {
1029+ // clang-format off
1030+ const UInt8 empty_object_json_value[] = {
1031+ JsonBinary::TYPE_CODE_OBJECT, // object_type
1032+ 0x0 , 0x0 , 0x0 , 0x0 , // element_count
1033+ 0x8 , 0x0 , 0x0 , 0x0 }; // total_size
1034+ // clang-format on
1035+ auto empty_object_json = ColumnString::create ();
1036+ empty_object_json->insertData (
1037+ reinterpret_cast <const char *>(empty_object_json_value),
1038+ sizeof (empty_object_json_value) / sizeof (UInt8));
1039+ block.getByPosition (result).column = ColumnConst::create (std::move (empty_object_json), block.rows ());
1040+ return ;
1041+ }
1042+
1043+ auto nested_block = createBlockWithNestedColumns (block, arguments);
1044+ StringSources sources;
1045+ for (auto column_number : arguments)
1046+ {
1047+ sources.push_back (
1048+ block.getByPosition (column_number).column ->onlyNull ()
1049+ ? nullptr
1050+ : createDynamicStringSource (*nested_block.getByPosition (column_number).column ));
1051+ }
1052+
1053+ auto rows = block.rows ();
1054+ auto col_to = ColumnString::create ();
1055+ auto & data_to = col_to->getChars ();
1056+ auto & offsets_to = col_to->getOffsets ();
1057+ offsets_to.resize (rows);
1058+
1059+ std::vector<const NullMap *> nullmaps;
1060+ nullmaps.reserve (sources.size ());
1061+ bool is_input_nullable = false ;
1062+ for (auto column_number : arguments)
1063+ {
1064+ const auto & col = block.getByPosition (column_number).column ;
1065+ if (col->isColumnNullable ())
1066+ {
1067+ const auto & column_nullable = static_cast <const ColumnNullable &>(*col);
1068+ nullmaps.push_back (&(column_nullable.getNullMapData ()));
1069+ is_input_nullable = true ;
1070+ }
1071+ else
1072+ {
1073+ nullmaps.push_back (nullptr );
1074+ }
1075+ }
1076+
1077+ if (is_input_nullable)
1078+ doExecuteImpl<true >(sources, rows, data_to, offsets_to, nullmaps);
1079+ else
1080+ doExecuteImpl<false >(sources, rows, data_to, offsets_to, nullmaps);
1081+
1082+ block.getByPosition (result).column = std::move (col_to);
1083+ }
1084+
1085+ private:
1086+ template <bool is_input_nullable>
1087+ static void doExecuteImpl (
1088+ StringSources & sources,
1089+ size_t rows,
1090+ ColumnString::Chars_t & data_to,
1091+ ColumnString::Offsets & offsets_to,
1092+ const std::vector<const NullMap *> & nullmaps)
1093+ {
1094+ struct JsonObjectEntry
1095+ {
1096+ StringRef key;
1097+ JsonBinary value;
1098+ size_t input_order;
1099+ };
1100+
1101+ const size_t pair_count = sources.size () / 2 ;
1102+ size_t reserve_size = rows * (1 + pair_count * 16 );
1103+ for (const auto & source : sources)
1104+ reserve_size += source ? source->getSizeForReserve () : rows;
1105+ JsonBinary::JsonBinaryWriteBuffer write_buffer (data_to, reserve_size);
1106+
1107+ std::vector<JsonObjectEntry> entries;
1108+ std::vector<StringRef> keys;
1109+ std::vector<JsonBinary> values;
1110+ entries.reserve (pair_count);
1111+ keys.reserve (pair_count);
1112+ values.reserve (pair_count);
1113+
1114+ for (size_t i = 0 ; i < rows; ++i)
1115+ {
1116+ entries.clear ();
1117+ for (size_t col = 0 ; col < sources.size (); col += 2 )
1118+ {
1119+ if constexpr (is_input_nullable)
1120+ {
1121+ const auto * key_nullmap = nullmaps[col];
1122+ if (!sources[col] || (key_nullmap && (*key_nullmap)[i]))
1123+ throw Exception (" JSON documents may not contain NULL member names." , ErrorCodes::BAD_ARGUMENTS);
1124+ }
1125+
1126+ assert (sources[col]);
1127+ const auto & key_from = sources[col]->getWhole ();
1128+ if (unlikely (key_from.size > std::numeric_limits<UInt16>::max ()))
1129+ throw Exception (
1130+ " TiDB/TiFlash does not yet support JSON objects with the key length >= 65536" ,
1131+ ErrorCodes::ARGUMENT_OUT_OF_BOUND);
1132+ StringRef key{key_from.data , key_from.size };
1133+
1134+ JsonBinary value (JsonBinary::TYPE_CODE_LITERAL, StringRef (&JsonBinary::LITERAL_NIL, 1 ));
1135+ if constexpr (is_input_nullable)
1136+ {
1137+ const auto * value_nullmap = nullmaps[col + 1 ];
1138+ if (sources[col + 1 ] && !(value_nullmap && (*value_nullmap)[i]))
1139+ {
1140+ const auto & data_from = sources[col + 1 ]->getWhole ();
1141+ value = JsonBinary (data_from.data [0 ], StringRef (&data_from.data [1 ], data_from.size - 1 ));
1142+ }
1143+ }
1144+ else
1145+ {
1146+ assert (sources[col + 1 ]);
1147+ const auto & data_from = sources[col + 1 ]->getWhole ();
1148+ value = JsonBinary (data_from.data [0 ], StringRef (&data_from.data [1 ], data_from.size - 1 ));
1149+ }
1150+
1151+ entries.push_back ({key, value, col >> 1 });
1152+ }
1153+
1154+ std::sort (entries.begin (), entries.end (), [](const auto & lhs, const auto & rhs) {
1155+ return lhs.key == rhs.key ? lhs.input_order < rhs.input_order : lhs.key < rhs.key ;
1156+ });
1157+
1158+ keys.clear ();
1159+ values.clear ();
1160+ for (size_t entry_idx = 0 ; entry_idx < entries.size ();)
1161+ {
1162+ size_t last_idx = entry_idx;
1163+ while (last_idx + 1 < entries.size () && entries[last_idx + 1 ].key == entries[entry_idx].key )
1164+ ++last_idx;
1165+
1166+ keys.push_back (entries[last_idx].key );
1167+ values.push_back (entries[last_idx].value );
1168+ entry_idx = last_idx + 1 ;
1169+ }
1170+
1171+ JsonBinary::buildBinaryJsonObjectInBuffer (keys, values, write_buffer);
1172+ writeChar (0 , write_buffer);
1173+ offsets_to[i] = write_buffer.count ();
1174+
1175+ for (const auto & source : sources)
1176+ {
1177+ if (source)
1178+ source->next ();
1179+ }
1180+ }
1181+ }
1182+ };
1183+
1184+
9791185class FunctionCastJsonAsJson : public IFunction
9801186{
9811187public:
0 commit comments