Skip to content

Commit 9c85436

Browse files
committed
Add PreparedInsert flow
Add a new pattern for "prepared inserts". It works like this: * Call `PrepareInsert` with an `INSERT` query with optional columns and ending in `VALUES`. No values should be included in the string. * It returns a pointer to a `PreparedInsert` object that has two methods: * `GetBlock()` returns a pointer to a `Block` pre-configured with columns as declared in the `INSERT` statement. * `Execute()` inserts data from the block, then clears it. * When the `PreparedInsert` object is destroyed (the caller must `delete` it), it first signals the server that it's done sending data. This allows one to send smaller batches of blocks, thereby using less memory, but still in a single ClickHouse `INSERT` operation. Expected to be useful in the Postgres foreign data wrapper insert API, where multiple rows can be inserted at once but its API handles one-at-a-time insertion. It will also support the FDW COPY API, which can submit huge batches of data to insert.
1 parent 6919524 commit 9c85436

4 files changed

Lines changed: 304 additions & 6 deletions

File tree

clickhouse/block.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ class Block {
8585
return columns_.at(idx).name;
8686
}
8787

88-
/// Convinience method to wipe out all rows from all columns
88+
/// Convenience method to wipe out all rows from all columns
8989
void Clear();
9090

91-
/// Convinience method to do Reserve() on all columns
91+
/// Convenience method to do Reserve() on all columns
9292
void Reserve(size_t new_cap);
9393

9494
/// Reference to column by index in the block.

clickhouse/client.cpp

Lines changed: 162 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,12 @@ class Client::Impl {
161161

162162
void Insert(const std::string& table_name, const std::string& query_id, const Block& block);
163163

164+
PreparedInsert * PrepareInsert(Query query);
165+
166+
void FinishInsert();
167+
168+
void SendData(const Block& block);
169+
164170
void Ping();
165171

166172
void ResetConnection();
@@ -175,12 +181,11 @@ class Client::Impl {
175181
bool Handshake();
176182

177183
bool ReceivePacket(uint64_t* server_packet = nullptr);
184+
bool ReceivePreparePackets(uint64_t* server_packet = nullptr);
178185

179186
void SendQuery(const Query& query, bool finalize = true);
180187
void FinalizeQuery();
181188

182-
void SendData(const Block& block);
183-
184189
void SendBlockData(const Block& block);
185190
void SendExternalData(const ExternalTables& external_tables);
186191

@@ -415,6 +420,23 @@ void Client::Impl::Insert(const std::string& table_name, const std::string& quer
415420
}
416421
}
417422

423+
void Client::Impl::FinishInsert() {
424+
// Send empty block as marker of end of data.
425+
SendData(Block());
426+
427+
// Wait for EOS.
428+
uint64_t eos_packet{0};
429+
while (ReceivePacket(&eos_packet)) {
430+
;
431+
}
432+
433+
if (eos_packet != ServerCodes::EndOfStream && eos_packet != ServerCodes::Exception
434+
&& eos_packet != ServerCodes::Log && options_.rethrow_exceptions) {
435+
throw ProtocolError(std::string{"unexpected packet from server while receiving end of query, expected (expected Exception, EndOfStream or Log, got: "}
436+
+ (eos_packet ? std::to_string(eos_packet) : "nothing") + ")");
437+
}
438+
}
439+
418440
void Client::Impl::Ping() {
419441
WireFormat::WriteUInt64(*output_, ClientCodes::Ping);
420442
output_->Flush();
@@ -648,6 +670,78 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) {
648670
}
649671
}
650672

673+
bool Client::Impl::ReceivePreparePackets(uint64_t* server_packet) {
674+
uint64_t packet_type = 0;
675+
676+
while (true) {
677+
if (!WireFormat::ReadVarint64(*input_, &packet_type)) {
678+
throw std::runtime_error("unexpected package type " +
679+
std::to_string((int)packet_type) + " for insert query");
680+
}
681+
if (server_packet) {
682+
*server_packet = packet_type;
683+
}
684+
685+
switch (packet_type) {
686+
case ServerCodes::Data: {
687+
if (!ReceiveData()) {
688+
throw ProtocolError("can't read data packet from input stream");
689+
}
690+
return true;
691+
}
692+
693+
case ServerCodes::Exception: {
694+
ReceiveException();
695+
return false;
696+
}
697+
698+
case ServerCodes::ProfileInfo:
699+
case ServerCodes::Progress:
700+
case ServerCodes::Pong:
701+
case ServerCodes::Hello:
702+
continue;
703+
704+
case ServerCodes::Log: {
705+
// log tag
706+
if (!WireFormat::SkipString(*input_)) {
707+
return false;
708+
}
709+
Block block;
710+
711+
// Use uncompressed stream since log blocks usually contain only one row
712+
if (!ReadBlock(*input_, &block)) {
713+
return false;
714+
}
715+
716+
if (events_) {
717+
events_->OnServerLog(block);
718+
}
719+
continue;
720+
}
721+
722+
case ServerCodes::TableColumns: {
723+
// external table name
724+
if (!WireFormat::SkipString(*input_)) {
725+
return false;
726+
}
727+
728+
// columns metadata
729+
if (!WireFormat::SkipString(*input_)) {
730+
return false;
731+
}
732+
continue;
733+
}
734+
735+
// No others expected.
736+
case ServerCodes::EndOfStream:
737+
case ServerCodes::ProfileEvents:
738+
default:
739+
throw UnimplementedError("unimplemented " + std::to_string((int)packet_type));
740+
break;
741+
}
742+
}
743+
}
744+
651745
bool Client::Impl::ReadBlock(InputStream& input, Block* block) {
652746
// Additional information about block.
653747
if (server_info_.revision >= DBMS_MIN_REVISION_WITH_BLOCK_INFO) {
@@ -1063,7 +1157,7 @@ void Client::Impl::RetryGuard(std::function<void()> func) {
10631157
}
10641158
}
10651159
}
1066-
// Connectiong with current_endpoint_ are broken.
1160+
// Connection with current_endpoint_ is broken.
10671161
// Trying to establish with the another one from the list.
10681162
size_t connection_attempts_count = GetConnectionAttempts();
10691163
for (size_t i = 0; i < connection_attempts_count;)
@@ -1085,6 +1179,41 @@ void Client::Impl::RetryGuard(std::function<void()> func) {
10851179
}
10861180
}
10871181

1182+
/// Start an INSERT and build a block whose columns match what the server
/// reports for the query, leaving the query open for further data.
///
/// @param query  The INSERT query (text plus optional query id).
/// @return A heap-allocated PreparedInsert that the caller must delete.
/// @throws std::runtime_error if ReceivePreparePackets() returns false; other
///         exceptions from sending the query or reading the response
///         propagate unchanged.
Client::PreparedInsert * Client::Impl::PrepareInsert(Query query) {
    // Keep the block owned locally until it is handed to the PreparedInsert,
    // so it is released if anything below throws.
    std::unique_ptr<Block> block(new Block());
    Block * const target = block.get();

    // Arrange a query callback to extract a block that corresponds to the
    // query columns.
    query.OnData([target](const Block& b) {
        for (Block::Iterator bi(b); bi.IsValid(); bi.Next()) {
            // Create a fresh column of the same ClickHouse type, by type name.
            clickhouse::ColumnRef col = bi.Column();
            target->AppendColumn(bi.Name(), clickhouse::CreateColumnByType(col->Type()->GetName()));
        }

        return true;
    });

    EnsureNull en(static_cast<QueryEvents*>(&query), &events_);

    if (options_.ping_before_query) {
        RetryGuard([this]() { Ping(); });
    }

    // Send the Query object itself so that its query id is preserved.
    SendQuery(query);

    // Receive data packet but keep the query/connection open.
    if (!ReceivePreparePackets()) {
        throw std::runtime_error("fail to receive data packet");
    }

    // Give up ownership only after the PreparedInsert has been constructed.
    auto prepared = new PreparedInsert(this, target);
    block.release();
    return prepared;
}
1216+
10881217
Client::Client(const ClientOptions& opts)
10891218
: options_(opts)
10901219
, impl_(new Impl(opts))
@@ -1149,6 +1278,14 @@ void Client::Insert(const std::string& table_name, const std::string& query_id,
11491278
impl_->Insert(table_name, query_id, block);
11501279
}
11511280

1281+
/// Prepare an INSERT from its query text alone.
/// The returned object is heap-allocated; the caller must delete it.
Client::PreparedInsert * Client::PrepareInsert(const std::string& query) {
    Impl& impl = *impl_;
    return impl.PrepareInsert(Query(query));
}
1284+
1285+
/// Prepare an INSERT from its query text and an explicit query id.
/// The returned object is heap-allocated; the caller must delete it.
Client::PreparedInsert * Client::PrepareInsert(const std::string& query, const std::string& query_id) {
    Impl& impl = *impl_;
    return impl.PrepareInsert(Query(query, query_id));
}
1288+
11521289
void Client::Ping() {
11531290
impl_->Ping();
11541291
}
@@ -1179,4 +1316,26 @@ Client::Version Client::GetVersion() {
11791316
};
11801317
}
11811318

1319+
/// Store the client implementation pointer and the block to be filled.
///
/// @param c  Pointer to the Client::Impl that runs the insert, passed as void*.
/// @param b  Block to fill and send; deleted by the destructor.
Client::PreparedInsert::PreparedInsert(void *c, Block *b)
    : block(b)
    , client(c)
{
}
1323+
1324+
/// Finish the insert on the client, then free the block.
///
/// NOTE(review): FinishInsert() can throw, and this destructor is not
/// declared noexcept(false), so an exception escaping here would terminate
/// the program — confirm whether that is acceptable.
Client::PreparedInsert::~PreparedInsert() {
    static_cast<Client::Impl *>(client)->FinishInsert();

    // The client is deliberately left alone: this object is effectively its
    // child. Deleting a null block pointer is a no-op.
    delete block;
}
1331+
1332+
/// Return the block that Execute() sends. The destructor deletes it, so the
/// caller must not.
Block * Client::PreparedInsert::GetBlock() {
    return this->block;
}
1333+
1334+
void Client::PreparedInsert::Execute() {
1335+
auto c = (Client::Impl *)(client);
1336+
block->RefreshRowCount();
1337+
c->SendData(*block);
1338+
block->Clear();
1339+
}
1340+
11821341
}

clickhouse/client.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,21 @@ class Client {
243243
std::unique_ptr<SocketFactory> socket_factory);
244244
~Client();
245245

246+
/**
247+
* Prepared insert object.
248+
*/
249+
class PreparedInsert {
250+
public:
251+
Block * GetBlock();
252+
void Execute();
253+
// XXX This shouldn't be public.
254+
PreparedInsert(void *client, Block * block);
255+
~PreparedInsert();
256+
private:
257+
Block * block;
258+
void * client;
259+
};
260+
246261
/// Intends for execute arbitrary queries.
247262
void Execute(const Query& query);
248263

@@ -273,6 +288,43 @@ class Client {
273288
void Insert(const std::string& table_name, const Block& block);
274289
void Insert(const std::string& table_name, const std::string& query_id, const Block& block);
275290

291+
/**
292+
* @brief Prepare an \p INSERT query.
293+
* @param query The INSERT query; must end with \p VALUES but contain no values
294+
* @return The prepared insert object
295+
*
296+
* Use this object to insert multiple blocks of data into a table. Example:
297+
*
298+
* \code{.cpp}
299+
* // Start the insertion.
300+
* auto inserter = client->PrepareInsert("INSERT INTO foo (id, name) VALUES");
301+
*
302+
* // Grab the columns from the inserter's block.
303+
* auto block = inserter->GetBlock();
304+
* auto col1 = (*block)[0]->As<ColumnUInt64>();
305+
* auto col2 = (*block)[1]->As<ColumnString>();
306+
*
307+
* // Add a couple of records to the block.
308+
* col1->Append(1);
309+
* col1->Append(2);
310+
* col2->Append("holden");
311+
* col2->Append("naomi");
312+
*
313+
* // Send those records.
314+
* inserter->Execute();
315+
*
316+
* // Add another record.
317+
* col1->Append(3);
318+
* col2->Append("amos");
319+
*
320+
* // Send it and finish.
321+
* inserter->Execute();
322+
* delete inserter; // Finishes insert.
323+
* \endcode
324+
*/
325+
Client::PreparedInsert * PrepareInsert(const std::string& query);
326+
Client::PreparedInsert * PrepareInsert(const std::string& query, const std::string& query_id);
327+
276328
/// Ping server for aliveness.
277329
void Ping();
278330

0 commit comments

Comments
 (0)