From 67771908e78d95dfebc0da85b67bd83919abfb98 Mon Sep 17 00:00:00 2001 From: Giriraj Singh Date: Tue, 25 Nov 2025 14:26:37 +0530 Subject: [PATCH 01/84] Added support to connect and perform CRUD operations with couchbase (#3138) * Implemented Couchbase binary protocol support * added support for single connection type for couchbase * removed unnecessary cout statements * added protocol code for helo packet * fixed vbucketID code for identification, fixed add and get functions * Added test cases for threaded get and add functions * Added Error Handling code and made upsert and delete examples * added makefile for example/couchbase_c++ * fixed bugs in couchbase header files * Added License and formatted to google c++ norms * fixed bugs, added support for collections and added couchbase_client.md * fixed license issue * added custom logic for caching collectionIDs * added caching of collection manifests * Added example code for multithreaded demonstration * updated CMake * Abstracted CRUD operations * Added pipeline/batching support * commented unused variables * Updated support for C++17 * fixed some issue. * Using Mutex instead of shared lock to support c++11 * Formatted code to google c++ format * Introduced local cache per-instance of CouchbaseOperations and added functionality to handle server side manifest updates. 
* Delete MODULE.bazel.lock Unnecessary file * Fixed bugs in local collection cache and collection refresh logic * remove recurring statements * Fixed bugs/repetitive calls to refreshing manifest on server * Formatted function/variable naming scheme and formatted code in c++ google format * removed unnecessary code * updated comments * updated comments * updated documentation * updated documentation * updated documentation * updated documentation * Updated documentation * Updated documentation * Update documentation * Added features and fixed bugs in multithreaded environment Using connection_groups to differentiate between connections across CouchbaseOperations instances to different buckets. Renamed CollectionManifestTracker class to CollectionManifestManager and all the related functionality inside it as before refreshing method was outside this class Added two different authenticate method authenticate(not secure) and authenticateSSL(secure) * Updated multithreaded and single threaded code. Added an example where a single instance is being shared across the threads when operating on single bucket. * updated documentation updated the documentation on thread safe operations and fixed small small discrepancies. * removed commented code and updated readme to have links for cluster download certificate * removed unused code. * Added traditional bRPC coding approach Traditional bRPC coding approach doesn't uses high level functions but provides more control to the user fixed formatting issues. 
fixed the bug in couchbase.cpp where logic to check the cache is empty was inverted * updated couchbase_example.md * added unit test cases * removed using namespace std from couchbase.h * restored original CMakeLists.txt --- CMakeLists.txt | 2 +- docs/en/couchbase_example.md | 1209 ++++++++ example/couchbase_c++/Makefile | 95 + example/couchbase_c++/couchbase_client.cpp | 451 +++ .../multithreaded_couchbase_client.cpp | 375 +++ .../traditional_brpc_couchbase_client.cpp | 171 ++ src/brpc/couchbase.cpp | 2634 +++++++++++++++++ src/brpc/couchbase.h | 517 ++++ src/brpc/global.cpp | 11 + src/brpc/options.proto | 1 + src/brpc/policy/couchbase_protocol.cpp | 236 ++ src/brpc/policy/couchbase_protocol.h | 173 ++ src/brpc/proto_base.proto | 3 + test/brpc_couchbase_unittest.cpp | 85 + 14 files changed, 5962 insertions(+), 1 deletion(-) create mode 100644 docs/en/couchbase_example.md create mode 100644 example/couchbase_c++/Makefile create mode 100644 example/couchbase_c++/couchbase_client.cpp create mode 100644 example/couchbase_c++/multithreaded_couchbase_client.cpp create mode 100644 example/couchbase_c++/traditional_brpc_couchbase_client.cpp create mode 100644 src/brpc/couchbase.cpp create mode 100644 src/brpc/couchbase.h create mode 100644 src/brpc/policy/couchbase_protocol.cpp create mode 100644 src/brpc/policy/couchbase_protocol.h create mode 100644 test/brpc_couchbase_unittest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 06aee44c86..9b5db489b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -603,4 +603,4 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/output/include/ # Install pkgconfig configure_file(cmake/brpc.pc.in ${PROJECT_BINARY_DIR}/brpc.pc @ONLY) -install(FILES ${PROJECT_BINARY_DIR}/brpc.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +install(FILES ${PROJECT_BINARY_DIR}/brpc.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) \ No newline at end of file diff --git a/docs/en/couchbase_example.md b/docs/en/couchbase_example.md new file mode 100644 
index 0000000000..6748580827 --- /dev/null +++ b/docs/en/couchbase_example.md @@ -0,0 +1,1209 @@ +## Couchbase bRPC Binary Protocol Integration + +This document explains the implementation of Couchbase Binary Protocol support added to bRPC, the available high-level operations, collection support, SSL authentication, and how to run the provided example client against either a local Couchbase Server cluster or a Couchbase Capella (cloud) deployment. However, the Couchbase binary protocol implementation in bRPC currently does not have the fine-grained optimizations that have already been made in the couchbase-cxx-client SDK, which also offers query support, better error handling and much more optimized/reliable operations. So, we also added Couchbase support to bRPC using the couchbase-cxx-client SDK; it is available at [Couchbaselabs-cb-brpc](https://github.com/couchbaselabs/cb_brpc/tree/couchbase_sdk_brpc). + +--- +### 1. Overview + +The integration provides high-level APIs for communicating with Couchbase Server using its Binary Protocol, using the high-level `CouchbaseOperations` class which provides a simplified interface. + +The core pieces are: +* `src/brpc/policy/couchbase_protocol.[h|cpp]` – framing + parse loop for binary responses, and request serialization. +* `src/brpc/couchbase.[h|cpp]` – high-level `CouchbaseOperations` class with request (`CouchbaseRequest`) and response (`CouchbaseResponse`) builders, parsers and error-handlers. +* `example/couchbase_c++/couchbase_client.cpp` – an end‑to‑end example using the high-level API for authentication, bucket selection, CRUD operations, and collection‑scoped operations. +* `example/couchbase_c++/multithreaded_couchbase_client.cpp` – a multithreaded example where an instance of `CouchbaseOperations` is shared across the threads operating on the same bucket. It also contains a second block of code in which multiple threads each have their own `CouchbaseOperations` instance because the threads operate on different buckets. 
+* `example/couchbase_c++/traditional_brpc_couchbase_client.cpp` – demonstrates the traditional bRPC approach with manual channel, controller, and request/response management for advanced users who need fine-grained control. + +Design goals: +* **SSL Support**: Built-in SSL/TLS support for secure connections to Couchbase Capella. +* **Per-instance Authentication**: Each `CouchbaseOperations` object maintains its own authenticated session when instances connect to different buckets. When multiple instances connect/operate on the same bucket, a single TCP socket is shared by these `CouchbaseOperations` instances, because separate `connection_groups` are created on the basis of `server_name+bucket`. +* **Collection Support**: Native support for collection-scoped operations. +* Keep wire structs identical to the binary protocol (24‑byte header, network order numeric fields). +* Future extensions for advanced features. + +--- +### 2. Features + +| Category | Supported Operations | Notes | +|----------|----------------------|-------| +| **High-Level API** | `CouchbaseOperations` class | **Recommended**: Simple methods returning `Result` struct | +| **Traditional API** | Manual channel/controller management | **Advanced**: Direct bRPC access for custom configurations | +| **SSL/TLS Support** | Built-in SSL encryption | **Required** for Couchbase Capella, optional for local clusters | +| Authentication | SASL `PLAIN` with/without SSL | `authenticate()` for non-SSL, `authenticateSSL()` for SSL connections | +| Bucket selection | Integrated with authentication | Bucket specified during authentication; `selectBucket()` also available separately | +| Basic KV | `add()`, `upsert()`, `delete_()`, `get()` | Clean API with `Result` struct error handling | +| **Pipeline Operations** | `beginPipeline()`, `pipelineRequest()`, `executePipeline()` | **NEW**: Batch multiple operations in a single network call for improved performance | +| Collections | Collection-scoped CRUD 
operations | Pass collection name as optional parameter (defaults to "_default") | +| Error Handling | `Result.success` + `Result.error_message` + `Result.status_code` | Human-readable error messages with Couchbase status codes | + +- **Simplified**: No need to manage channels, controllers, or response parsing +- **Flexible Threading**: Share instances across threads for same bucket/server, or create separate instances for different buckets/servers +- **Error Handling**: Simple boolean success with descriptive error messages and error codes +- **SSL Built-in**: SSL handling for secure connections + + +--- +### 3. Binary Protocol Mapping + +The Couchbase binary protocol header is shown below; for the original documentation, [click here](https://github.com/couchbase/kv_engine/blob/master/docs/BinaryProtocol.md). The following header format is used to communicate with Couchbase servers. +``` +Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| Magic | Opcode | Key length | + +---------------+---------------+---------------+---------------+ + 4| Extras length | Data type | vbucket id | + +---------------+---------------+---------------+---------------+ + 8| Total body length | + +---------------+---------------+---------------+---------------+ + 12| Opaque | + +---------------+---------------+---------------+---------------+ + 16| CAS | + | | + +---------------+---------------+---------------+---------------+ + Total 24 bytes +``` + +Overall packet structure:- +``` + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| HEADER | + | | + | | + | | + +---------------+---------------+---------------+---------------+ + 24| COMMAND-SPECIFIC EXTRAS (as needed) | + | (note length in the extras length header field) | + 
+---------------+---------------+---------------+---------------+ + m| Key (as needed) | + | (note length in key length header field) | + +---------------+---------------+---------------+---------------+ + n| Value (as needed) | + | (note length is total body length header field, minus | + | sum of the extras and key length body fields) | + +---------------+---------------+---------------+---------------+ + Total 24 + x bytes (24 byte header, and x byte body) +``` + +--- +### 4. High-Level API (`CouchbaseOperations`) + +**Approach**: Use the `CouchbaseOperations` class for operations. Instances can be shared across threads when connecting to the same bucket, or you can create a separate instance per thread when each thread connects to a different bucket. + +#### Basic Usage: +```cpp +#include <brpc/couchbase.h> + +brpc::CouchbaseOperations couchbase_ops; + +// 1. Authenticate with bucket selection (REQUIRED for each instance) +brpc::CouchbaseOperations::Result auth_result = couchbase_ops.authenticate( + username, password, server_address, bucket_name); +if (!auth_result.success) { + LOG(ERROR) << "Auth failed: " << auth_result.error_message; + return -1; +} + +// 2. Perform operations (bucket is already selected during authentication) +brpc::CouchbaseOperations::Result add_result = couchbase_ops.add("user::123", json_value); +if (add_result.success) { + std::cout << "Document added successfully!" << std::endl; +} else { + std::cout << "Add failed: " << add_result.error_message << std::endl; +} + +// Optional: Switch to a different bucket (if needed) +// brpc::CouchbaseOperations::Result bucket_result = couchbase_ops.selectBucket("another_bucket"); +``` + +#### SSL Authentication (Essential for Couchbase Capella): +To learn how to download the security certificate, [click here](https://docs.couchbase.com/cloud/security/security-certificates.html). 
+```cpp +// For Couchbase Capella (cloud) - SSL is REQUIRED +brpc::CouchbaseOperations::Result auth_result = couchbase_ops.authenticateSSL( + username, + password, + "cluster.cloud.couchbase.com:11207", // SSL port + bucket_name, // bucket name + "path/to/certificate.txt" // certificate path(can be downloaded from capella UI) +); +``` + +#### Collection Operations: +```cpp +// Default collection +auto result = couchbase_ops.get("doc::1"); + +// Specific collection +auto result = couchbase_ops.get("doc::1", "my_collection"); +auto add_result = couchbase_ops.add("doc::2", value, "my_collection"); +``` + +#### Pipeline Operations (Performance Optimization): +The pipeline API allows batching multiple operations into a single network call, significantly improving performance for bulk operations: + +#### How Pipeline Operations Work + +1. **Begin Pipeline**: Start a new pipeline session +2. **Add Operations**: Queue multiple operations without executing them +3. **Execute Pipeline**: Send all operations in a single network call +4. 
**Process Results**: Handle results in the same order as requests + +#### Pipeline API Methods + +| Method | Description | Usage | +|--------|-------------|-------| +| `beginPipeline()` | Start a new pipeline session | Must call before adding operations | +| `pipelineRequest(op_type, key, value, collection)` | Add operation to pipeline | Supports all CRUD operations | +| `executePipeline()` | Execute all queued operations | Returns `vector<Result>` in request order | +| `clearPipeline()` | Clear pipeline without executing | Use for cleanup on errors | +| `isPipelineActive()` | Check if pipeline is active | Returns `bool` | +| `getPipelineSize()` | Get number of queued operations | Returns `size_t` | + +```cpp +// Begin a new pipeline +if (!couchbase_ops.beginPipeline()) { + LOG(ERROR) << "Failed to begin pipeline"; + return -1; +} + +// Add multiple operations to the pipeline (not executed yet) +bool success = true; +success &= couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::ADD, "key1", "value1"); +success &= couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::UPSERT, "key2", "value2"); +success &= couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::GET, "key1"); +success &= couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::DELETE, "key3"); + +if (!success) { + couchbase_ops.clearPipeline(); // Clean up on error + return -1; +} + +// Execute all operations in a single network call +std::vector<brpc::CouchbaseOperations::Result> results = couchbase_ops.executePipeline(); + +// Process results in the same order as requests +for (size_t i = 0; i < results.size(); ++i) { + if (results[i].success) { + std::cout << "Operation " << i << " succeeded" << std::endl; + if (!results[i].value.empty()) { + std::cout << "Value: " << results[i].value << std::endl; + } + } else { + std::cout << "Operation " << i << " failed: " << results[i].error_message << std::endl; + } +} +``` + +**Pipeline with Collections**: +```cpp +// Pipeline operations can also use collections 
+couchbase_ops.beginPipeline(); +couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::ADD, "doc1", "value1", "my_collection"); +couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::GET, "doc1", "", "my_collection"); +auto results = couchbase_ops.executePipeline(); +``` + +#### Error Handling Pattern: +```cpp +brpc::CouchbaseOperations::Result result = couchbase_ops.someOperation(...); +if (!result.success) { + // Handle error + LOG(ERROR) << "Operation failed: " << result.error_message; + LOG(ERROR) << "Error Code: " << result.status_code; // Couchbase status code +} else { + // Use result.value if applicable (for Get operations) + std::cout << "Retrieved value: " << result.value << std::endl; +} +``` + +--- +### 5. Traditional bRPC Couchbase Client (`traditional_brpc_couchbase_client.cpp`) + +For developers who need fine-grained control over the bRPC framework or want to understand the low-level implementation, we provide a traditional bRPC client example. This approach requires manual management of channels, controllers, and response parsing. + +**When to use Traditional API:** +- Advanced bRPC users who need custom channel configurations +- Fine-grained control over connection pooling and retry logic +- Direct access to underlying bRPC controller for debugging +- Learning the internal workings of the high-level API + +**When to use High-Level API (Recommended):** +- Standard CRUD operations and authentication +- Simpler error handling and cleaner code +- Collection-based operations with minimal boilerplate +- Pipeline operations for batch processing; these are also possible with the traditional approach, but are easier with the High-Level API. + +#### Traditional Client Example Walkthrough + +The traditional client (`example/couchbase_c++/traditional_brpc_couchbase_client.cpp`) demonstrates the low-level bRPC approach: + +**1. 
Channel Setup and Configuration** +```cpp +brpc::Channel channel; +brpc::ChannelOptions options; +options.protocol = brpc::PROTOCOL_COUCHBASE; // Set Couchbase protocol +options.connection_type = "single"; // Single persistent connection +options.timeout_ms = 1000; // 1 second timeout +options.max_retry = 3; // Retry up to 3 times + +if (channel.Init("localhost:11210", &options) != 0) { + LOG(ERROR) << "Failed to initialize channel"; + return -1; +} +``` + +**2. Authentication with Manual Request/Response Handling** +```cpp +brpc::Controller cntl; +brpc::CouchbaseOperations::CouchbaseRequest req; +brpc::CouchbaseOperations::CouchbaseResponse res; +uint64_t cas; + +// Build authentication request +req.authenticateRequest("Administrator", "password"); + +// Execute the request +channel.CallMethod(NULL, &cntl, &req, &res, NULL); + +// Check controller status +if (cntl.Failed()) { + LOG(ERROR) << "Unable to authenticate: " << cntl.ErrorText(); + return -1; +} + +// Parse response - must call popHello() and popAuthenticate() in order +if (res.popHello(&cas) && res.popAuthenticate(&cas)) { + std::cout << "Authentication Successful" << std::endl; +} else { + std::cout << "Authentication Failed with status code: " + << std::hex << res._status_code << std::endl; + return -1; +} +``` + +**3. 
Bucket Selection** +```cpp +// IMPORTANT: Reset controller and clear request/response before each operation +cntl.Reset(); +req.Clear(); +res.Clear(); + +// Build bucket selection request +req.selectBucketRequest("testing"); + +// Execute the request +channel.CallMethod(NULL, &cntl, &req, &res, NULL); + +if (cntl.Failed()) { + LOG(ERROR) << "Unable to select bucket: " << cntl.ErrorText(); + return -1; +} + +// Parse response - status_code only updated AFTER calling pop function +if (res.popSelectBucket(&cas)) { + std::cout << "Bucket Selection Successful" << std::endl; +} else { + std::cout << "Bucket Selection Failed with status code: " + << std::hex << res._status_code << std::endl; + std::cout << "Error Message: " << res.lastError() << std::endl; + return -1; +} +``` + +**4. ADD Operation (Create Document)** +```cpp +// Reset for new operation +cntl.Reset(); +req.Clear(); +res.Clear(); + +// Build ADD request +req.addRequest( + "sample_key", // key + R"({"name": "John Doe", "age": 30, "email": "john@example.com"})", // value + 0, // flags + 0, // exptime (0 = no expiration) + 0 // cas (0 for new document) +); + +// Execute the request +channel.CallMethod(NULL, &cntl, &req, &res, NULL); + +if (cntl.Failed()) { + LOG(ERROR) << "Unable to add key-value: " << cntl.ErrorText(); + return -1; +} + +// Parse response +if (res.popAdd(&cas)) { + std::cout << "Key-Value Addition Successful" << std::endl; +} else { + std::cout << "Key-Value Addition Failed with status code: " + << std::hex << res._status_code << std::endl; + std::cout << "Error Message: " << res.lastError() << std::endl; + return -1; +} +``` + +**5. 
GET Operation (Retrieve Document)** +```cpp +// Reset for new operation +cntl.Reset(); +req.Clear(); +res.Clear(); + +// Build GET request +req.getRequest("sample_key"); + +// Execute the request +channel.CallMethod(NULL, &cntl, &req, &res, NULL); + +if (cntl.Failed()) { + LOG(ERROR) << "Unable to get value for key: " << cntl.ErrorText(); + return -1; +} + +// Parse response - GET returns value and flags +std::string value; +uint32_t flags; +if (res.popGet(&value, &flags, &cas)) { + std::cout << "Key-Value Retrieval Successful" << std::endl; + std::cout << "Retrieved Value: " << value << std::endl; +} else { + std::cout << "Key-Value Retrieval Failed with status code: " + << std::hex << res._status_code << std::endl; + std::cout << "Error Message: " << res.lastError() << std::endl; + return -1; +} +``` + +**6. DELETE Operation (Remove Document)** +```cpp +// Reset for new operation +cntl.Reset(); +req.Clear(); +res.Clear(); + +// Build DELETE request +req.deleteRequest("sample_key"); + +// Execute the request +channel.CallMethod(NULL, &cntl, &req, &res, NULL); + +if (cntl.Failed()) { + LOG(ERROR) << "Unable to delete key-value: " << cntl.ErrorText(); + return -1; +} + +// Parse response +if (res.popDelete()) { + std::cout << "Key-Value Deletion Successful" << std::endl; +} else { + std::cout << "Key-Value Deletion Failed with status code: " + << std::hex << res._status_code << std::endl; + std::cout << "Error Message: " << res.lastError() << std::endl; + return -1; +} +``` + +#### Key Differences: Traditional vs High-Level API + +| Aspect | Traditional API | High-Level API | +|--------|----------------|----------------| +| **Setup** | Manual channel, controller, request/response management | Single `CouchbaseOperations` instance | +| **Error Handling** | Check both `cntl.Failed()` and response status | Simple `Result.success` boolean | +| **Resource Management** | Must call `cntl.Reset()`, `req.Clear()`, `res.Clear()` | Automatic | +| **Response Parsing** | Manual 
`pop*()` calls with CAS handling | Transparent | +| **Code Verbosity** | ~15-20 lines per operation | ~2-3 lines per operation | +| **Collections** | Manual collection ID retrieval and management | Automatic with collection name parameter | +| **Pipeline Operations** | Complex manual request building | Simple `beginPipeline()`, `pipelineRequest()`, `executePipeline()` | +| **SSL Support** | Manual SSL configuration in channel options | Built-in `authenticateSSL()` method | +| **Threading** | Manual connection pooling management | Automatic connection group management | + +--- +### 6. Request/Response Classes (`CouchbaseRequest`/`CouchbaseResponse`) + +These classes are public in `CouchbaseOperations` and can be used for advanced bRPC programs. The high-level API uses these classes internally, and the traditional client example demonstrates their direct usage. They are responsible for building the request that needs to be sent and received over the channel. + + +#### Response Parsing: +Each `pop*` method consumes the front of the internal response buffer, validating: +1. Header present. +2. Opcode matches expected operation. +3. Status == success (otherwise `_err` filled with formatted message). +4. Body length sufficient. + +--- +### 7. Example Client Walkthrough + +#### Single-Threaded Example (`couchbase_client.cpp`) +Uses the **high-level `CouchbaseOperations` API**: + +1. **Create `CouchbaseOperations` instance** - can create more than one per thread. +```cpp +brpc::CouchbaseOperations couchbase_ops; +``` + +2. **Prompt for credentials** - username/password for authentication. +```cpp +std::string username = "Administrator"; +std::string password = "password"; +while (username.empty() || password.empty()) { + std::cout << "Enter Couchbase username: "; + std::cin >> username; + std::cout << "Enter Couchbase password: "; + std::cin >> password; +} +``` + +3. **Authentication with bucket selection** - `authenticate()` for local, `authenticateSSL()` for Capella. 
+ +**Function Signatures:** +```cpp +// Non-SSL authentication +Result authenticate(const string& username, // Couchbase username + const string& password, // Couchbase password + const string& server_address, // Server host:port (e.g., "localhost:11210") + const string& bucket_name); // Target bucket name + +// SSL authentication +Result authenticateSSL(const string& username, // Couchbase username + const string& password, // Couchbase password + const string& server_address, // Server host:port (e.g., "cluster.cloud.couchbase.com:11207") + const string& bucket_name, // Target bucket name + string path_to_cert); // Path to SSL certificate file +``` + +**Usage Examples:** +```cpp +// For local Couchbase (non-SSL) +brpc::CouchbaseOperations::Result auth_result = + couchbase_ops.authenticate(username, password, FLAGS_server, "testing"); + +// For Couchbase Capella (SSL) +// brpc::CouchbaseOperations::Result auth_result = +// couchbase_ops.authenticateSSL(username, password, "cluster.cloud.couchbase.com:11207", +// "bucket_name", "path/to/cert.txt"); + +if (!auth_result.success) { + LOG(ERROR) << "Authentication failed: " << auth_result.error_message; + return -1; +} +``` + +4. 
**Basic CRUD operations**: + - Add document (should succeed) + - Try adding same key again (should fail with "key exists") + - Get document (retrieve the added document) + +**Function Signatures:** +```cpp +// ADD operation - creates new document, fails if key exists +Result add(const string& key, // Document key/ID + const string& value, // Document value (JSON string) + string collection_name = "_default"); // Collection name (optional, defaults to "_default") + +// GET operation - retrieves document by key +Result get(const string& key, // Document key/ID to retrieve + string collection_name = "_default"); // Collection name (optional, defaults to "_default") +``` + +**Usage Examples:** +```cpp +std::string add_key = "user::test_brpc_binprot"; +std::string add_value = R"({"name": "John Doe", "age": 30, "email": "john@example.com"})"; + +// First ADD operation (should succeed) +brpc::CouchbaseOperations::Result add_result = couchbase_ops.add(add_key, add_value); +if (add_result.success) { + std::cout << "ADD operation successful" << std::endl; +} else { + std::cout << "ADD operation failed: " << add_result.error_message << std::endl; +} + +// Second ADD operation (should fail - key exists) +brpc::CouchbaseOperations::Result add_result2 = couchbase_ops.add(add_key, add_value); +if (!add_result2.success) { + std::cout << "Second ADD failed as expected: " << add_result2.error_message << std::endl; +} + +// GET operation +brpc::CouchbaseOperations::Result get_result = couchbase_ops.get(add_key); +if (get_result.success) { + std::cout << "GET operation successful" << std::endl; + std::cout << "GET value: " << get_result.value << std::endl; +} +``` + +5. **Multiple document operations** - Add several documents with different keys. 
+```cpp +std::string item1_key = "binprot_item1"; +std::string item2_key = "binprot_item2"; +std::string item3_key = "binprot_item3"; + +couchbase_ops.add(item1_key, add_value); +couchbase_ops.add(item2_key, add_value); +couchbase_ops.add(item3_key, add_value); +``` + +6. **Upsert operations**: + - Upsert existing document (should update) + - Upsert new document (should create) + - Verify with Get operations + +**Function Signature:** +```cpp +// UPSERT operation - creates new document or updates existing one +Result upsert(const string& key, // Document key/ID + const string& value, // Document value (JSON string) + string collection_name = "_default"); // Collection name (optional, defaults to "_default") +``` + +**Usage Examples:** +```cpp +std::string upsert_key = "upsert_test"; +std::string upsert_value = R"({"operation": "upsert", "version": 1})"; + +// Upsert new document (will create) +brpc::CouchbaseOperations::Result upsert_result = couchbase_ops.upsert(upsert_key, upsert_value); + +// Upsert existing document (will update) +std::string updated_value = R"({"operation": "upsert", "version": 2})"; +brpc::CouchbaseOperations::Result update_result = couchbase_ops.upsert(upsert_key, updated_value); + +// Verify with GET +brpc::CouchbaseOperations::Result check_result = couchbase_ops.get(upsert_key); +``` + +7. 
**Delete operations**: + - Delete non-existent key (should fail gracefully) + - Delete existing key (should succeed) + +**Function Signature:** +```cpp +// DELETE operation - removes document by key +Result delete_(const string& key, // Document key/ID to delete + string collection_name = "_default"); // Collection name (optional, defaults to "_default") +``` + +**Usage Examples:** +```cpp +// Delete non-existent key +std::string delete_key = "non_existent_key"; +brpc::CouchbaseOperations::Result delete_result = couchbase_ops.delete_(delete_key); +if (!delete_result.success) { + std::cout << "Delete failed as expected: " << delete_result.error_message << std::endl; +} + +// Delete existing key +std::string delete_existing_key = "binprot_item1"; +brpc::CouchbaseOperations::Result delete_existing_result = couchbase_ops.delete_(delete_existing_key); +if (delete_existing_result.success) { + std::cout << "Delete existing key successful" << std::endl; +} +``` + +8. **Collection-scoped operations** - Add/Get/Upsert/Delete in specific collections. + +**Note:** All CRUD operations support an optional collection parameter. When not specified, operations default to the "_default" collection. 
+ +**Usage Examples:** +```cpp +std::string collection_name = "testing_collection"; // Target collection name +std::string coll_key = "collection::doc1"; // Document key +std::string coll_value = R"({"collection_operation": "add", "scope": "custom"})"; // Document value + +// Collection-scoped ADD (key, value, collection_name) +brpc::CouchbaseOperations::Result coll_add_result = + couchbase_ops.add(coll_key, coll_value, collection_name); + +// Collection-scoped GET (key, collection_name) +brpc::CouchbaseOperations::Result coll_get_result = + couchbase_ops.get(coll_key, collection_name); + +// Collection-scoped UPSERT (key, value, collection_name) +brpc::CouchbaseOperations::Result coll_upsert_result = + couchbase_ops.upsert(coll_key, coll_value, collection_name); + +// Collection-scoped DELETE (key, collection_name) +brpc::CouchbaseOperations::Result coll_delete_result = + couchbase_ops.delete_(coll_key, collection_name); +``` + +9. **Pipeline operations demo**: + - Begin pipeline and add multiple operations + - Execute batch operations in single network call + - Process results in order + - Collection-scoped pipeline operations + - Error handling and cleanup + +**Function Signatures:** +```cpp +// Pipeline management functions +bool beginPipeline(); // Start a new pipeline session + +bool pipelineRequest(operation_type op_type, // Operation type (ADD, UPSERT, GET, DELETE, etc.) 
+ const string& key, // Document key/ID + const string& value = "", // Document value (empty for GET/DELETE operations) + string collection_name = "_default"); // Collection name (optional) + +vector<Result> executePipeline(); // Execute all queued operations and return results + +bool clearPipeline(); // Clear pipeline without executing (cleanup) + +// Pipeline status functions +bool isPipelineActive() const; // Check if pipeline is active +size_t getPipelineSize() const; // Get number of queued operations +``` + +**Usage Examples:** +```cpp +// Begin pipeline +if (!couchbase_ops.beginPipeline()) { + std::cout << "Failed to begin pipeline" << std::endl; + return -1; +} + +// Add multiple operations to pipeline +std::string pipeline_key1 = "pipeline::doc1"; +std::string pipeline_key2 = "pipeline::doc2"; +std::string pipeline_value1 = R"({"operation": "pipeline_add", "id": 1})"; +std::string pipeline_value2 = R"({"operation": "pipeline_upsert", "id": 2})"; + +bool pipeline_success = true; +// pipelineRequest(operation_type, key, value, collection_name) +pipeline_success &= couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::ADD, pipeline_key1, pipeline_value1); +pipeline_success &= couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::UPSERT, pipeline_key2, pipeline_value2); +pipeline_success &= couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::GET, pipeline_key1); // Empty value for GET + +if (!pipeline_success) { + couchbase_ops.clearPipeline(); // Clean up on error + return -1; +} + +// Execute pipeline - returns results in same order as requests +std::vector<brpc::CouchbaseOperations::Result> pipeline_results = couchbase_ops.executePipeline(); + +// Process results +for (size_t i = 0; i < pipeline_results.size(); ++i) { + if (pipeline_results[i].success) { + std::cout << "Operation " << (i + 1) << " SUCCESS"; + if (!pipeline_results[i].value.empty()) { + std::cout << " - Value: " << pipeline_results[i].value; + } + std::cout << std::endl; + } else { + std::cout << "Operation " << (i + 1) 
<< " FAILED: " + << pipeline_results[i].error_message << std::endl; + } +} + +// Collection-scoped pipeline operations +if (couchbase_ops.beginPipeline()) { + // pipelineRequest(operation_type, key, value, collection_name) + couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::ADD, "coll_pipeline::doc1", + R"({"collection_operation": "pipeline_add", "id": 1})", collection_name); + couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::GET, "coll_pipeline::doc1", "", collection_name); + auto coll_results = couchbase_ops.executePipeline(); +} +``` + +10. **Bucket switching** - Demonstrate changing bucket selection. + +**Function Signature:** +```cpp +// SELECTBUCKET operation - switch to a different bucket on the same server +Result selectBucket(const string& bucket_name); // Target bucket name to switch to +``` + +**Usage Example:** +```cpp +std::string bucket_name = "testing"; +std::cout << "Enter Couchbase bucket name: "; +std::cin >> bucket_name; + +// selectBucket(bucket_name) - switches to the specified bucket +brpc::CouchbaseOperations::Result bucket_result = couchbase_ops.selectBucket(bucket_name); +if (!bucket_result.success) { + LOG(ERROR) << "Bucket selection failed: " << bucket_result.error_message; + return -1; +} else { + std::cout << "Bucket Selection Successful" << std::endl; +} + +// Perform operations on new bucket +performOperations(couchbase_ops); +``` + +#### Multithreaded Example (`multithreaded_couchbase_client.cpp`) +Demonstrates: +- **20 bthreads** (5 threads per bucket across 4 buckets) +- **Multiple threading patterns**: Each thread can create its own instance or share instances +- **Concurrent operations** across multiple buckets and collections +- **Thread-safe statistics tracking** for operations +- **Collection-scoped operations** across threads + +**Global Configuration**: +```cpp +const int NUM_THREADS = 20; +const int THREADS_PER_BUCKET = 5; + +// Global config structure +struct { + std::string username = "Administrator"; + 
std::string password = "password"; + std::vector bucket_names = {"t0", "t1", "t2", "t3"}; +} g_config; + +// Thread statistics tracking +struct ThreadStats { + std::atomic operations_attempted{0}; + std::atomic operations_successful{0}; + std::atomic operations_failed{0}; +}; + +struct GlobalStats { + ThreadStats total; + std::vector per_thread_stats; + GlobalStats() : per_thread_stats(NUM_THREADS) {} +} g_stats; +``` + +**Thread Worker Function**: +```cpp +struct ThreadArgs { + int thread_id; + int bucket_id; + std::string bucket_name; + ThreadStats* stats; +}; + +void* thread_worker(void* arg) { + ThreadArgs* args = static_cast(arg); + + // Create CouchbaseOperations instance for this thread + brpc::CouchbaseOperations couchbase_ops; + + // Authentication with assigned bucket + brpc::CouchbaseOperations::Result auth_result = couchbase_ops.authenticate( + g_config.username, g_config.password, "127.0.0.1:11210", args->bucket_name); + + // For SSL authentication: + // brpc::CouchbaseOperations::Result auth_result = couchbase_ops.authenticateSSL( + // g_config.username, g_config.password, "127.0.0.1:11207", args->bucket_name, "/path/to/cert.txt"); + + if (!auth_result.success) { + std::cout << "Thread " << args->thread_id << ": Auth failed - " + << auth_result.error_message << std::endl; + return NULL; + } + + // Perform CRUD operations on default collection + std::string base_key = "thread_" + std::to_string(args->thread_id); + perform_crud_operations_default(couchbase_ops, base_key, args->stats); + + // Perform collection-scoped operations + perform_crud_operations_collection(couchbase_ops, base_key, "my_collection", args->stats); + + return NULL; +} +``` + +**CRUD Operations Functions**: +```cpp +void perform_crud_operations_default(brpc::CouchbaseOperations& couchbase_ops, + const std::string& base_key, ThreadStats* stats) { + std::string key = base_key + "_default"; + std::string value = R"({"thread_id": %d, "collection": "default"})"; + + 
stats->operations_attempted++; + + // UPSERT operation + brpc::CouchbaseOperations::Result result = couchbase_ops.upsert(key, value); + if (result.success) { + stats->operations_successful++; + } else { + stats->operations_failed++; + } + + // GET operation + stats->operations_attempted++; + result = couchbase_ops.get(key); + if (result.success) { + stats->operations_successful++; + } else { + stats->operations_failed++; + } + + // DELETE operation + stats->operations_attempted++; + result = couchbase_ops.delete_(key); + if (result.success) { + stats->operations_successful++; + } else { + stats->operations_failed++; + } +} + +void perform_crud_operations_collection(brpc::CouchbaseOperations& couchbase_ops, + const std::string& base_key, + const std::string& collection_name, + ThreadStats* stats) { + std::string key = base_key + "_collection"; + std::string value = R"({"thread_id": %d, "collection": ")" + collection_name + R"("})"; + + // Collection-scoped operations + stats->operations_attempted++; + brpc::CouchbaseOperations::Result result = couchbase_ops.upsert(key, value, collection_name); + if (result.success) { + stats->operations_successful++; + } else { + stats->operations_failed++; + } + + stats->operations_attempted++; + result = couchbase_ops.get(key, collection_name); + if (result.success) { + stats->operations_successful++; + } else { + stats->operations_failed++; + } +} +``` + +**Main Function - Thread Management**: +```cpp +int main(int argc, char* argv[]) { + std::vector threads(NUM_THREADS); + std::vector thread_args(NUM_THREADS); + + // Create threads - 5 threads per bucket across 4 buckets + for (int i = 0; i < NUM_THREADS; ++i) { + thread_args[i].thread_id = i; + thread_args[i].bucket_id = i / THREADS_PER_BUCKET; + thread_args[i].bucket_name = g_config.bucket_names[thread_args[i].bucket_id]; + thread_args[i].stats = &g_stats.per_thread_stats[i]; + + if (bthread_start_background(&threads[i], NULL, thread_worker, &thread_args[i]) != 0) { + 
LOG(ERROR) << "Failed to create thread " << i; + return -1; + } + } + + // Wait for all threads to complete + for (int i = 0; i < NUM_THREADS; ++i) { + bthread_join(threads[i], NULL); + } + + // Aggregate and display statistics + g_stats.aggregate_stats(); + std::cout << "Total operations attempted: " << g_stats.total.operations_attempted.load() << std::endl; + std::cout << "Total operations successful: " << g_stats.total.operations_successful.load() << std::endl; + std::cout << "Total operations failed: " << g_stats.total.operations_failed.load() << std::endl; + + return 0; +} +``` + +**Alternative Pattern - Shared Instance Demo**: +```cpp +// Shared instance worker function +void* shared_object_thread_worker(void *arg) { + ThreadArgs* shared_args = static_cast(arg); + brpc::CouchbaseOperations* shared_couchbase_ops = shared_args->couchbase_ops; + + // Perform operations - 10 times on default collection, 10 times on col1 collection + for (int i = 0; i < 10; ++i) { + std::string base_key = butil::string_printf("shared_thread_op_%d_thread_id_%d", + i, shared_args->thread_id); + + // CRUD operations on default collection using shared instance + perform_crud_operations_default(*shared_couchbase_ops, base_key, shared_args->stats); + + // CRUD operations on col1 collection using shared instance + perform_crud_operations_col1(*shared_couchbase_ops, base_key, shared_args->stats); + + // Small delay between operations + bthread_usleep(10000); // 10ms + } + return NULL; +} + +// Main function demonstrates shared instance pattern +int main_shared_demo() { + // Create a shared CouchbaseOperations instance + brpc::CouchbaseOperations shared_couchbase_ops; + brpc::CouchbaseOperations::Result result; + + // Authenticate shared instance + result = shared_couchbase_ops.authenticate( + g_config.username, g_config.password, "127.0.0.1:11210", "t0"); + + if (result.success) { + std::cout << GREEN << "Shared CouchbaseOperations instance authenticated successfully!" 
+ << RESET << std::endl; + } else { + std::cout << RED << "Shared CouchbaseOperations instance authentication failed: " + << result.error_message << RESET << std::endl; + return -1; + } + + // Configure all threads to use the shared instance + std::vector threads(NUM_THREADS); + std::vector args(NUM_THREADS); + + for (int i = 0; i < NUM_THREADS; ++i) { + args[i].thread_id = i; + args[i].couchbase_ops = &shared_couchbase_ops; // Point to shared instance + args[i].bucket_id = 0; + args[i].bucket_name = "t0"; // All threads use same bucket via shared instance + args[i].stats = &g_stats.per_thread_stats[i]; + } + + // Start all threads using shared instance + for (int i = 0; i < NUM_THREADS; ++i) { + if (bthread_start_background(&threads[i], NULL, shared_object_thread_worker, &args[i]) != 0) { + std::cout << RED << "Failed to create shared object thread " << i << RESET << std::endl; + return -1; + } + } + + // Wait for all threads to complete + for (int i = 0; i < NUM_THREADS; ++i) { + bthread_join(threads[i], NULL); + } + + std::cout << GREEN << "All shared object threads completed!" << RESET << std::endl; + return 0; +} +``` + +Key features: +- Demonstrates different connection patterns for multithreaded scenarios +- Shows concurrent access to different buckets and collections +- Proper resource management in multithreaded environments +- Statistics tracking across all threads +- Both separate instance and shared instance patterns + +--- +### 8. Building and Running the Examples + +#### Build both examples: +```bash +cd example/couchbase_c++/ +make +``` + +#### Run Single-Threaded Example (High-Level API): +```bash +./couchbase_client +``` + +#### Run Multithreaded Example (High-Level API): +```bash +./multithreaded_couchbase_client +``` + +#### Run Traditional bRPC Client (Low-Level API): +```bash +./traditional_brpc_couchbase_client +``` + +--- +### 9. Setting Up Couchbase + +#### A. 
Local Install (Non‑Docker) +Download from: https://www.couchbase.com/downloads/ (Community or Enterprise) and Install. + +Setup steps: +- Open http://localhost:8091 in a browser and follow setup wizard +- Set admin credentials (Administrator / password) +- Accept terms, choose services (Data, Query, Index at minimum) +- Initialize cluster +- Create a bucket (e.g. travel-sample or custom) + +Create collections (7.0+): +- Navigate: Buckets → Your Bucket → Scopes & Collections +- Add a Scope (optional) or use `_default` +- Add a Collection (e.g. `testing_collection`) + +**SSL Configuration (Optional for Local)**: +```cpp +// Local without SSL - authenticate with bucket selection +auto result = couchbase_ops.authenticate(username, password, "localhost:11210", bucket_name); +``` + +#### B. Couchbase Capella (Cloud) - **SSL Required** +1. Sign up / log in: https://cloud.couchbase.com/ +2. Create a Free Trial or Hosted Cluster +3. Create a bucket (or load sample dataset) +4. **Create database access credentials** with appropriate RBAC roles: + - Data Reader/Writer (minimum) + - Bucket Admin (for bucket operations) +5. **Download SSL Certificate**: + - Go to Cluster → Connect → Download Certificate + - Save as `couchbase-cloud-cert.pem` in your project directory +6. 
**Get connection endpoint**: + - Use the **KV endpoint** (port 11207 for SSL) + - Format: `your-cluster-id.cloud.couchbase.com:11207` + +**Capella SSL Authentication Example**: +```cpp +// Couchbase Capella - SSL is MANDATORY +auto result = couchbase_ops.authenticateSSL( + "your_username", + "your_password", + "your-cluster.cloud.couchbase.com:11207", // SSL port + "your_bucket_name", // bucket name + "couchbase-cloud-cert.pem" // certificate file +); +``` + +**Important Notes for Capella**: +- **SSL is mandatory** - connections without SSL will fail +- Use port **11207** (SSL) instead of 11210 (non-SSL) +- Certificate verification is required for security +- Ensure firewall allows outbound connections on port 11207 + +--- + +### 10. Error Handling Patterns + +#### High-Level API (Recommended) +The `CouchbaseOperations` class uses a simple `Result` struct: + +```cpp +struct Result { + bool success; // true if operation succeeded + string error_message; // human-readable error description + string value; // returned value (for Get operations) + uint16_t status_code; // Couchbase status code (0x00 if success) +}; +``` + +**Recommended Pattern**: +```cpp +auto result = couchbase_ops.add("key", "value"); +if (!result.success) { + LOG(ERROR) << "Add failed: " << result.error_message; + LOG(ERROR) << "Status code: " << result.status_code; + // Handle error appropriately +} else { + std::cout << "Add succeeded!" << std::endl; +} + +// For Get operations, check both success and value +auto get_result = couchbase_ops.get("key"); +if (get_result.success) { + std::cout << "Retrieved: " << get_result.value << std::endl; +} else { + LOG(ERROR) << "Get failed: " << get_result.error_message; + LOG(ERROR) << "Status code: " << get_result.status_code; +} +``` + +--- +### 11. 
Best Practices + +#### Threading Patterns +> **💡 FLEXIBLE THREADING OPTIONS** +> - **Same bucket/server**: Share a single `CouchbaseOperations` instance across threads +> - **Different buckets**: Create separate instances for each bucket within the same server +> - **Different servers**: Create separate instances for each server connection +> - **Connection isolation**: Each instance uses unique connection groups based on server+bucket combination + +#### SSL Security +- **Always use SSL for Couchbase Capella** (cloud deployments) +- **Verify certificates** - don't disable certificate validation in production +- **Use port 11207** for SSL connections +- **Store certificates securely** and update them when they expire + +#### Performance +- **Reuse `CouchbaseOperations` instances** - they maintain persistent connections +- **Use pipeline operations for bulk operations** +- **Pipeline operations preserve order** - results correspond to request order + +#### Threading Examples +```cpp +// Option 1: Shared instance for same bucket +brpc::CouchbaseOperations shared_ops; +shared_ops.authenticate(username, password, server_address, bucket_name); + +void worker_thread_1() { + shared_ops.add("key1", "value1"); // Safe to share +} +void worker_thread_2() { + shared_ops.get("key2"); // Safe to share +} + +// Option 2: Separate instances for different buckets +brpc::CouchbaseOperations ops_bucket1; +brpc::CouchbaseOperations ops_bucket2; +ops_bucket1.authenticate(username, password, server_address, "bucket1"); +ops_bucket2.authenticate(username, password, server_address, "bucket2"); + +// Option 3: Separate instances for different servers +brpc::CouchbaseOperations ops_server1; +brpc::CouchbaseOperations ops_server2; +ops_server1.authenticate(username, password, "server1:11210", bucket_name); +ops_server2.authenticate(username, password, "server2:11210", bucket_name); +``` + +--- +### 12. 
Summary and References +This implementation provides high-level APIs for Couchbase KV operations. Couchbase (the company) contributed to this implementation, but it is not officially supported; it is "[Community Supported](https://docs.couchbase.com/server/current/third-party/integrations.html#support-model)". + +--- + +## 💡 **THREADING USAGE PATTERNS** 💡 +> +> **✅ PATTERN 1: Shared instance when multiple threads are operating on the same bucket** +> ```cpp +> brpc::CouchbaseOperations shared_ops; +> shared_ops.authenticate(username, password, "server:11210", "my_bucket"); +> +> void worker_thread_1() { +> shared_ops.add("key1", "value1"); // ✅ Safe to share +> } +> void worker_thread_2() { +> shared_ops.get("key2"); // ✅ Safe to share +> } +> ``` +> +> **✅ PATTERN 2: Separate instances when different threads will be operating on different buckets** +> ```cpp +> void worker_thread1() { +> brpc::CouchbaseOperations ops_bucket1; +> ops_bucket1.authenticate(username, password, "server:11210", "bucket1"); +> ops_bucket1.add("key1", "value1"); +> } +> void worker_thread2() { +> brpc::CouchbaseOperations ops_bucket2; +> ops_bucket2.authenticate(username, password, "server:11210", "bucket2"); +> ops_bucket2.add("key1", "value1"); +> } +> ``` +> +> **✅ PATTERN 3: Separate instances when threads are operating on different servers.** +> ```cpp +> void worker_thread1() { +> brpc::CouchbaseOperations ops_server1; +> ops_server1.authenticate(username, password, "server1:11210", "bucket1"); +> ops_server1.add("key1", "value1"); +> } +> void worker_thread2() { +> brpc::CouchbaseOperations ops_server2; +> ops_server2.authenticate(username, password, "server2:11210", "bucket2"); +> ops_server2.add("key1", "value1"); +> } +> ``` +> +> **For additional Couchbase features, consider the couchbase-cxx-SDK version of bRPC, which provides a more complete set of Couchbase features and can be accessed at 
[Couchbaselabs-cb-brpc](https://github.com/couchbaselabs/cb_brpc/tree/couchbase_sdk_brpc).** + + +Contributions and issue reports are welcome! diff --git a/example/couchbase_c++/Makefile b/example/couchbase_c++/Makefile new file mode 100644 index 0000000000..f41e4b6b72 --- /dev/null +++ b/example/couchbase_c++/Makefile @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +BRPC_PATH = ../../ +include $(BRPC_PATH)/config.mk +CXXFLAGS+=$(CPPFLAGS) -std=c++17 -DNDEBUG -O2 -pipe -W -Wall -fPIC -fno-omit-frame-pointer +HDRS+=$(BRPC_PATH)/output/include +LIBS+=$(BRPC_PATH)/output/lib +HDRPATHS = $(addprefix -I, $(HDRS)) +LIBPATHS = $(addprefix -L, $(LIBS)) +COMMA=, +SOPATHS=$(addprefix -Wl$(COMMA)-rpath$(COMMA), $(LIBS)) + +# Define targets and their sources +TARGETS = couchbase_client multithreaded_couchbase_client traditional_brpc_couchbase_client +COUCHBASE_CLIENT_OBJS = couchbase_client.o +MULTITHREADED_CLIENT_OBJS = multithreaded_couchbase_client.o +TRADITIONAL_CLIENT_OBJS = traditional_brpc_couchbase_client.o +ALL_OBJS = $(COUCHBASE_CLIENT_OBJS) $(MULTITHREADED_CLIENT_OBJS) $(TRADITIONAL_CLIENT_OBJS) + +ifeq ($(SYSTEM),Darwin) + ifneq ("$(LINK_SO)", "") + STATIC_LINKINGS += -lbrpc + else + # *.a must be explicitly specified in clang + STATIC_LINKINGS += $(BRPC_PATH)/output/lib/libbrpc.a + endif + LINK_OPTIONS_SO = $^ $(STATIC_LINKINGS) $(DYNAMIC_LINKINGS) + LINK_OPTIONS = $^ $(STATIC_LINKINGS) $(DYNAMIC_LINKINGS) +else ifeq ($(SYSTEM),Linux) + STATIC_LINKINGS += -lbrpc + LINK_OPTIONS_SO = -Xlinker "-(" $^ -Xlinker "-)" $(STATIC_LINKINGS) $(DYNAMIC_LINKINGS) + LINK_OPTIONS = -Xlinker "-(" $^ -Wl,-Bstatic $(STATIC_LINKINGS) -Wl,-Bdynamic -Xlinker "-)" $(DYNAMIC_LINKINGS) +endif + +.PHONY: all clean couchbase_client multithreaded_couchbase_client help + +# Default target builds both clients +all: $(TARGETS) + +clean: + @echo "> Cleaning" + rm -rf $(TARGETS) $(ALL_OBJS) + +# Build rules for individual targets +couchbase_client: $(COUCHBASE_CLIENT_OBJS) + @echo "> Linking $@" +ifneq ("$(LINK_SO)", "") + $(CXX) $(LIBPATHS) $(SOPATHS) $(LINK_OPTIONS_SO) -o $@ +else + $(CXX) $(LIBPATHS) $(LINK_OPTIONS) -o $@ +endif + +multithreaded_couchbase_client: $(MULTITHREADED_CLIENT_OBJS) + @echo "> Linking $@" +ifneq ("$(LINK_SO)", "") + $(CXX) $(LIBPATHS) $(SOPATHS) $(LINK_OPTIONS_SO) -o $@ +else + $(CXX) $(LIBPATHS) $(LINK_OPTIONS) -o $@ +endif + 
+traditional_brpc_couchbase_client: $(TRADITIONAL_CLIENT_OBJS) + @echo "> Linking $@" +ifneq ("$(LINK_SO)", "") + $(CXX) $(LIBPATHS) $(SOPATHS) $(LINK_OPTIONS_SO) -o $@ +else + $(CXX) $(LIBPATHS) $(LINK_OPTIONS) -o $@ +endif + +# Compilation rules +couchbase_client.o: couchbase_client.cpp + @echo "> Compiling $@" + $(CXX) -c $(HDRPATHS) $(CXXFLAGS) $< -o $@ + +multithreaded_couchbase_client.o: multithreaded_couchbase_client.cpp + @echo "> Compiling $@" + $(CXX) -c $(HDRPATHS) $(CXXFLAGS) $< -o $@ + +traditional_brpc_couchbase_client.o: traditional_brpc_couchbase_client.cpp + @echo "> Compiling $@" + $(CXX) -c $(HDRPATHS) $(CXXFLAGS) $< -o $@ \ No newline at end of file diff --git a/example/couchbase_c++/couchbase_client.cpp b/example/couchbase_c++/couchbase_client.cpp new file mode 100644 index 0000000000..b1dc90635c --- /dev/null +++ b/example/couchbase_c++/couchbase_client.cpp @@ -0,0 +1,451 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include + +#include +#include + +// ANSI color codes for console output +#define GREEN "\033[32m" +#define RED "\033[31m" +#define RESET "\033[0m" + +DEFINE_string(server, "localhost:11210", "IP Address of server"); +int performOperations(brpc::CouchbaseOperations& couchbase_ops) { + std::string add_key = "user::test_brpc_binprot"; + std::string add_value = + R"({"name": "John Doe", "age": 30, "email": "john@example.com"})"; + + brpc::CouchbaseOperations::Result add_result = + couchbase_ops.add(add_key, add_value); + if (add_result.success) { + std::cout << GREEN << "ADD operation successful" << RESET << std::endl; + } else { + std::cout << RED << "ADD operation failed: " << add_result.error_message + << RESET << std::endl; + } + + // Try to ADD the same key again (should fail with key exists) + brpc::CouchbaseOperations::Result add_result2 = + couchbase_ops.add(add_key, add_value); + if (add_result2.success) { + std::cout << GREEN << "Second ADD operation unexpectedly successful" + << RESET << std::endl; + } else { + std::cout << RED << "Second ADD operation failed as expected: " + << add_result2.error_message << RESET << std::endl; + } + // Get operation using high-level method + brpc::CouchbaseOperations::Result get_result = couchbase_ops.get(add_key); + if (get_result.success) { + std::cout << GREEN << "GET operation successful" << RESET << std::endl; + std::cout << "GET value: " << get_result.value << std::endl; + } else { + std::cout << RED << "GET operation failed: " << get_result.error_message + << RESET << std::endl; + } + + // Add binprot item1 using high-level method + std::string item1_key = "binprot_item1"; + brpc::CouchbaseOperations::Result item1_result = + couchbase_ops.add(item1_key, add_value); + if (item1_result.success) { + std::cout << GREEN << "ADD binprot item1 successful" << RESET << std::endl; + } else { + std::cout << RED + << "ADD binprot item1 failed: " << item1_result.error_message + << RESET << std::endl; + } + 
+ // Add binprot item2 using high-level method + std::string item2_key = "binprot_item2"; + brpc::CouchbaseOperations::Result item2_result = + couchbase_ops.add(item2_key, add_value); + if (item2_result.success) { + std::cout << GREEN << "ADD binprot item2 successful" << RESET << std::endl; + } else { + std::cout << RED + << "ADD binprot item2 failed: " << item2_result.error_message + << RESET << std::endl; + } + + // Add binprot item3 using high-level method + std::string item3_key = "binprot_item3"; + brpc::CouchbaseOperations::Result item3_result = + couchbase_ops.add(item3_key, add_value); + if (item3_result.success) { + std::cout << GREEN << "ADD binprot item3 successful" << RESET << std::endl; + } else { + std::cout << RED + << "ADD binprot item3 failed: " << item3_result.error_message + << RESET << std::endl; + } + + // Perform an UPSERT on the existing key using high-level method + std::string upsert_key = "user::test_brpc_binprot"; + std::string upsert_value = + R"({"name": "Upserted Jane Doe", "age": 28, "email": "upserted.doe@example.com"})"; + brpc::CouchbaseOperations::Result upsert_result = + couchbase_ops.upsert(upsert_key, upsert_value); + if (upsert_result.success) { + std::cout + << GREEN + << "UPSERT operation successful when the document exists in the server" + << RESET << std::endl; + } else { + std::cout + << RED + << "UPSERT operation failed when the document exists in the server: " + << upsert_result.error_message << RESET << std::endl; + } + // Do UPSERT operation on a new document using high-level method + std::string new_upsert_key = "user::test_brpc_new_upsert"; + std::string new_upsert_value = + R"({"name": "Jane Doe", "age": 28, "email": "jane.doe@example.com"})"; + brpc::CouchbaseOperations::Result new_upsert_result = + couchbase_ops.upsert(new_upsert_key, new_upsert_value); + if (new_upsert_result.success) { + std::cout << GREEN + << "UPSERT operation successful when the document doesn't exist " + "in the server" + << RESET << 
std::endl; + } else { + std::cout << RED + << "UPSERT operation failed when document does not exist in the " + "server: " + << new_upsert_result.error_message << RESET << std::endl; + } + + // Check the upserted data using high-level method + std::string check_key = "user::test_brpc_new_upsert"; + brpc::CouchbaseOperations::Result check_result = couchbase_ops.get(check_key); + if (check_result.success) { + std::cout << GREEN << "GET after UPSERT operation successful - Value: " + << check_result.value << RESET << std::endl; + } else { + std::cout << RED << "GET after UPSERT operation failed: " + << check_result.error_message << RESET << std::endl; + } + + // Delete a non-existent key using high-level method + std::string delete_key = "Nonexistent_key"; + brpc::CouchbaseOperations::Result delete_result = + couchbase_ops.delete_(delete_key); + if (delete_result.success) { + std::cout << GREEN << "DELETE operation successful" << RESET << std::endl; + } else { + std::cout << RED << "DELETE operation failed: as expected " + << delete_result.error_message << RESET << std::endl; + } + + // Delete the existing key using high-level method + std::string delete_existing_key = "user::test_brpc_binprot"; + brpc::CouchbaseOperations::Result delete_existing_result = + couchbase_ops.delete_(delete_existing_key); + if (delete_existing_result.success) { + std::cout << GREEN << "DELETE operation successful" << RESET << std::endl; + } else { + std::cout << RED << "DELETE operation failed: " + << delete_existing_result.error_message << RESET << std::endl; + } + + // Retrieve Collection ID for scope `_default` and collection + // `col1` + const std::string scope_name = "_default"; // default scope + std::string collection_name = "col1"; // target collection + // ------------------------------------------------------------------ + // Collection-scoped CRUD operations (only if collection id was retrieved) + // ------------------------------------------------------------------ + // 1. 
ADD in collection using high-level method + std::string coll_key = "user::collection_doc"; + std::string coll_value = R"({"type":"collection","op":"add","v":1})"; + brpc::CouchbaseOperations::Result coll_add_result = + couchbase_ops.add(coll_key, coll_value, collection_name); + if (coll_add_result.success) { + std::cout << GREEN << "Collection ADD success" << RESET << std::endl; + } else { + std::cout << RED + << "Collection ADD failed: " << coll_add_result.error_message + << RESET << std::endl; + } + // 2. GET from collection using high-level method + brpc::CouchbaseOperations::Result coll_get_result = + couchbase_ops.get(coll_key, collection_name); + if (coll_get_result.success) { + std::cout << GREEN + << "Collection GET success value=" << coll_get_result.value + << RESET << std::endl; + } else { + std::cout << RED + << "Collection GET failed: " << coll_get_result.error_message + << RESET << std::endl; + } + + // 3. UPSERT in collection using high-level method + std::string coll_upsert_value = + R"({"type":"collection","op":"upsert","v":2})"; + brpc::CouchbaseOperations::Result coll_upsert_result = + couchbase_ops.upsert(coll_key, coll_upsert_value, collection_name); + if (coll_upsert_result.success) { + std::cout << GREEN << "Collection UPSERT success" << RESET << std::endl; + } else { + std::cout << RED << "Collection UPSERT failed: " + << coll_upsert_result.error_message << RESET << std::endl; + } + + // 4. GET again to verify upsert using high-level method + brpc::CouchbaseOperations::Result coll_get2_result = + couchbase_ops.get(coll_key, collection_name); + if (coll_get2_result.success) { + std::cout << GREEN + << "Collection GET(after upsert) value=" << coll_get2_result.value + << RESET << std::endl; + } + + // 5. 
DELETE from collection using high-level method + brpc::CouchbaseOperations::Result coll_del_result = + couchbase_ops.delete_(coll_key, collection_name); + if (coll_del_result.success) { + std::cout << GREEN << "Collection DELETE success" << RESET << std::endl; + } else { + std::cout << RED + << "Collection DELETE failed: " << coll_del_result.error_message + << RESET << std::endl; + } + + // ------------------------------------------------------------------ + // Pipeline Operations Demo + // ------------------------------------------------------------------ + std::cout << GREEN << "\n=== Pipeline Operations Demo ===" << RESET + << std::endl; + + // Begin a new pipeline + if (!couchbase_ops.beginPipeline()) { + std::cout << RED << "Failed to begin pipeline" << RESET << std::endl; + return -1; + } + + std::cout << "Pipeline started. Adding multiple operations..." << std::endl; + + // Add multiple operations to the pipeline + std::string pipeline_key1 = "pipeline::doc1"; + std::string pipeline_key2 = "pipeline::doc2"; + std::string pipeline_key3 = "pipeline::doc3"; + std::string pipeline_value1 = R"({"operation": "pipeline_add", "id": 1})"; + std::string pipeline_value2 = R"({"operation": "pipeline_upsert", "id": 2})"; + std::string pipeline_value3 = R"({"operation": "pipeline_add", "id": 3})"; + + // Pipeline operations - all prepared but not yet executed + bool pipeline_success = true; + pipeline_success &= couchbase_ops.pipelineRequest( + brpc::CouchbaseOperations::ADD, pipeline_key1, pipeline_value1); + pipeline_success &= couchbase_ops.pipelineRequest( + brpc::CouchbaseOperations::UPSERT, pipeline_key2, pipeline_value2); + pipeline_success &= couchbase_ops.pipelineRequest( + brpc::CouchbaseOperations::ADD, pipeline_key3, pipeline_value3); + pipeline_success &= couchbase_ops.pipelineRequest( + brpc::CouchbaseOperations::GET, pipeline_key1); + pipeline_success &= couchbase_ops.pipelineRequest( + brpc::CouchbaseOperations::GET, pipeline_key2); + + if 
(!pipeline_success) { + std::cout << RED << "Failed to add operations to pipeline" << RESET + << std::endl; + couchbase_ops.clearPipeline(); + return -1; + } + + std::cout << "Added " << couchbase_ops.getPipelineSize() + << " operations to pipeline" << std::endl; + + // Execute all operations in a single network call + std::cout << "Executing pipeline operations..." << std::endl; + std::vector pipeline_results = + couchbase_ops.executePipeline(); + + // Process results in order + std::cout << GREEN << "Pipeline execution completed. Results:" << RESET + << std::endl; + for (size_t i = 0; i < pipeline_results.size(); ++i) { + const auto& result = pipeline_results[i]; + if (result.success) { + if (!result.value.empty()) { + std::cout << GREEN << " Operation " << (i + 1) + << " SUCCESS - Value: " << result.value << RESET << std::endl; + } else { + std::cout << GREEN << " Operation " << (i + 1) << " SUCCESS" << RESET + << std::endl; + } + } else { + std::cout << RED << " Operation " << (i + 1) + << " FAILED: " << result.error_message << RESET << std::endl; + } + } + + // Demonstrate pipeline with collection operations + std::cout << GREEN << "\n=== Pipeline with Collection Operations ===" << RESET + << std::endl; + + if (!couchbase_ops.beginPipeline()) { + std::cout << RED << "Failed to begin collection pipeline" << RESET + << std::endl; + return -1; + } + + std::string coll_pipeline_key1 = "coll_pipeline::doc1"; + std::string coll_pipeline_key2 = "coll_pipeline::doc2"; + std::string coll_pipeline_value1 = + R"({"collection_operation": "pipeline_add", "id": 1})"; + std::string coll_pipeline_value2 = + R"({"collection_operation": "pipeline_upsert", "id": 2})"; + + // Add collection-scoped operations to pipeline + bool coll_pipeline_success = true; + coll_pipeline_success &= couchbase_ops.pipelineRequest( + brpc::CouchbaseOperations::ADD, coll_pipeline_key1, coll_pipeline_value1, + collection_name); + coll_pipeline_success &= couchbase_ops.pipelineRequest( + 
brpc::CouchbaseOperations::UPSERT, coll_pipeline_key2, + coll_pipeline_value2, collection_name); + coll_pipeline_success &= couchbase_ops.pipelineRequest( + brpc::CouchbaseOperations::GET, coll_pipeline_key1, "", collection_name); + coll_pipeline_success &= + couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::DELETE, + coll_pipeline_key1, "", collection_name); + + if (!coll_pipeline_success) { + std::cout << RED << "Failed to add collection operations to pipeline" + << RESET << std::endl; + couchbase_ops.clearPipeline(); + return -1; + } + + // Execute collection pipeline + std::vector coll_pipeline_results = + couchbase_ops.executePipeline(); + + std::cout << GREEN + << "Collection pipeline execution completed. Results:" << RESET + << std::endl; + for (size_t i = 0; i < coll_pipeline_results.size(); ++i) { + const auto& result = coll_pipeline_results[i]; + if (result.success) { + if (!result.value.empty()) { + std::cout << GREEN << " Collection Operation " << (i + 1) + << " SUCCESS - Value: " << result.value << RESET << std::endl; + } else { + std::cout << GREEN << " Collection Operation " << (i + 1) << " SUCCESS" + << RESET << std::endl; + } + } else { + std::cout << RED << " Collection Operation " << (i + 1) + << " FAILED: " << result.error_message << RESET << std::endl; + } + } + + // Clean up remaining pipeline documents + std::cout << GREEN << "\n=== Cleanup Pipeline Demo ===" << RESET << std::endl; + if (couchbase_ops.beginPipeline()) { + couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::DELETE, + pipeline_key1); + couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::DELETE, + pipeline_key2); + couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::DELETE, + pipeline_key3); + couchbase_ops.pipelineRequest(brpc::CouchbaseOperations::DELETE, + coll_pipeline_key2, "", collection_name); + + std::vector cleanup_results = + couchbase_ops.executePipeline(); + std::cout << "Cleanup completed (" << cleanup_results.size() + << " operations)" << 
std::endl;
+  }
+
+  std::cout << GREEN
+            << "\n=== All operations completed successfully! ===" << RESET
+            << std::endl;
+}
+// Example entry point. Authenticates with the bucket name "testing", runs
+// performOperations(), then selects the bucket explicitly and runs
+// performOperations() a second time.
+// Returns 0 on success, -1 if authentication or bucket selection fails.
+int main() {
+  // High-level client object used for every call below.
+  brpc::CouchbaseOperations couchbase_ops;
+
+  // Credentials are hard-coded for the demo; the prompt loop below only runs
+  // when one of these defaults is changed to an empty string.
+  std::string username = "Administrator";
+  std::string password = "password";
+  while (username.empty() || password.empty()) {
+    std::cout << "Enter Couchbase username: ";
+    std::cin >> username;
+    if (username.empty()) {
+      std::cout << "Username cannot be empty. Please enter again." << std::endl;
+      continue;
+    }
+    std::cout << "Enter Couchbase password: ";
+    std::cin >> password;
+    if (password.empty()) {
+      std::cout << "Password cannot be empty. Please enter again." << std::endl;
+    }
+  }
+
+  // Authenticate against FLAGS_server; the last argument is the bucket name.
+  // For TLS-secured clusters (e.g. Capella) use the SSL authentication
+  // variant with a certificate path instead; check brpc/couchbase.h for its
+  // exact signature.
+  const brpc::CouchbaseOperations::Result auth =
+      couchbase_ops.authenticate(username, password, FLAGS_server, "testing");
+  if (!auth.success) {
+    LOG(ERROR) << "Authentication failed: " << auth.error_message;
+    return -1;
+  }
+  std::cout
+      << GREEN
+      << "Authentication successful, proceeding with Couchbase operations..."
+      << RESET << std::endl;
+
+  performOperations(couchbase_ops);
+
+  // Demonstrate an explicit bucket selection on the same connection. As with
+  // the credentials, the prompt only runs if the default is made empty.
+  std::string bucket_name = "testing";
+  while (bucket_name.empty()) {
+    std::cout << "Enter Couchbase bucket name: ";
+    std::cin >> bucket_name;
+    if (bucket_name.empty()) {
+      std::cout << "Bucket name cannot be empty. Please enter again."
+                << std::endl;
+    }
+  }
+
+  const brpc::CouchbaseOperations::Result selected =
+      couchbase_ops.selectBucket(bucket_name);
+  if (!selected.success) {
+    LOG(ERROR) << "Bucket selection failed: " << selected.error_message;
+    return -1;
+  }
+  std::cout << GREEN << "Bucket Selection Successful" << RESET << std::endl;
+
+  // Run the whole demo again now that the bucket was selected explicitly.
+  performOperations(couchbase_ops);
+  return 0;
+}
diff --git a/example/couchbase_c++/multithreaded_couchbase_client.cpp b/example/couchbase_c++/multithreaded_couchbase_client.cpp
new file mode 100644
index 0000000000..d6dd9e56be
--- /dev/null
+++ b/example/couchbase_c++/multithreaded_couchbase_client.cpp
@@ -0,0 +1,375 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+// ANSI color codes
+#define GREEN "\033[32m"
+#define RED "\033[31m"
+#define BLUE "\033[34m"
+#define YELLOW "\033[33m"
+#define CYAN "\033[36m"
+#define RESET "\033[0m"
+
+const int NUM_THREADS = 20;
+const int THREADS_PER_BUCKET = 5;
+
+// Address every CouchbaseOperations instance in this example connects to.
+const char* const SERVER_ADDRESS = "127.0.0.1:11210";
+
+// Simple global config: credentials plus the buckets the workers are spread
+// over (THREADS_PER_BUCKET consecutive thread ids per bucket).
+struct {
+  std::string username = "Administrator";
+  std::string password = "password";
+  std::vector<std::string> bucket_names = {"t0", "t1", "t2", "t3"};
+} g_config;
+
+// Per-thread operation counters. Atomic so that main() can read them while
+// summarising.
+struct ThreadStats {
+  std::atomic<int> operations_attempted{0};
+  std::atomic<int> operations_successful{0};
+  std::atomic<int> operations_failed{0};
+
+  // Sets all three counters back to zero.
+  void reset() {
+    operations_attempted = 0;
+    operations_successful = 0;
+    operations_failed = 0;
+  }
+};
+
+// Global statistics: one ThreadStats slot per worker plus a running total.
+struct GlobalStats {
+  ThreadStats total;
+  std::vector<ThreadStats> per_thread_stats;
+
+  GlobalStats() : per_thread_stats(NUM_THREADS) {}
+
+  // Recomputes |total| from scratch as the sum of all per-thread counters.
+  void aggregate_stats() {
+    total.reset();
+    for (const auto& stats : per_thread_stats) {
+      total.operations_attempted += stats.operations_attempted.load();
+      total.operations_successful += stats.operations_successful.load();
+      total.operations_failed += stats.operations_failed.load();
+    }
+  }
+} g_stats;
+
+// Arguments handed to a worker. |couchbase_ops| is only set for the
+// shared-instance phase; thread_worker() creates its own instance.
+struct ThreadArgs {
+  int thread_id = 0;
+  int bucket_id = 0;
+  std::string bucket_name;
+  brpc::CouchbaseOperations* couchbase_ops = nullptr;
+  ThreadStats* stats = nullptr;
+};
+
+// Counts one finished operation in |stats| and returns |ok| unchanged.
+static bool record_result(ThreadStats* stats, bool ok) {
+  if (ok) {
+    stats->operations_successful++;
+  } else {
+    stats->operations_failed++;
+  }
+  return ok;
+}
+
+// Joins the first |count| bthreads in |threads|.
+static void join_threads(const std::vector<bthread_t>& threads, int count) {
+  for (int i = 0; i < count; ++i) {
+    bthread_join(threads[i], NULL);
+  }
+}
+
+// Runs UPSERT -> GET -> DELETE for "<base_key>_default" without naming a
+// collection. Every step is counted in |stats|; the sequence stops at the
+// first failing UPSERT or GET.
+void perform_crud_operations_default(brpc::CouchbaseOperations& couchbase_ops,
+                                     const std::string& base_key,
+                                     ThreadStats* stats) {
+  const std::string key = base_key + "_default";
+  // The "thread_id" field carries the bthread id narrowed to int.
+  const std::string value = butil::string_printf(
+      R"({"thread_id": %d, "collection": "default"})",
+      static_cast<int>(bthread_self()));
+
+  // UPSERT
+  stats->operations_attempted++;
+  brpc::CouchbaseOperations::Result result = couchbase_ops.upsert(key, value);
+  if (!record_result(stats, result.success)) {
+    return;
+  }
+
+  // GET
+  stats->operations_attempted++;
+  result = couchbase_ops.get(key);
+  if (!record_result(stats, result.success)) {
+    return;
+  }
+
+  // DELETE
+  stats->operations_attempted++;
+  result = couchbase_ops.delete_(key);
+  record_result(stats, result.success);
+}
+
+// Runs UPSERT -> GET -> DELETE for "<base_key>_col1" in collection "col1".
+// Every step is counted in |stats|; a failing UPSERT or GET is printed and
+// ends the sequence.
+void perform_crud_operations_col1(brpc::CouchbaseOperations& couchbase_ops,
+                                  const std::string& base_key,
+                                  ThreadStats* stats) {
+  const std::string key = base_key + "_col1";
+  // The "thread_id" field carries the bthread id narrowed to int.
+  const std::string value = butil::string_printf(
+      R"({"thread_id": %d, "collection": "col1"})",
+      static_cast<int>(bthread_self()));
+
+  // UPSERT
+  stats->operations_attempted++;
+  brpc::CouchbaseOperations::Result result =
+      couchbase_ops.upsert(key, value, "col1");
+  if (!record_result(stats, result.success)) {
+    std::cout << "UPSERT failed: " << result.error_message << std::endl;
+    return;
+  }
+
+  // GET
+  stats->operations_attempted++;
+  result = couchbase_ops.get(key, "col1");
+  if (!record_result(stats, result.success)) {
+    std::cout << "GET failed: " << result.error_message << std::endl;
+    return;
+  }
+
+  // DELETE
+  stats->operations_attempted++;
+  result = couchbase_ops.delete_(key, "col1");
+  record_result(stats, result.success);
+}
+
+// Worker for the "one instance per thread" phase. |arg| is a ThreadArgs*.
+// Creates its own CouchbaseOperations, authenticates with the assigned
+// bucket name, then runs 10 rounds of CRUD on both collections.
+// Always returns NULL.
+void* thread_worker(void* arg) {
+  ThreadArgs* args = static_cast<ThreadArgs*>(arg);
+
+  std::cout << CYAN << "Thread " << args->thread_id << " starting on bucket "
+            << args->bucket_name << RESET << std::endl;
+
+  // Create CouchbaseOperations instance for this thread
+  brpc::CouchbaseOperations couchbase_ops;
+
+  // Authentication. The bucket name is passed to authenticate(), so no
+  // separate selectBucket() call is made here.
+  // For SSL authentication use the line below instead:
+  // brpc::CouchbaseOperations::Result auth_result =
+  //     couchbase_ops.authenticateSSL(g_config.username, g_config.password,
+  //                                   SERVER_ADDRESS, args->bucket_name,
+  //                                   "/path/to/cert.txt");
+  brpc::CouchbaseOperations::Result auth_result = couchbase_ops.authenticate(
+      g_config.username, g_config.password, SERVER_ADDRESS, args->bucket_name);
+
+  if (!auth_result.success) {
+    std::cout << RED << "Thread " << args->thread_id << ": Auth failed - "
+              << auth_result.error_message << RESET << std::endl;
+    return NULL;
+  }
+
+  std::cout << GREEN << "Thread " << args->thread_id << " connected to bucket "
+            << args->bucket_name << RESET << std::endl;
+
+  // Perform operations - 10 times on default collection, 10 times on col1
+  // collection
+  for (int i = 0; i < 10; ++i) {
+    const std::string base_key =
+        butil::string_printf("thread_%d_op_%d", args->thread_id, i);
+
+    // CRUD operations on default collection
+    perform_crud_operations_default(couchbase_ops, base_key, args->stats);
+
+    // CRUD operations on col1 collection
+    perform_crud_operations_col1(couchbase_ops, base_key, args->stats);
+
+    // Small delay between operations
+    bthread_usleep(10000);  // 10ms
+  }
+
+  const int successful = args->stats->operations_successful.load();
+  const int attempted = args->stats->operations_attempted.load();
+  const int failed = args->stats->operations_failed.load();
+
+  std::cout << GREEN << "Thread " << args->thread_id
+            << " completed: " << successful << "/" << attempted
+            << " operations successful, " << failed << " failed" << RESET
+            << std::endl;
+
+  return NULL;
+}
+
+// Worker for the "shared instance" phase. |arg| is a ThreadArgs* whose
+// couchbase_ops points at an already authenticated instance that is used
+// concurrently by every worker. Always returns NULL.
+void* shared_object_thread_worker(void* arg) {
+  ThreadArgs* shared_args = static_cast<ThreadArgs*>(arg);
+  brpc::CouchbaseOperations* shared_couchbase_ops = shared_args->couchbase_ops;
+  // Perform operations - 10 times on default collection, 10 times on col1
+  // collection
+  for (int i = 0; i < 10; ++i) {
+    const std::string base_key = butil::string_printf(
+        "shared_thread_op_%d_thread_id_%d", i, shared_args->thread_id);
+    // CRUD operations on default collection
+    perform_crud_operations_default(*shared_couchbase_ops, base_key,
+                                    shared_args->stats);
+    // CRUD operations on col1 collection
+    perform_crud_operations_col1(*shared_couchbase_ops, base_key,
+                                 shared_args->stats);
+    // Small delay between operations
+    bthread_usleep(10000);  // 10ms
+  }
+  return NULL;
+}
+
+// Aggregates the per-thread counters and prints totals, success rate and a
+// per-thread breakdown.
+void print_stats() {
+  g_stats.aggregate_stats();
+
+  std::cout << std::endl;
+  std::cout << CYAN << "=== TEST RESULTS ===" << RESET << std::endl;
+
+  const int total_attempted = g_stats.total.operations_attempted.load();
+  const int total_successful = g_stats.total.operations_successful.load();
+  const int total_failed = g_stats.total.operations_failed.load();
+
+  // Guard against division by zero when nothing was attempted.
+  const double success_rate =
+      total_attempted > 0
+          ? static_cast<double>(total_successful) / total_attempted * 100.0
+          : 0.0;
+
+  std::cout << GREEN << "Overall Performance:" << RESET << std::endl;
+  std::cout << "  Total Operations: " << total_attempted << std::endl;
+  std::cout << "  Successful: " << total_successful << " (" << success_rate
+            << "%)" << std::endl;
+  std::cout << "  Failed: " << total_failed << std::endl;
+  std::cout << std::endl;
+
+  // Per-thread breakdown
+  std::cout << YELLOW << "Per-Thread Performance:" << RESET << std::endl;
+  for (int i = 0; i < NUM_THREADS; ++i) {
+    const auto& stats = g_stats.per_thread_stats[i];
+    std::cout << "  Thread " << i << ": " << stats.operations_attempted.load()
+              << " ops, " << stats.operations_successful.load()
+              << " success, " << stats.operations_failed.load() << " failed"
+              << std::endl;
+  }
+  std::cout << std::endl;
+}
+
+// Runs two phases of NUM_THREADS bthreads each: first every thread with its
+// own CouchbaseOperations instance spread over the configured buckets, then
+// every thread sharing one instance on the first bucket. Prints combined
+// statistics at the end. Returns 0 on success and -1 on setup failure.
+int main(int argc, char* argv[]) {
+  std::cout << GREEN << "Starting Simple Multithreaded Couchbase Client"
+            << RESET << std::endl;
+
+  // Build the banner from the actual configuration so it cannot drift from
+  // the bucket names used below.
+  std::cout << YELLOW << NUM_THREADS << " threads: " << THREADS_PER_BUCKET
+            << " per bucket (";
+  for (size_t b = 0; b < g_config.bucket_names.size(); ++b) {
+    std::cout << (b == 0 ? "" : ", ") << g_config.bucket_names[b];
+  }
+  std::cout << ")" << RESET << std::endl;
+  std::cout << BLUE
+            << "Each thread performs CRUD operations on default collection and "
+               "col1 collection"
+            << RESET << std::endl;
+
+  // thread i uses bucket i / THREADS_PER_BUCKET, so make sure that index
+  // exists for every thread before touching bucket_names.
+  const size_t bucket_count = g_config.bucket_names.size();
+  if (static_cast<size_t>(NUM_THREADS) > bucket_count * THREADS_PER_BUCKET) {
+    std::cout << RED << "Not enough buckets configured for " << NUM_THREADS
+              << " threads" << RESET << std::endl;
+    return -1;
+  }
+
+  // Create threads and arguments
+  std::vector<bthread_t> threads(NUM_THREADS);
+  std::vector<ThreadArgs> args(NUM_THREADS);
+
+  // Assign threads to buckets
+  for (int i = 0; i < NUM_THREADS; ++i) {
+    args[i].thread_id = i;
+    args[i].bucket_id = i / THREADS_PER_BUCKET;
+    args[i].bucket_name = g_config.bucket_names[args[i].bucket_id];
+    args[i].stats = &g_stats.per_thread_stats[i];
+  }
+
+  // Print thread assignments
+  std::cout << "Thread Assignments:" << RESET << std::endl;
+  for (int i = 0; i < NUM_THREADS; ++i) {
+    std::cout << "Thread " << i << " -> Bucket: " << args[i].bucket_name
+              << std::endl;
+  }
+  std::cout << std::endl;
+
+  // Start all threads
+  for (int i = 0; i < NUM_THREADS; ++i) {
+    if (bthread_start_background(&threads[i], NULL, thread_worker, &args[i]) !=
+        0) {
+      std::cout << RED << "Failed to create thread " << i << RESET << std::endl;
+      // Workers already running hold pointers into |args|; wait for them
+      // before |args| is destroyed on return.
+      join_threads(threads, i);
+      return -1;
+    }
+  }
+
+  std::cout << GREEN << "All " << NUM_THREADS << " threads started!" << RESET
+            << std::endl;
+
+  // Wait for all threads to complete
+  std::cout << YELLOW << "Waiting for all threads to complete..." << RESET
+            << std::endl;
+  join_threads(threads, NUM_THREADS);
+
+  std::cout << GREEN << "All threads completed!" << RESET << std::endl;
+
+  // Phase 2: one CouchbaseOperations instance shared by every worker.
+  const std::string& shared_bucket = g_config.bucket_names.front();
+  brpc::CouchbaseOperations shared_couchbase_ops;
+  brpc::CouchbaseOperations::Result result = shared_couchbase_ops.authenticate(
+      g_config.username, g_config.password, SERVER_ADDRESS, shared_bucket);
+  if (!result.success) {
+    std::cout << RED
+              << "Shared CouchbaseOperations instance authentication failed: "
+              << result.error_message << RESET << std::endl;
+    return -1;
+  }
+  std::cout << GREEN
+            << "Shared CouchbaseOperations instance authenticated successfully!"
+            << RESET << std::endl;
+
+  // The stats pointers are left as assigned above, so the counters of this
+  // phase accumulate on top of the first phase.
+  for (int i = 0; i < NUM_THREADS; ++i) {
+    args[i].thread_id = i;
+    args[i].couchbase_ops = &shared_couchbase_ops;
+    args[i].bucket_id = 0;
+    args[i].bucket_name = shared_bucket;
+  }
+
+  for (int i = 0; i < NUM_THREADS; ++i) {
+    if (bthread_start_background(&threads[i], NULL,
+                                 shared_object_thread_worker, &args[i]) != 0) {
+      std::cout << RED << "Failed to create shared object thread " << i
+                << RESET << std::endl;
+      // Same as above: never return while workers still use |args| and the
+      // shared instance.
+      join_threads(threads, i);
+      return -1;
+    }
+  }
+  join_threads(threads, NUM_THREADS);
+  std::cout << GREEN << "All shared object threads completed!" << RESET
+            << std::endl;
+
+  // Print statistics
+  print_stats();
+
+  return 0;
+}
diff --git a/example/couchbase_c++/traditional_brpc_couchbase_client.cpp b/example/couchbase_c++/traditional_brpc_couchbase_client.cpp
new file mode 100644
index 0000000000..c63c98cd4f
--- /dev/null
+++ b/example/couchbase_c++/traditional_brpc_couchbase_client.cpp
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include
+
+#include
+#include
+
+// ANSI color codes for console output
+#define GREEN "\033[32m"
+#define RED "\033[31m"
+#define RESET "\033[0m"
+
+// Walks through the low-level ("traditional") bRPC flow against a Couchbase
+// server: authenticate, select bucket "testing", then ADD, GET and DELETE the
+// document "sample_key". Each step builds a CouchbaseRequest, sends it with
+// Channel::CallMethod and decodes the CouchbaseResponse with the matching
+// pop*() call. Returns 0 when every step succeeds, -1 at the first failure.
+int main() {
+  // traditional bRPC Couchbase client
+  brpc::Channel channel;
+  brpc::ChannelOptions options;
+  options.protocol = brpc::PROTOCOL_COUCHBASE;
+  options.connection_type = "single";
+  options.timeout_ms = 1000;  // 1 second
+  options.max_retry = 3;
+  if (channel.Init("localhost:11210", &options) != 0) {
+    LOG(ERROR) << "Failed to initialize channel";
+    return -1;
+  }
+  brpc::Controller cntl;
+  brpc::CouchbaseOperations::CouchbaseRequest req;
+  brpc::CouchbaseOperations::CouchbaseResponse res;
+  // Output slot for the pop*() calls below; its value is not used afterwards.
+  uint64_t cas = 0;
+
+  // Note for every step: the response status code is only updated once the
+  // corresponding pop*() function has been called.
+
+  // --- Authenticate -------------------------------------------------------
+  req.authenticateRequest("Administrator", "password");
+  channel.CallMethod(NULL, &cntl, &req, &res, NULL);
+  if (cntl.Failed()) {
+    LOG(ERROR) << "Unable to authenticate: Something went wrong: "
+               << cntl.ErrorText();
+    return -1;
+  }
+  if (!(res.popHello(&cas) && res.popAuthenticate(&cas))) {
+    std::cout << "Client Authentication Failed with status code: " << std::hex
+              << res._status_code << std::endl;
+    std::cout << "Error Message: " << res.lastError() << std::endl;
+    return -1;
+  }
+  std::cout << "Traditional bRPC Couchbase Client Authentication Successful"
+            << std::endl;
+
+  // --- Select bucket ------------------------------------------------------
+  // Controller, request and response are reused, so reset them first.
+  cntl.Reset();
+  req.Clear();
+  res.Clear();
+  req.selectBucketRequest("testing");
+  channel.CallMethod(NULL, &cntl, &req, &res, NULL);
+  if (cntl.Failed()) {
+    LOG(ERROR) << "Unable to select bucket: Something went wrong: "
+               << cntl.ErrorText();
+    return -1;
+  }
+  if (!res.popSelectBucket(&cas)) {
+    std::cout << "Traditional bRPC Couchbase Client Bucket Selection Failed "
+                 "with status code: "
+              << std::hex << res._status_code << std::endl;
+    std::cout << "Error Message: " << res.lastError() << std::endl;
+    return -1;
+  }
+  std::cout << "Traditional bRPC Couchbase Client Bucket Selection Successful"
+            << std::endl;
+
+  // --- Add ----------------------------------------------------------------
+  cntl.Reset();
+  req.Clear();
+  res.Clear();
+  req.addRequest(
+      "sample_key",
+      R"({"name": "John Doe", "age": 30, "email": "john@example.com"})",
+      0 /*flags*/, 0 /*exptime*/, 0 /*cas*/);
+  channel.CallMethod(NULL, &cntl, &req, &res, NULL);
+  if (cntl.Failed()) {
+    LOG(ERROR) << "Unable to add key-value: Something went wrong: "
+               << cntl.ErrorText();
+    return -1;
+  }
+  if (!res.popAdd(&cas)) {
+    std::cout << "Traditional bRPC Couchbase Client Key-Value Addition "
+                 "Failed with status code: "
+              << std::hex << res._status_code << std::endl;
+    std::cout << "Error Message: " << res.lastError() << std::endl;
+    return -1;
+  }
+  std::cout << "Traditional bRPC Couchbase Client Key-Value Addition Successful"
+            << std::endl;
+
+  // --- Get ----------------------------------------------------------------
+  cntl.Reset();
+  req.Clear();
+  res.Clear();
+  req.getRequest("sample_key");
+  channel.CallMethod(NULL, &cntl, &req, &res, NULL);
+  if (cntl.Failed()) {
+    LOG(ERROR) << "Unable to get value for key: Something went wrong: "
+               << cntl.ErrorText();
+    return -1;
+  }
+  std::string value;
+  uint32_t flags = 0;
+  if (!res.popGet(&value, &flags, &cas)) {
+    std::cout << "Traditional bRPC Couchbase Client Key-Value Retrieval "
+                 "Failed with status code: "
+              << std::hex << res._status_code << std::endl;
+    std::cout << "Error Message: " << res.lastError() << std::endl;
+    return -1;
+  }
+  std::cout
+      << "Traditional bRPC Couchbase Client Key-Value Retrieval Successful"
+      << std::endl;
+  std::cout << "Retrieved Value: " << value << std::endl;
+
+  // --- Delete -------------------------------------------------------------
+  cntl.Reset();
+  req.Clear();
+  res.Clear();
+  req.deleteRequest("sample_key");
+  channel.CallMethod(NULL, &cntl, &req, &res, NULL);
+  if (cntl.Failed()) {
+    LOG(ERROR) << "Unable to delete key-value: Something went wrong: "
+               << cntl.ErrorText();
+    return -1;
+  }
+  if (!res.popDelete()) {
+    std::cout << "Traditional bRPC Couchbase Client Key-Value Deletion "
+                 "Failed with status code: "
+              << std::hex << res._status_code << std::endl;
+    std::cout << "Error Message: " << res.lastError() << std::endl;
+    return -1;
+  }
+  std::cout << "Traditional bRPC Couchbase Client Key-Value Deletion Successful"
+            << std::endl;
+  return 0;
+}
diff --git a/src/brpc/couchbase.cpp b/src/brpc/couchbase.cpp
new file mode 100644
index 0000000000..52e16dc9c7
--- /dev/null
+++ b/src/brpc/couchbase.cpp
@@ -0,0 +1,2634 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/couchbase.h"
+
+#include //for crc32 Vbucket_id
+
+// Debug flag for enabling debug statements
+static bool DBUG = false;  // Set to true to enable debug logs
+
+// Debug print macro: streams |msg| to std::cout with a "[DEBUG] " prefix when
+// DBUG is true and expands to a no-op statement otherwise.
+#define DEBUG_PRINT(msg)                             \
+  do {                                               \
+    if (DBUG) {                                      \
+      std::cout << "[DEBUG] " << msg << std::endl;   \
+    }                                                \
+  } while (0)
+
+#include
+
+#include "brpc/policy/couchbase_protocol.h"
+#include "brpc/proto_base.pb.h"
+#include "butil/logging.h"
+#include "butil/macros.h"
+#include "butil/string_printf.h"
+#include "butil/sys_byteorder.h"
+#include "butil/third_party/rapidjson/document.h"
+#include "butil/third_party/rapidjson/rapidjson.h"
+
+namespace brpc {
+
+// Couchbase protocol constants
+namespace {
+[[maybe_unused]] constexpr uint32_t APPLE_VBUCKET_COUNT = 64;
+constexpr uint32_t DEFAULT_VBUCKET_COUNT = 1024;
+constexpr int CONNECTION_ID_SIZE = 33;
+constexpr size_t RANDOM_ID_HEX_SIZE = 67;  // 33 bytes * 2 + null terminator
+}  // namespace
+
+// Static member definitions
+CouchbaseManifestManager*
+    CouchbaseOperations::CouchbaseRequest::metadata_tracking =
+        &common_metadata_tracking;
+
+// Stores |manifest| as the cached collection manifest for |bucket| on
+// |server|, replacing any previous entry. The write happens under the
+// manager's write lock. Always returns true.
+bool brpc::CouchbaseManifestManager::setBucketToCollectionManifest(
+    std::string server, std::string bucket,
+    CouchbaseManifestManager::CollectionManifest manifest) {
+  UniqueLock write_lock(rw_bucket_to_collection_manifest_mutex_);
+  // |manifest| is our own by-value copy, so it can be moved into the cache.
+  bucket_to_collection_manifest_[server][bucket] = std::move(manifest);
+  return true;
+}
+
+// Copies the cached manifest for |bucket| on |server| into |*manifest|.
+// Returns false (leaving |*manifest| untouched) when either the server or the
+// bucket has no cached entry. The lookup happens under the read lock.
+bool brpc::CouchbaseManifestManager::getBucketToCollectionManifest(
+    std::string server, std::string bucket,
+    CouchbaseManifestManager::CollectionManifest* manifest) {
+  SharedLock read_lock(rw_bucket_to_collection_manifest_mutex_);
+  auto server_it = bucket_to_collection_manifest_.find(server);
+  if (server_it == bucket_to_collection_manifest_.end()) {
+    return false;
+  }
+  auto bucket_it = server_it->second.find(bucket);
+  if (bucket_it == server_it->second.end()) {
+    return false;
+  }
+  *manifest = bucket_it->second;
+  return true;
+}
+
+// Looks up |collection| inside |scope| of |manifest| and writes its id to
+// |*collection_id|. Returns false when a pointer argument is null or when
+// the scope or the collection is not present in the manifest.
+bool brpc::CouchbaseManifestManager::getManifestToCollectionId(
+    CouchbaseManifestManager::CollectionManifest* manifest, std::string scope,
+    std::string collection, uint8_t* collection_id) {
+  if (manifest == nullptr || collection_id == nullptr) {
+    DEBUG_PRINT("Invalid input: manifest or collection_id is null");
+    return false;
+  }
+  auto scope_it = manifest->scope_to_collection_id_map.find(scope);
+  if (scope_it == manifest->scope_to_collection_id_map.end()) {
+    DEBUG_PRINT("Scope: " << scope << " not found in manifest");
+    return false;
+  }
+  auto collection_it = scope_it->second.find(collection);
+  if (collection_it == scope_it->second.end()) {
+    DEBUG_PRINT("Collection: " << collection
+                               << " not found in scope: " << scope);
+    return false;
+  }
+  *collection_id = collection_it->second;
+  return true;
+}
+
+// Parses a collection-manifest JSON document into |*manifest|.
+//
+// Expected shape (everything else is ignored):
+//   {"uid": "<string>",
+//    "scopes": [{"name": "<scope>",
+//                "collections": [{"name": "<coll>", "uid": "<hex>"}, ...]},
+//               ...]}
+//
+// |*manifest| is cleared first. Returns false on null |manifest|, empty or
+// malformed JSON, a missing/mistyped field, or a collection uid that is not a
+// hex number in [0, 255]. On failure |*manifest| may hold the uid and the
+// scopes parsed before the error.
+bool CouchbaseManifestManager::jsonToCollectionManifest(
+    const std::string& json,
+    CouchbaseManifestManager::CollectionManifest* manifest) {
+  if (manifest == nullptr) {
+    DEBUG_PRINT("Invalid input: manifest is null");
+    return false;
+  }
+
+  // Clear existing data
+  manifest->uid.clear();
+  manifest->scope_to_collection_id_map.clear();
+
+  if (json.empty()) {
+    DEBUG_PRINT("JSON std::string is empty");
+    return false;
+  }
+
+  // Parse JSON using RapidJSON
+  BUTIL_RAPIDJSON_NAMESPACE::Document document;
+  document.Parse(json.c_str());
+
+  if (document.HasParseError()) {
+    DEBUG_PRINT("Failed to parse JSON: " << document.GetParseError());
+    return false;
+  }
+
+  if (!document.IsObject()) {
+    DEBUG_PRINT("JSON root is not an object");
+    return false;
+  }
+
+  // Extract uid
+  if (document.HasMember("uid") && document["uid"].IsString()) {
+    manifest->uid = document["uid"].GetString();
+  } else {
+    DEBUG_PRINT("Missing or invalid 'uid' field in JSON");
+    return false;
+  }
+
+  // Extract scopes
+  if (!document.HasMember("scopes") || !document["scopes"].IsArray()) {
+    DEBUG_PRINT("Missing or invalid 'scopes' field in JSON");
+    return false;
+  }
+
+  const BUTIL_RAPIDJSON_NAMESPACE::Value& scopes = document["scopes"];
+  for (BUTIL_RAPIDJSON_NAMESPACE::SizeType i = 0; i < scopes.Size(); ++i) {
+    const BUTIL_RAPIDJSON_NAMESPACE::Value& scope = scopes[i];
+
+    if (!scope.IsObject()) {
+      DEBUG_PRINT("Scope at index " << i << " is not an object");
+      return false;
+    }
+
+    // Extract scope name
+    if (!scope.HasMember("name") || !scope["name"].IsString()) {
+      DEBUG_PRINT("Missing or invalid 'name' field in scope at index " << i);
+      return false;
+    }
+    std::string scope_name = scope["name"].GetString();
+
+    // Extract collections
+    if (!scope.HasMember("collections") || !scope["collections"].IsArray()) {
+      DEBUG_PRINT("Missing or invalid 'collections' field in scope '"
+                  << scope_name << "'");
+      return false;
+    }
+
+    const BUTIL_RAPIDJSON_NAMESPACE::Value& collections = scope["collections"];
+    // Collection-name -> id map of this scope; the type is taken from the
+    // manifest member it is moved into below.
+    decltype(manifest->scope_to_collection_id_map)::mapped_type collection_map;
+
+    for (BUTIL_RAPIDJSON_NAMESPACE::SizeType j = 0; j < collections.Size();
+         ++j) {
+      const BUTIL_RAPIDJSON_NAMESPACE::Value& collection = collections[j];
+
+      if (!collection.IsObject()) {
+        DEBUG_PRINT("Collection at index " << j << " in scope '" << scope_name
+                                           << "' is not an object");
+        return false;
+      }
+
+      // Extract collection name
+      if (!collection.HasMember("name") || !collection["name"].IsString()) {
+        DEBUG_PRINT("Missing or invalid 'name' field in collection at index "
+                    << j << " in scope '" << scope_name << "'");
+        return false;
+      }
+      std::string collection_name = collection["name"].GetString();
+
+      // Extract collection uid (hex string)
+      if (!collection.HasMember("uid") || !collection["uid"].IsString()) {
+        DEBUG_PRINT("Missing or invalid 'uid' field in collection '"
+                    << collection_name << "' in scope '" << scope_name << "'");
+        return false;
+      }
+      std::string collection_uid_str = collection["uid"].GetString();
+
+      // Convert the hex string to uint8_t. The whole string has to be
+      // consumed, so input such as "8zz" is rejected instead of read as 8.
+      // NOTE(review): Couchbase collection ids can be larger than 255; a
+      // single such collection makes this function reject the whole
+      // manifest. Confirm the uint8_t limit is intended.
+      uint8_t collection_id = 0;
+      try {
+        size_t parsed_chars = 0;
+        const unsigned long uid_val =
+            std::stoul(collection_uid_str, &parsed_chars, 16);
+        if (parsed_chars != collection_uid_str.size()) {
+          DEBUG_PRINT("Failed to parse collection uid '"
+                      << collection_uid_str << "' as hex in collection '"
+                      << collection_name << "' in scope '" << scope_name
+                      << "': trailing characters");
+          return false;
+        }
+        if (uid_val > 255) {
+          DEBUG_PRINT(
+              "Collection uid '"
+              << collection_uid_str << "' exceeds uint8_t range in collection '"
+              << collection_name << "' in scope '" << scope_name << "'");
+          return false;
+        }
+        collection_id = static_cast<uint8_t>(uid_val);
+      } catch (const std::exception& e) {
+        // std::stoul throws on non-numeric or out-of-range input.
+        DEBUG_PRINT("Failed to parse collection uid '"
+                    << collection_uid_str << "' as hex in collection '"
+                    << collection_name << "' in scope '" << scope_name
+                    << "': " << e.what());
+        return false;
+      }
+
+      // Add to collection map
+      collection_map[collection_name] = collection_id;
+    }
+
+    // Add scope and its collections to manifest
+    manifest->scope_to_collection_id_map[scope_name] =
+        std::move(collection_map);
+  }
+
+  return true;
+}
+
+// Fetches the collection manifest for the currently selected bucket over
+// |channel| and reconciles it with the shared cache (common_metadata_tracking)
+// and, when non-null, the caller's |local_collection_manifest_cache| (keyed
+// by bucket name).
+//
+// Returns true when the shared cache was filled or replaced with a manifest
+// whose uid differs from the cached one. Returns false when an argument is
+// missing, the request or parsing fails, or the shared cache was already
+// up to date; in that last case the local cache is still brought in line with
+// the fetched manifest.
+// NOTE(review): callers cannot tell "already up to date" from "failed" by the
+// return value alone; confirm this is what they expect.
+bool CouchbaseManifestManager::refreshCollectionManifest(
+    brpc::Channel* channel, const std::string& server, const std::string& bucket,
+    std::unordered_map<std::string,
+                       CouchbaseManifestManager::CollectionManifest>*
+        local_collection_manifest_cache) {
+  if (channel == nullptr) {
+    DEBUG_PRINT("No channel found, make sure to call Authenticate() first");
+    return false;
+  }
+  if (server.empty()) {
+    DEBUG_PRINT("Server is empty, make sure to call Authenticate() first");
+    return false;
+  }
+  if (bucket.empty()) {
+    DEBUG_PRINT("No bucket selected, make sure to call SelectBucket() first");
+    return false;
+  }
+
+  // First fetch the manifest from the server.
+  CouchbaseOperations::CouchbaseRequest temp_get_manifest_request;
+  CouchbaseOperations::CouchbaseResponse temp_get_manifest_response;
+  brpc::Controller temp_cntl;
+  temp_get_manifest_request.getCollectionManifest();
+  channel->CallMethod(NULL, &temp_cntl, &temp_get_manifest_request,
+                      &temp_get_manifest_response, NULL);
+  if (temp_cntl.Failed()) {
+    DEBUG_PRINT("Failed to get collection manifest: bRPC controller error "
+                << temp_cntl.ErrorText());
+    return false;
+  }
+  std::string manifest_json;
+  if (!temp_get_manifest_response.popManifest(&manifest_json)) {
+    DEBUG_PRINT("Failed to parse response for refreshing collection Manifest: "
+                << temp_get_manifest_response.lastError());
+    return false;
+  }
+  brpc::CouchbaseManifestManager::CollectionManifest manifest;
+  if (!common_metadata_tracking.jsonToCollectionManifest(manifest_json,
+                                                         &manifest)) {
+    DEBUG_PRINT("Failed to parse collection manifest JSON");
+    return false;
+  }
+
+  // Then compare its UID with the one in the shared cache.
+  brpc::CouchbaseManifestManager::CollectionManifest cached_manifest;
+  if (!common_metadata_tracking.getBucketToCollectionManifest(
+          server, bucket, &cached_manifest)) {
+    // No cached manifest found, set the new one
+    if (!common_metadata_tracking.setBucketToCollectionManifest(server, bucket,
+                                                                manifest)) {
+      DEBUG_PRINT("Failed to cache collection manifest for bucket "
+                  << bucket << " on server " << server);
+      return false;
+    }
+    DEBUG_PRINT("Cached collection manifest for bucket "
+                << bucket << " on server " << server);
+    // also update the local cache
+    if (local_collection_manifest_cache != nullptr) {
+      (*local_collection_manifest_cache)[bucket] = manifest;
+    }
+    return true;
+  }
+
+  if (manifest.uid != cached_manifest.uid) {
+    // The manifest changed on the server: refresh both caches.
+    DEBUG_PRINT("Collection manifest has changed for bucket "
+                << bucket << " on server " << server);
+    if (!common_metadata_tracking.setBucketToCollectionManifest(server, bucket,
+                                                                manifest)) {
+      DEBUG_PRINT("Failed to update cached collection manifest for bucket "
+                  << bucket << " on server " << server);
+      return false;
+    }
+    DEBUG_PRINT("Updated cached collection manifest for bucket "
+                << bucket << " on server " << server);
+    if (local_collection_manifest_cache != nullptr) {
+      (*local_collection_manifest_cache)[bucket] = manifest;
+      DEBUG_PRINT("Added to local collection manifest cache for bucket "
+                  << bucket << " on server " << server);
+    }
+    return true;
+  }
+
+  // Shared cache already matches the server; only the local cache may lag.
+  DEBUG_PRINT("Collection manifest is already up-to-date for bucket "
+              << bucket << " on server " << server);
+  if (local_collection_manifest_cache != nullptr) {
+    auto local_it = local_collection_manifest_cache->find(bucket);
+    if (local_it == local_collection_manifest_cache->end()) {
+      // if the bucket does not exist in the local cache, add it
+      (*local_collection_manifest_cache)[bucket] = manifest;
+      DEBUG_PRINT("Added to local collection manifest cache for bucket "
+                  << bucket << " on server " << server);
+    } else if (local_it->second.uid != manifest.uid) {
+      // if the UID is different, update the local cache
+      local_it->second = manifest;
+      DEBUG_PRINT("Updated local collection manifest cache for bucket "
+                  << bucket << " on server " << server);
+    }
+  }
+  return false;
+}
+
+uint32_t CouchbaseOperations::CouchbaseRequest::hashCrc32(const char* key,
+                                                           size_t key_length) {
+  static const uint32_t crc32tab[256] = {
+      0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+      0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+      0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+      0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+      0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+      0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+      0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
+      0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+      0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+      0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+      0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
+      0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+      0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+
0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, + }; + + uint64_t x; + uint32_t crc = UINT32_MAX; + + for (x = 0; x < key_length; x++) + crc = (crc >> 8) ^ crc32tab[(crc ^ (uint64_t)key[x]) & 0xff]; + +#ifdef __APPLE__ + return ((~crc) >> 16) % APPLE_VBUCKET_COUNT; +#else + return ((~crc) >> 16) % DEFAULT_VBUCKET_COUNT; +#endif +} + +void CouchbaseOperations::CouchbaseRequest::sharedCtor() { + _pipelined_count = 0; + _cached_size_ = 0; +} + +void CouchbaseOperations::CouchbaseRequest::sharedDtor() {} + +void CouchbaseOperations::CouchbaseRequest::setCachedSize(int size) const { + _cached_size_ = size; +} + +void CouchbaseOperations::CouchbaseRequest::Clear() { + _buf.clear(); + _pipelined_count = 0; +} + +// Support for scope level collections will be added in future. +// Get the Scope ID for a given scope name +// bool CouchbaseOperations::CouchbaseRequest::GetScopeId(const +// butil::StringPiece& scope_name) { +// if (scope_name.empty()) { +// DEBUG_PRINT("Empty scope name"); +// return false; +// } +// // Opcode 0xBC for Get Scope ID (see Collections.md) +// const policy::CouchbaseRequestHeader header = { +// policy::CB_MAGIC_REQUEST, +// policy::CB_GET_SCOPE_ID, +// butil::HostToNet16(scope_name.size()), +// 0, // no extras +// policy::CB_BINARY_RAW_BYTES, +// 0, // no vbucket +// butil::HostToNet32(scope_name.size()), +// 0, // opaque +// 0 // no CAS +// }; +// if (_buf.append(&header, sizeof(header))) { +// return false; +// } +// if (_buf.append(scope_name.data(), scope_name.size())) { +// return false; +// } +// ++_pipelined_count; +// return true; +// } + +bool CouchbaseOperations::CouchbaseRequest::selectBucketRequest( + const butil::StringPiece& bucket_name) { + if (bucket_name.empty()) { + DEBUG_PRINT("Empty bucket name"); + return false; + } + // construct the request header + const 
policy::CouchbaseRequestHeader header = { + policy::CB_MAGIC_REQUEST, + policy::CB_SELECT_BUCKET, + butil::HostToNet16(bucket_name.size()), + 0, + policy::CB_BINARY_RAW_BYTES, + 0, + butil::HostToNet32(bucket_name.size()), + 0, + 0}; + if (_buf.append(&header, sizeof(header))) { + DEBUG_PRINT("Failed to append header to buffer"); + return false; + } + if (_buf.append(bucket_name.data(), bucket_name.size())) { + DEBUG_PRINT("Failed to append bucket name to buffer"); + return false; + } + ++_pipelined_count; + return true; +} + +// HelloRequest sends a Hello request to the Couchbase server, which specifies +// the client features and capabilities. +// This is typically the first request sent after connecting to the server. +// It includes the agent name and a randomly generated connection ID in JSON +// format. +bool CouchbaseOperations::CouchbaseRequest::helloRequest() { + std::string agent = "brpc/1.0.0 ("; +#ifdef __APPLE__ + agent += "Darwin/"; +#elif defined(__linux__) + agent += "Linux/"; +#else + agent += "UnknownOS/"; +#endif +#if defined(__x86_64__) + agent += "x86_64"; +#elif defined(__aarch64__) + agent += "arm64"; +#else + agent += "unknown"; +#endif + agent += ";bssl/0x1010107f)"; + + // Generate a random connection ID as hex std::string + unsigned char raw_id[CONNECTION_ID_SIZE]; + FILE* urandom = fopen("/dev/urandom", "rb"); + if (!urandom || + fread(raw_id, 1, CONNECTION_ID_SIZE, urandom) != CONNECTION_ID_SIZE) { + if (urandom) fclose(urandom); + DEBUG_PRINT("Failed to generate random connection id"); + return false; + } + fclose(urandom); + char hex_id[RANDOM_ID_HEX_SIZE] = {0}; + for (int i = 0; i < CONNECTION_ID_SIZE; ++i) { + sprintf(hex_id + i * 2, "%02x", raw_id[i]); + } + + // Format key as JSON: {"a":"agent","i":"hex_id"} + std::string key = + std::string("{\"a\":\"") + agent + "\",\"i\":\"" + hex_id + "\"}"; + + const uint16_t key_len = key.size(); + uint16_t features[] = { + butil::HostToNet16(0x0001), // Datatype + 
butil::HostToNet16(0x0006), // XError + butil::HostToNet16(0x0007), // SelectBucket + butil::HostToNet16(0x000b), // Snappy + butil::HostToNet16(0x0012) // Collections + }; + + const uint32_t value_len = sizeof(features); + const uint32_t total_body_len = key_len + value_len; + + const policy::CouchbaseRequestHeader header = { + policy::CB_MAGIC_REQUEST, + policy::CB_HELLO_SELECT_FEATURES, + butil::HostToNet16(key_len), // key length + 0, // extras length + policy::CB_BINARY_RAW_BYTES, // data type + 0, // vbucket id + butil::HostToNet32(total_body_len), // total body length + 0, // opaque + 0 // cas value + }; + + if (_buf.append(&header, sizeof(header))) { + DEBUG_PRINT("Failed to append Hello header to buffer"); + return false; + } + if (_buf.append(key.data(), key_len)) { + DEBUG_PRINT("Failed to append Hello JSON key to buffer"); + return false; + } + if (_buf.append(reinterpret_cast(features), value_len)) { + DEBUG_PRINT("Failed to append Hello features to buffer"); + return false; + } + ++_pipelined_count; + return true; +} + +bool CouchbaseOperations::CouchbaseRequest::authenticateRequest( + const butil::StringPiece& username, const butil::StringPiece& password) { + if (username.empty() || password.empty()) { + DEBUG_PRINT("Empty username or password"); + return false; + } + // insert the features to get enabled, calling function helloRequest() will do + // this. 
+ if (!helloRequest()) { + DEBUG_PRINT("Failed to send helloRequest for authentication"); + return false; + } + // Construct the request header + constexpr char kPlainAuthCommand[] = "PLAIN"; + constexpr char kPadding[1] = {'\0'}; + const brpc::policy::CouchbaseRequestHeader header = { + brpc::policy::CB_MAGIC_REQUEST, + brpc::policy::CB_BINARY_SASL_AUTH, + butil::HostToNet16(sizeof(kPlainAuthCommand) - 1), + 0, + 0, + 0, + butil::HostToNet32(sizeof(kPlainAuthCommand) + 1 + username.length() * 2 + + password.length()), + 0, + 0}; + std::string auth_str; + auth_str.reserve(sizeof(header) + sizeof(kPlainAuthCommand) - 1 + + username.size() * 2 + password.size() + 2); + auth_str.append(reinterpret_cast(&header), sizeof(header)); + auth_str.append(kPlainAuthCommand, sizeof(kPlainAuthCommand) - 1); + auth_str.append(username.data(), username.size()); + auth_str.append(kPadding, sizeof(kPadding)); + auth_str.append(username.data(), username.size()); + auth_str.append(kPadding, sizeof(kPadding)); + auth_str.append(password.data(), password.size()); + if (_buf.append(auth_str.data(), auth_str.size())) { + DEBUG_PRINT("Failed to append auth std::string to buffer"); + return false; + } + ++_pipelined_count; + return true; +} + +void CouchbaseOperations::CouchbaseRequest::MergeFrom( + const CouchbaseRequest& from) { + CHECK_NE(&from, this); + _buf.append(from._buf); + _pipelined_count += from._pipelined_count; +} + +bool CouchbaseOperations::CouchbaseRequest::IsInitialized() const { + return _pipelined_count != 0; +} + +void CouchbaseOperations::CouchbaseRequest::Swap(CouchbaseRequest* other) { + if (other != this) { + _buf.swap(other->_buf); + std::swap(_pipelined_count, other->_pipelined_count); + std::swap(_cached_size_, other->_cached_size_); + } +} + +void CouchbaseOperations::CouchbaseResponse::sharedCtor() { _cached_size_ = 0; } + +void CouchbaseOperations::CouchbaseResponse::sharedDtor() {} + +void CouchbaseOperations::CouchbaseResponse::setCachedSize(int size) const 
{ + _cached_size_ = size; +} + +void CouchbaseOperations::CouchbaseResponse::Clear() {} + +void CouchbaseOperations::CouchbaseResponse::MergeFrom( + const CouchbaseResponse& from) { + CHECK_NE(&from, this); + _err = from._err; + _buf.append(from._buf); +} + +bool CouchbaseOperations::CouchbaseResponse::IsInitialized() const { + return !_buf.empty(); +} + +void CouchbaseOperations::CouchbaseResponse::swap(CouchbaseResponse* other) { + if (other != this) { + _buf.swap(other->_buf); + std::swap(_cached_size_, other->_cached_size_); + } +} + +// =================================================================== + +const char* CouchbaseOperations::CouchbaseResponse::statusStr(Status st) { + switch (st) { + case STATUS_SUCCESS: + return "SUCCESS"; + case STATUS_KEY_ENOENT: + return "Key not found"; + case STATUS_KEY_EEXISTS: + return "Key already exists"; + case STATUS_E2BIG: + return "Value too large"; + case STATUS_EINVAL: + return "Invalid arguments"; + case STATUS_NOT_STORED: + return "Item not stored"; + case STATUS_DELTA_BADVAL: + return "Invalid delta value for increment/decrement"; + case STATUS_VBUCKET_BELONGS_TO_ANOTHER_SERVER: + return "VBucket belongs to another server"; + case STATUS_AUTH_ERROR: + return "Authentication failed"; + case STATUS_AUTH_CONTINUE: + return "Authentication continue"; + case STATUS_ERANGE: + return "Range error"; + case STATUS_ROLLBACK: + return "Rollback required"; + case STATUS_EACCESS: + return "Access denied"; + case STATUS_NOT_INITIALIZED: + return "Not initialized"; + case STATUS_UNKNOWN_COMMAND: + return "Unknown command"; + case STATUS_ENOMEM: + return "Out of memory"; + case STATUS_NOT_SUPPORTED: + return "Operation not supported"; + case STATUS_EINTERNAL: + return "Internal server error"; + case STATUS_EBUSY: + return "Server busy"; + case STATUS_ETMPFAIL: + return "Temporary failure"; + case STATUS_UNKNOWN_COLLECTION: + return "Unknown collection"; + case STATUS_NO_COLLECTIONS_MANIFEST: + return "No collections manifest"; 
+ case STATUS_CANNOT_APPLY_COLLECTIONS_MANIFEST: + return "Cannot apply collections manifest"; + case STATUS_COLLECTIONS_MANIFEST_IS_AHEAD: + return "Collections manifest is ahead"; + case STATUS_UNKNOWN_SCOPE: + return "Unknown scope"; + case STATUS_DCP_STREAM_ID_INVALID: + return "Invalid DCP stream ID"; + case STATUS_DURABILITY_INVALID_LEVEL: + return "Invalid durability level"; + case STATUS_DURABILITY_IMPOSSIBLE: + return "Durability requirements impossible"; + case STATUS_SYNC_WRITE_IN_PROGRESS: + return "Synchronous write in progress"; + case STATUS_SYNC_WRITE_AMBIGUOUS: + return "Synchronous write result ambiguous"; + case STATUS_SYNC_WRITE_RE_COMMIT_IN_PROGRESS: + return "Synchronous write re-commit in progress"; + case STATUS_SUBDOC_PATH_NOT_FOUND: + return "Sub-document path not found"; + case STATUS_SUBDOC_PATH_MISMATCH: + return "Sub-document path mismatch"; + case STATUS_SUBDOC_PATH_EINVAL: + return "Invalid sub-document path"; + case STATUS_SUBDOC_PATH_E2BIG: + return "Sub-document path too deep"; + case STATUS_SUBDOC_DOC_E2DEEP: + return "Sub-document too deep"; + case STATUS_SUBDOC_VALUE_CANTINSERT: + return "Cannot insert sub-document value"; + case STATUS_SUBDOC_DOC_NOT_JSON: + return "Document is not JSON"; + case STATUS_SUBDOC_NUM_E2BIG: + return "Sub-document number too large"; + case STATUS_SUBDOC_DELTA_E2BIG: + return "Sub-document delta too large"; + case STATUS_SUBDOC_PATH_EEXISTS: + return "Sub-document path already exists"; + case STATUS_SUBDOC_VALUE_E2DEEP: + return "Sub-document value too deep"; + case STATUS_SUBDOC_INVALID_COMBO: + return "Invalid sub-document operation combination"; + case STATUS_SUBDOC_MULTI_PATH_FAILURE: + return "Sub-document multi-path operation failed"; + case STATUS_SUBDOC_SUCCESS_DELETED: + return "Sub-document operation succeeded on deleted document"; + case STATUS_SUBDOC_XATTR_INVALID_FLAG_COMBO: + return "Invalid extended attribute flag combination"; + case STATUS_SUBDOC_XATTR_INVALID_KEY_COMBO: + return 
"Invalid extended attribute key combination"; + case STATUS_SUBDOC_XATTR_UNKNOWN_MACRO: + return "Unknown extended attribute macro"; + case STATUS_SUBDOC_XATTR_UNKNOWN_VATTR: + return "Unknown virtual extended attribute"; + case STATUS_SUBDOC_XATTR_CANT_MODIFY_VATTR: + return "Cannot modify virtual extended attribute"; + case STATUS_SUBDOC_MULTI_PATH_FAILURE_DELETED: + return "Sub-document multi-path operation failed on deleted document"; + case STATUS_SUBDOC_INVALID_XATTR_ORDER: + return "Invalid extended attribute order"; + case STATUS_SUBDOC_XATTR_UNKNOWN_VATTR_MACRO: + return "Unknown virtual extended attribute macro"; + case STATUS_SUBDOC_CAN_ONLY_REVIVE_DELETED_DOCUMENTS: + return "Can only revive deleted documents"; + case STATUS_SUBDOC_DELETED_DOCUMENT_CANT_HAVE_VALUE: + return "Deleted document cannot have a value"; + case STATUS_XATTR_EINVAL: + return "Invalid extended attributes"; + } + return "Unknown status"; +} + +// Helper method to format error messages with status codes +std::string CouchbaseOperations::CouchbaseResponse::formatErrorMessage( + uint16_t status_code, const std::string& operation, + const std::string& error_msg) { + if (error_msg.empty()) { + return butil::string_printf("%s failed with status 0x%02x (%s)", + operation.c_str(), status_code, + statusStr((Status)status_code)); + } else { + return butil::string_printf( + "%s failed with status 0x%02x (%s): %s", operation.c_str(), status_code, + statusStr((Status)status_code), error_msg.c_str()); + } +} + +// MUST NOT have extras. +// MUST have key. +// MUST NOT have value. 
+bool CouchbaseOperations::CouchbaseRequest::getOrDelete( + uint8_t command, const butil::StringPiece& key, uint8_t coll_id) { + // Collection ID + uint8_t collection_id = coll_id; + uint16_t VBucket_id = hashCrc32(key.data(), key.size()); + const policy::CouchbaseRequestHeader header = { + policy::CB_MAGIC_REQUEST, command, + butil::HostToNet16(key.size() + 1), // Key + 0, // extras length + policy::CB_BINARY_RAW_BYTES, // data type + butil::HostToNet16(VBucket_id), + butil::HostToNet32(key.size() + + sizeof(collection_id)), // total body length includes + // key and collection id + 0, 0}; + if (_buf.append(&header, sizeof(header))) { + return false; + } + if (_buf.append(&collection_id, sizeof(collection_id))) { + return false; + } + if (_buf.append(key.data(), key.size())) { + return false; + } + ++_pipelined_count; + return true; +} + +// collectionID fetching either from the metadata cache or if doesn't exist then +// fetch from the server. +bool CouchbaseOperations::CouchbaseRequest::getCachedOrFetchCollectionId( + std::string collection_name, uint8_t* coll_id, + brpc::CouchbaseManifestManager* metadata_tracking, brpc::Channel* channel, + const std::string& server, const std::string& selected_bucket, + std:: unordered_map* + local_cache) { + if (collection_name.empty()) { + DEBUG_PRINT("Empty collection name"); + return false; + } + if (channel == nullptr) { + DEBUG_PRINT("No channel found, make sure to call Authenticate() first"); + return false; + } + if (server.empty()) { + DEBUG_PRINT("Server is empty, make sure to call Authenticate() first"); + return false; + } + if (selected_bucket.empty()) { + DEBUG_PRINT("No bucket selected, make sure to call SelectBucket() first"); + return false; + } + + brpc::CouchbaseManifestManager::CollectionManifest manifest; + // check if the server/bucket exists in the cached collection manifest + if (!metadata_tracking->getBucketToCollectionManifest(server, selected_bucket, + &manifest)) { + DEBUG_PRINT("No cached 
collection manifest found for bucket " + << selected_bucket << " on server " << server + << ", fetching from server"); + // No cached manifest found, fetch from server + if (!metadata_tracking->refreshCollectionManifest( + channel, server, selected_bucket, local_cache)) { + return false; + } + // local cache will also be updated in refreshCollectionManifest + // get the reference to collectionID from local cache + if (!getLocalCachedCollectionId(selected_bucket, "_default", + collection_name, coll_id)) { + // collectionID not found in the latest manifest fetched from server + return false; + } + // collectionID has been found in the latest manifest fetched from server + // and is stored in coll_id + return true; + } else { + // check if collection name to id mapping exists. + if (!metadata_tracking->getManifestToCollectionId( + &manifest, "_default", collection_name, coll_id)) { + // Just to verify that the collectionID does not exist in the manifest + // refresh manifest from server and try again + if (!metadata_tracking->refreshCollectionManifest( + channel, server, selected_bucket, local_cache)) { + return false; + } + // local cache will also be updated in refreshCollectionManifest + // get the reference to collectionID from local cache + if (!getLocalCachedCollectionId(selected_bucket, "_default", + collection_name, coll_id)) { + // collectionID not found in the latest manifest fetched from server + return false; + } + // collectionID has been found in the latest manifest fetched from server + // and is stored in coll_id + return true; + } + // update the local cache with the manifest in global cache + (*local_collection_manifest_cache)[selected_bucket] = manifest; + // collectionID found in the cached manifest + return true; + } +} + +bool CouchbaseOperations::CouchbaseRequest::getRequest( + const butil::StringPiece& key, std::string collection_name, + brpc::Channel* channel, const std::string& server, const std::string& bucket) { + DEBUG_PRINT("getRequest 
called with key: " + << key << ", collection_name: " << collection_name + << ", server: " << server << ", bucket: " << bucket); + uint8_t coll_id = 0; // default collection ID + if (collection_name != "_default") { + // check if the local cache is empty or not. + if (local_collection_manifest_cache->empty()) { + DEBUG_PRINT("Local collection manifest cache is empty in getRequest"); + // if local cache is empty, goto global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + DEBUG_PRINT( + "Failed to get collection id from global cache or server in " + "getRequest"); + return false; + } + } + // check if the collection id is available in the local cache + else if (!getLocalCachedCollectionId(bucket, "_default", collection_name, + &coll_id)) { + DEBUG_PRINT("Collection id not found in local cache in getRequest"); + // if not check in the global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + DEBUG_PRINT( + "Failed to get collection id from global cache or server in " + "getRequest"); + return false; + } + } + } + DEBUG_PRINT("getRequest using coll_id: " << (int)coll_id); + return getOrDelete(policy::CB_BINARY_GET, key, coll_id); +} + +bool CouchbaseOperations::CouchbaseRequest::deleteRequest( + const butil::StringPiece& key, std::string collection_name, + brpc::Channel* channel, const std::string& server, const std::string& bucket) { + DEBUG_PRINT("deleteRequest called with key: " + << key << ", collection_name: " << collection_name + << ", server: " << server << ", bucket: " << bucket); + uint8_t coll_id = 0; // default collection ID + if (collection_name != "_default") { + // check if the local cache is empty or not. 
+ if (local_collection_manifest_cache->empty()) { + DEBUG_PRINT("Local collection manifest cache is empty in deleteRequest"); + // if local cache is empty, goto global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + DEBUG_PRINT( + "Failed to get collection id from global cache or server in " + "deleteRequest"); + return false; + } + } + // check if the collection id is available in the local cache + else if (!getLocalCachedCollectionId(bucket, "_default", collection_name, + &coll_id)) { + DEBUG_PRINT("Collection id not found in local cache in deleteRequest"); + // if not check in the global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + DEBUG_PRINT( + "Failed to get collection id from global cache or server in " + "deleteRequest"); + return false; + } + } + } + DEBUG_PRINT("deleteRequest using coll_id: " << (int)coll_id); + return getOrDelete(policy::CB_BINARY_DELETE, key, coll_id); +} + +struct FlushHeaderWithExtras { + policy::CouchbaseRequestHeader header; + uint32_t exptime; +} __attribute__((packed)); +BAIDU_CASSERT(sizeof(FlushHeaderWithExtras) == 28, must_match); + +bool CouchbaseOperations::CouchbaseResponse::popGet(butil::IOBuf* value, + uint32_t* flags, + uint64_t* cas_value) { + const size_t n = _buf.size(); + policy::CouchbaseResponseHeader header; + if (n < sizeof(header)) { + butil::string_printf(&_err, "buffer is too small to contain a header"); + return false; + } + _buf.copy_to(&header, sizeof(header)); + if (header.command != (uint8_t)policy::CB_BINARY_GET) { + butil::string_printf(&_err, "not a GET response"); + return false; + } + if (n < sizeof(header) + header.total_body_length) { + butil::string_printf(&_err, "response=%u < header=%u + body=%u", + (unsigned)n, (unsigned)sizeof(header), + 
header.total_body_length); + return false; + } + if (header.status != (uint16_t)STATUS_SUCCESS) { + if (DBUG && header.extras_length != 0) { + DEBUG_PRINT("GET response must not have flags"); + } + if (DBUG && header.key_length != 0) { + DEBUG_PRINT("GET response must not have key"); + } + const int value_size = (int)header.total_body_length - + (int)header.extras_length - (int)header.key_length; + _status_code = header.status; + if (value_size < 0) { + butil::string_printf(&_err, "value_size=%d is non-negative", value_size); + return false; + } + _buf.pop_front(sizeof(header) + header.extras_length + header.key_length); + if (value_size > 0) { + std::string error_msg; + _buf.cutn(&error_msg, value_size); + _err = formatErrorMessage(header.status, "GET operation", error_msg); + } else { + _err = formatErrorMessage(header.status, "GET operation"); + } + return false; + } + if (header.extras_length != 4u) { + butil::string_printf( + &_err, "GET response must have flags as extras, actual length=%u", + header.extras_length); + return false; + } + if (header.key_length != 0) { + butil::string_printf(&_err, "GET response must not have key"); + return false; + } + const int value_size = (int)header.total_body_length - + (int)header.extras_length - (int)header.key_length; + if (value_size < 0) { + butil::string_printf(&_err, "value_size=%d is non-negative", value_size); + return false; + } + _buf.pop_front(sizeof(header)); + uint32_t raw_flags = 0; + _buf.cutn(&raw_flags, sizeof(raw_flags)); + if (flags) { + *flags = butil::NetToHost32(raw_flags); + } + if (value) { + value->clear(); + _buf.cutn(value, value_size); + } + if (cas_value) { + *cas_value = header.cas_value; + } + _err.clear(); + return true; +} + +bool CouchbaseOperations::CouchbaseResponse::popGet(std::string* value, + uint32_t* flags, + uint64_t* cas_value) { + butil::IOBuf tmp; + if (popGet(&tmp, flags, cas_value)) { + tmp.copy_to(value); + return true; + } + return false; +} + +// MUST NOT have extras +// 
MUST NOT have key +// MUST NOT have value +bool CouchbaseOperations::CouchbaseResponse::popDelete() { + return popStore(policy::CB_BINARY_DELETE, NULL); +} + +struct StoreHeaderWithExtras { + policy::CouchbaseRequestHeader header; + uint32_t flags; + uint32_t exptime; +} __attribute__((packed)); +BAIDU_CASSERT(sizeof(StoreHeaderWithExtras) == 32, must_match); +const size_t STORE_EXTRAS = + sizeof(StoreHeaderWithExtras) - sizeof(policy::CouchbaseRequestHeader); +// MUST have extras. +// MUST have key. +// MAY have value. +// Extra data for set/add/replace: +// Byte/ 0 | 1 | 2 | 3 | +// / | | | | +// |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| +// +---------------+---------------+---------------+---------------+ +// 0| Flags | +// +---------------+---------------+---------------+---------------+ +// 4| Expiration | +// +---------------+---------------+---------------+---------------+ +// Total 8 bytes +bool CouchbaseOperations::CouchbaseRequest::store( + uint8_t command, const butil::StringPiece& key, + const butil::StringPiece& value, uint32_t flags, uint32_t exptime, + uint64_t cas_value, uint8_t coll_id) { + // add collection id + // uint16_t collection_id = 0x00; + uint8_t collection_id = coll_id; + uint16_t vBucket_id = hashCrc32(key.data(), key.size()); + StoreHeaderWithExtras header_with_extras = { + {policy::CB_MAGIC_REQUEST, command, + butil::HostToNet16(key.size() + + 1), // collection id is not included in part of key, + // so not including it in key length. 
+ STORE_EXTRAS, policy::CB_JSON, butil::HostToNet16(vBucket_id), + butil::HostToNet32(STORE_EXTRAS + sizeof(collection_id) + key.size() + + value.size()), // total body length + 0, butil::HostToNet64(cas_value)}, + butil::HostToNet32(flags), + butil::HostToNet32(exptime)}; + if (_buf.append(&header_with_extras, sizeof(header_with_extras))) { + return false; + } + if (_buf.append(&collection_id, sizeof(collection_id))) { + return false; + } + if (_buf.append(key.data(), key.size())) { + return false; + } + if (_buf.append(value.data(), value.size())) { + return false; + } + ++_pipelined_count; + return true; +} + +// MUST have CAS +// MUST NOT have extras +// MUST NOT have key +// MUST NOT have value +bool CouchbaseOperations::CouchbaseResponse::popStore(uint8_t command, + uint64_t* cas_value) { + const size_t n = _buf.size(); + policy::CouchbaseResponseHeader header; + if (n < sizeof(header)) { + butil::string_printf(&_err, "buffer is too small to contain a header"); + return false; + } + _buf.copy_to(&header, sizeof(header)); + if (header.command != command) { + butil::string_printf(&_err, "Not a STORE response"); + return false; + } + if (n < sizeof(header) + header.total_body_length) { + butil::string_printf(&_err, "Not enough data"); + return false; + } + if (DBUG && header.extras_length != 0) { + DEBUG_PRINT("STORE response must not have flags"); + } + if (DBUG && header.key_length != 0) { + DEBUG_PRINT("STORE response must not have key"); + } + int value_size = (int)header.total_body_length - (int)header.extras_length - + (int)header.key_length; + if (header.status != (uint16_t)STATUS_SUCCESS) { + _buf.pop_front(sizeof(header) + header.extras_length + header.key_length); + _status_code = header.status; + if (value_size > 0) { + std::string error_msg; + _buf.cutn(&error_msg, value_size); + _err = formatErrorMessage( + header.status, couchbaseBinaryCommandToString(command), error_msg); + } else { + _err = formatErrorMessage(header.status, + 
couchbaseBinaryCommandToString(command)); + } + return false; + } + if (DBUG && value_size != 0) { + DEBUG_PRINT("STORE response must not have value, actually=" << value_size); + } + _buf.pop_front(sizeof(header) + header.total_body_length); + if (cas_value) { + *cas_value = header.cas_value; + } + _err.clear(); + return true; +} + +const char* +CouchbaseOperations::CouchbaseResponse::couchbaseBinaryCommandToString( + uint8_t cmd) { + switch (cmd) { + case 0x1f: + return "CB_HELLO_SELECT_FEATURES"; + case 0x89: + return "CB_SELECT_BUCKET"; + case 0xBC: + return "CB_GET_SCOPE_ID"; + case 0x00: + return "CB_BINARY_GET"; + case 0x01: + return "CB_BINARY_SET"; + case 0x02: + return "CB_BINARY_ADD"; + case 0x03: + return "CB_BINARY_REPLACE"; + case 0x04: + return "CB_BINARY_DELETE"; + case 0x05: + return "CB_BINARY_INCREMENT"; + case 0x06: + return "CB_BINARY_DECREMENT"; + case 0x07: + return "CB_BINARY_QUIT"; + case 0x08: + return "CB_BINARY_FLUSH"; + case 0x09: + return "CB_BINARY_GETQ"; + case 0x0a: + return "CB_BINARY_NOOP"; + case 0x0b: + return "CB_BINARY_VERSION"; + case 0x0c: + return "CB_BINARY_GETK"; + case 0x0d: + return "CB_BINARY_GETKQ"; + case 0x0e: + return "CB_BINARY_APPEND"; + case 0x0f: + return "CB_BINARY_PREPEND"; + case 0x10: + return "CB_BINARY_STAT"; + case 0x11: + return "CB_BINARY_SETQ"; + case 0x12: + return "CB_BINARY_ADDQ"; + case 0x13: + return "CB_BINARY_REPLACEQ"; + case 0x14: + return "CB_BINARY_DELETEQ"; + case 0x15: + return "CB_BINARY_INCREMENTQ"; + case 0x16: + return "CB_BINARY_DECREMENTQ"; + case 0x17: + return "CB_BINARY_QUITQ"; + case 0x18: + return "CB_BINARY_FLUSHQ"; + case 0x19: + return "CB_BINARY_APPENDQ"; + case 0x1a: + return "CB_BINARY_PREPENDQ"; + case 0x1c: + return "CB_BINARY_TOUCH"; + case 0x1d: + return "CB_BINARY_GAT"; + case 0x1e: + return "CB_BINARY_GATQ"; + case 0x23: + return "CB_BINARY_GATK"; + case 0x24: + return "CB_BINARY_GATKQ"; + case 0x20: + return "CB_BINARY_SASL_LIST_MECHS"; + case 0x21: + return 
"CB_BINARY_SASL_AUTH"; + case 0x22: + return "CB_BINARY_SASL_STEP"; + case 0xb5: + return "CB_GET_CLUSTER_CONFIG"; + case 0xba: + return "CB_GET_COLLECTIONS_MANIFEST"; + case 0xbb: + return "CB_COLLECTIONS_GET_CID"; + default: + return "UNKNOWN_COMMAND"; + } +} + +bool CouchbaseOperations::CouchbaseRequest::upsertRequest( + const butil::StringPiece& key, const butil::StringPiece& value, + uint32_t flags, uint32_t exptime, uint64_t cas_value, + std::string collection_name, brpc::Channel* channel, const std::string& server, + const std::string& bucket) { + DEBUG_PRINT("upsertRequest called with key: " + << key << ", value: " << value + << ", collection_name: " << collection_name + << ", server: " << server << ", bucket: " << bucket); + uint8_t coll_id = 0; // default collection ID + if (collection_name != "_default") { + // check if the local cache is empty or not. + if (local_collection_manifest_cache->empty()) { + DEBUG_PRINT("Local collection manifest cache is empty in upsertRequest"); + // if local cache is empty, goto global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + DEBUG_PRINT( + "Failed to get collection id from global cache or server in " + "upsertRequest"); + return false; + } + } + // check if the collection id is available in the local cache + else if (!getLocalCachedCollectionId(bucket, "_default", collection_name, + &coll_id)) { + DEBUG_PRINT("Collection id not found in local cache in upsertRequest"); + // if not check in the global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + DEBUG_PRINT( + "Failed to get collection id from global cache or server in " + "upsertRequest"); + return false; + } + } + } + DEBUG_PRINT("upsertRequest using coll_id: " << (int)coll_id); + return 
store(policy::CB_BINARY_SET, key, value, flags, exptime, cas_value, + coll_id); +} + +bool CouchbaseOperations::CouchbaseRequest::getCollectionManifest() { + const policy::CouchbaseRequestHeader header = { + policy::CB_MAGIC_REQUEST, + policy::CB_GET_COLLECTIONS_MANIFEST, + 0, // no key + 0, // no extras + policy::CB_BINARY_RAW_BYTES, + 0, // no vbucket + 0, // no body (no key, no extras, no value) + 0, // opaque + 0 // no CAS + }; + if (_buf.append(&header, sizeof(header))) { + return false; + } + ++_pipelined_count; + return true; +} + +bool CouchbaseOperations::CouchbaseRequest::addRequest( + const butil::StringPiece& key, const butil::StringPiece& value, + uint32_t flags, uint32_t exptime, uint64_t cas_value, + std::string collection_name, brpc::Channel* channel, const std::string& server, + const std::string& bucket) { + DEBUG_PRINT("addRequest called with key: " + << key << ", value: " << value + << ", collection_name: " << collection_name + << ", server: " << server << ", bucket: " << bucket); + uint8_t coll_id = 0; // default collection ID + if (collection_name != "_default") { + // check if the local cache is empty or not. 
+ if (local_collection_manifest_cache->empty()) { + DEBUG_PRINT("Local collection manifest cache is empty in addRequest"); + // if local cache is empty, goto global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + DEBUG_PRINT( + "Failed to get collection id from global cache or server in " + "addRequest"); + return false; + } + } + // check if the collection id is available in the local cache + else if (!getLocalCachedCollectionId(bucket, "_default", collection_name, + &coll_id)) { + DEBUG_PRINT("Collection id not found in local cache in addRequest"); + // if not check in the global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + DEBUG_PRINT( + "Failed to get collection id from global cache or server in " + "addRequest"); + return false; + } + } + } + DEBUG_PRINT("addRequest using coll_id: " << (int)coll_id); + return store(policy::CB_BINARY_ADD, key, value, flags, exptime, cas_value, + coll_id); +} + +bool CouchbaseOperations::CouchbaseRequest::appendRequest( + const butil::StringPiece& key, const butil::StringPiece& value, + uint32_t flags, uint32_t exptime, uint64_t cas_value, + std::string collection_name, brpc::Channel* channel, const std::string& server, + const std::string& bucket) { + if (value.empty()) { + DEBUG_PRINT("value to append must be non-empty"); + return false; + } + uint8_t coll_id = 0; // default collection ID + if (collection_name != "_default") { + // check if the local cache is empty or not. 
+ if (local_collection_manifest_cache->empty()) { + // if local cache is empty, goto global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + return false; + } + } + // check if the collection id is available in the local cache + else if (!getLocalCachedCollectionId(bucket, "_default", collection_name, + &coll_id)) { + // if not check in the global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + return false; + } + } + } + return store(policy::CB_BINARY_APPEND, key, value, flags, exptime, cas_value, + coll_id); +} + +bool CouchbaseOperations::CouchbaseRequest::prependRequest( + const butil::StringPiece& key, const butil::StringPiece& value, + uint32_t flags, uint32_t exptime, uint64_t cas_value, + std::string collection_name, brpc::Channel* channel, const std::string& server, + const std::string& bucket) { + if (value.empty()) { + DEBUG_PRINT("value to prepend must be non-empty"); + return false; + } + uint8_t coll_id = 0; // default collection ID + if (collection_name != "_default") { + // check if the local cache is empty or not. 
+ if (local_collection_manifest_cache->empty()) { + // if local cache is empty, goto global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + return false; + } + } + // check if the collection id is available in the local cache + else if (!getLocalCachedCollectionId(bucket, "_default", collection_name, + &coll_id)) { + // if not check in the global cache or fetch from server + if (!getCachedOrFetchCollectionId( + collection_name, &coll_id, metadata_tracking, channel, server, + bucket, local_collection_manifest_cache)) { + return false; + } + } + } + return store(policy::CB_BINARY_PREPEND, key, value, flags, exptime, cas_value, + coll_id); +} + +bool CouchbaseOperations::CouchbaseResponse::popAuthenticate( + uint64_t* cas_value) { + return popStore(policy::CB_BINARY_SASL_AUTH, cas_value); +} +bool CouchbaseOperations::CouchbaseResponse::popHello(uint64_t* cas_value) { + return popStore(policy::CB_HELLO_SELECT_FEATURES, cas_value); +} +bool CouchbaseOperations::CouchbaseResponse::popUpsert(uint64_t* cas_value) { + return popStore(policy::CB_BINARY_SET, cas_value); +} +bool CouchbaseOperations::CouchbaseResponse::popAdd(uint64_t* cas_value) { + return popStore(policy::CB_BINARY_ADD, cas_value); +} +// Warning: Not tested +// bool CouchbaseOperations::CouchbaseResponse::PopReplace(uint64_t* cas_value) +// { +// return popStore(policy::CB_BINARY_REPLACE, cas_value); +// } +bool CouchbaseOperations::CouchbaseResponse::popAppend(uint64_t* cas_value) { + return popStore(policy::CB_BINARY_APPEND, cas_value); +} +bool CouchbaseOperations::CouchbaseResponse::popPrepend(uint64_t* cas_value) { + return popStore(policy::CB_BINARY_PREPEND, cas_value); +} +bool CouchbaseOperations::CouchbaseResponse::popSelectBucket( + uint64_t* cas_value) { + if (popStore(policy::CB_SELECT_BUCKET, cas_value) == false) { + DEBUG_PRINT("Failed to select bucket: " << _err); 
+ return false; + } + // Note: Bucket tracking is now handled at CouchbaseOperations level, not + // per-thread + return true; +} +// Collection-related response method +bool CouchbaseOperations::CouchbaseResponse::popCollectionId( + uint8_t* collection_id) { + const size_t n = _buf.size(); + policy::CouchbaseResponseHeader header; + if (n < sizeof(header)) { + butil::string_printf(&_err, "buffer is too small to contain a header"); + return false; + } + _buf.copy_to(&header, sizeof(header)); + + if (header.command != policy::CB_COLLECTIONS_GET_CID) { + butil::string_printf(&_err, "Not a collection ID response"); + return false; + } + + // Making sure buffer has the whole body (extras + key + value) + if (n < sizeof(header) + header.total_body_length) { + butil::string_printf(&_err, "Not enough data"); + return false; + } + + if (header.status != 0) { + // handle error case + _buf.pop_front(sizeof(header) + header.extras_length + header.key_length); + // Possibly read error message from value if present + size_t value_size = + header.total_body_length - header.extras_length - header.key_length; + if (value_size > 0) { + std::string err_msg; + _buf.cutn(&err_msg, value_size); + _err = + formatErrorMessage(header.status, "Collection ID request", err_msg); + } else { + _err = formatErrorMessage(header.status, "Collection ID request"); + } + return false; + } + + // Success case: we expect extras_length >= 12 (8 bytes manifest + 4 bytes + // collection id) + if (header.extras_length < 12) { + butil::string_printf(&_err, "Extras too small to contain collection ID"); + // remove the response from buffer so you don't re‐process + _buf.pop_front(sizeof(header) + header.total_body_length); + return false; + } + + // Skip header + _buf.pop_front(sizeof(header)); + + // return true; + uint64_t manifest_id_net = 0; + _buf.copy_to(reinterpret_cast(&manifest_id_net), + sizeof(manifest_id_net)); + // You may convert this if needed: + uint64_t manifest_id = 
butil::NetToHost64(manifest_id_net); + DEBUG_PRINT("Manifest ID: " << manifest_id); + _buf.pop_front(sizeof(manifest_id_net)); + + // Next 1 bytes → collection ID (u8) + uint32_t cid_net = 0; + _buf.copy_to(reinterpret_cast(&cid_net), sizeof(cid_net)); + uint8_t cid_host = butil::NetToHost32(cid_net); + *collection_id = static_cast(cid_host); + _buf.pop_front(sizeof(cid_net)); + + _buf.pop_front(header.total_body_length); + _err.clear(); + return true; +} + +bool CouchbaseOperations::CouchbaseResponse::popManifest( + std::string* manifest_json) { + const size_t n = _buf.size(); + policy::CouchbaseResponseHeader header; + if (n < sizeof(header)) { + butil::string_printf(&_err, "buffer is too small to contain a header"); + return false; + } + _buf.copy_to(&header, sizeof(header)); + + if (header.command != policy::CB_GET_COLLECTIONS_MANIFEST) { + butil::string_printf(&_err, "Not a get collections manifest response"); + return false; + } + + // Making sure buffer has the whole body (extras + key + value) + if (n < sizeof(header) + header.total_body_length) { + butil::string_printf(&_err, "Not enough data"); + return false; + } + + if (header.status != 0) { + // handle error case + if (header.extras_length != 0) { + DEBUG_PRINT("Get Collections Manifest response must not have extras"); + } + if (header.key_length != 0) { + DEBUG_PRINT("Get Collections Manifest response must not have key"); + } + _buf.pop_front(sizeof(header) + header.extras_length + header.key_length); + // Possibly read error message from value if present + size_t value_size = + header.total_body_length - header.extras_length - header.key_length; + if (value_size > 0) { + std::string err_msg; + _buf.cutn(&err_msg, value_size); + _err = formatErrorMessage(header.status, "Get Collections Manifest", + err_msg); + } else { + _err = formatErrorMessage(header.status, "Get Collections Manifest"); + } + return false; + } + + // Success case: the manifest should be in the value section + size_t value_size = + 
header.total_body_length - header.extras_length - header.key_length; + if (value_size == 0) { + butil::string_printf(&_err, "No manifest data in response"); + _buf.pop_front(sizeof(header) + header.total_body_length); + return false; + } + + // Skip header and any extras/key + _buf.pop_front(sizeof(header) + header.extras_length + header.key_length); + + // Read the manifest JSON from the value section + _buf.cutn(manifest_json, value_size); + + _err.clear(); + return true; +} + +struct IncrHeaderWithExtras { + policy::CouchbaseRequestHeader header; + uint64_t delta; + uint64_t initial_value; + uint32_t exptime; +} __attribute__((packed)); +BAIDU_CASSERT(sizeof(IncrHeaderWithExtras) == 44, must_match); + +const size_t INCR_EXTRAS = + sizeof(IncrHeaderWithExtras) - sizeof(policy::CouchbaseRequestHeader); + +// MUST have extras. +// MUST have key. +// MUST NOT have value. +// Extra data for incr/decr: +// Byte/ 0 | 1 | 2 | 3 | +// / | | | | +// |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| +// +---------------+---------------+---------------+---------------+ +// 0| Delta to add / subtract | +// | | +// +---------------+---------------+---------------+---------------+ +// 8| Initial value | +// | | +// +---------------+---------------+---------------+---------------+ +// 16| Expiration | +// +---------------+---------------+---------------+---------------+ +// Total 20 bytes +bool CouchbaseOperations::CouchbaseRequest::counter( + uint8_t command, const butil::StringPiece& key, uint64_t delta, + uint64_t initial_value, uint32_t exptime) { + IncrHeaderWithExtras header_with_extras = { + {policy::CB_MAGIC_REQUEST, command, butil::HostToNet16(key.size()), + INCR_EXTRAS, policy::CB_BINARY_RAW_BYTES, 0, + butil::HostToNet32(INCR_EXTRAS + key.size()), 0, 0}, + butil::HostToNet64(delta), + butil::HostToNet64(initial_value), + butil::HostToNet32(exptime)}; + if (_buf.append(&header_with_extras, sizeof(header_with_extras))) { + return false; + } + if 
(_buf.append(key.data(), key.size())) { + return false; + } + ++_pipelined_count; + return true; +} + +// MUST NOT have extras. +// MUST NOT have key. +// MUST have value. +// Byte/ 0 | 1 | 2 | 3 | +// / | | | | +// |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| +// +---------------+---------------+---------------+---------------+ +// 0| 64-bit unsigned response. | +// | | +// +---------------+---------------+---------------+---------------+ +// Total 8 bytes +bool CouchbaseOperations::CouchbaseResponse::popCounter(uint8_t command, + uint64_t* new_value, + uint64_t* cas_value) { + const size_t n = _buf.size(); + policy::CouchbaseResponseHeader header; + if (n < sizeof(header)) { + butil::string_printf(&_err, "buffer is too small to contain a header"); + return false; + } + _buf.copy_to(&header, sizeof(header)); + if (header.command != command) { + butil::string_printf(&_err, "not a INCR/DECR response"); + return false; + } + if (n < sizeof(header) + header.total_body_length) { + butil::string_printf(&_err, "response=%u < header=%u + body=%u", + (unsigned)n, (unsigned)sizeof(header), + header.total_body_length); + return false; + } + if (DBUG && header.extras_length != 0) { + DEBUG_PRINT("INCR/DECR response must not have flags"); + } + if (DBUG && header.key_length != 0) { + DEBUG_PRINT("INCR/DECR response must not have key"); + } + const int value_size = (int)header.total_body_length - + (int)header.extras_length - (int)header.key_length; + _buf.pop_front(sizeof(header) + header.extras_length + header.key_length); + + if (header.status != (uint16_t)STATUS_SUCCESS) { + if (value_size < 0) { + butil::string_printf(&_err, "value_size=%d is negative", value_size); + } else { + if (value_size > 0) { + std::string error_msg; + _buf.cutn(&error_msg, value_size); + _err = + formatErrorMessage(header.status, "Counter operation", error_msg); + } else { + _err = formatErrorMessage(header.status, "Counter operation"); + } + } + return false; + } + if 
(value_size != 8) { + butil::string_printf(&_err, "value_size=%d is not 8", value_size); + return false; + } + uint64_t raw_value = 0; + _buf.cutn(&raw_value, sizeof(raw_value)); + *new_value = butil::NetToHost64(raw_value); + if (cas_value) { + *cas_value = header.cas_value; + } + _err.clear(); + return true; +} + +// MUST NOT have extras. +// MUST NOT have key. +// MUST NOT have value. +bool CouchbaseOperations::CouchbaseRequest::versionRequest() { + const policy::CouchbaseRequestHeader header = {policy::CB_MAGIC_REQUEST, + policy::CB_BINARY_VERSION, + 0, + 0, + policy::CB_BINARY_RAW_BYTES, + 0, + 0, + 0, + 0}; + if (_buf.append(&header, sizeof(header))) { + return false; + } + ++_pipelined_count; + return true; +} + +bool CouchbaseOperations::CouchbaseResponse::popVersion(std::string* version) { + const size_t n = _buf.size(); + policy::CouchbaseResponseHeader header; + if (n < sizeof(header)) { + butil::string_printf(&_err, "buffer is too small to contain a header"); + return false; + } + _buf.copy_to(&header, sizeof(header)); + if (header.command != policy::CB_BINARY_VERSION) { + butil::string_printf(&_err, "not a VERSION response"); + return false; + } + if (n < sizeof(header) + header.total_body_length) { + butil::string_printf(&_err, "response=%u < header=%u + body=%u", + (unsigned)n, (unsigned)sizeof(header), + header.total_body_length); + return false; + } + if (DBUG && header.extras_length != 0) { + DEBUG_PRINT("VERSION response must not have flags"); + } + if (DBUG && header.key_length != 0) { + DEBUG_PRINT("VERSION response must not have key"); + } + const int value_size = (int)header.total_body_length - + (int)header.extras_length - (int)header.key_length; + _buf.pop_front(sizeof(header) + header.extras_length + header.key_length); + if (value_size < 0) { + butil::string_printf(&_err, "value_size=%d is negative", value_size); + return false; + } + if (header.status != (uint16_t)STATUS_SUCCESS) { + if (value_size > 0) { + std::string error_msg; + 
_buf.cutn(&error_msg, value_size); + _err = formatErrorMessage(header.status, "Version request", error_msg); + } else { + _err = formatErrorMessage(header.status, "Version request"); + } + return false; + } + if (version) { + version->clear(); + _buf.cutn(version, value_size); + } + _err.clear(); + return true; +} + +bool sendRequest(CouchbaseOperations::operation_type op_type, const std::string& key, + const std::string& value, std::string collection_name, + CouchbaseOperations::Result* result, brpc::Channel* channel, + const std::string& server, const std::string& bucket, + CouchbaseOperations::CouchbaseRequest* request, + CouchbaseOperations::CouchbaseResponse* response) { + if (channel == nullptr) { + DEBUG_PRINT("No channel found, make sure to call Authenticate() first"); + result->error_message = + "No channel found, make sure to call Authenticate() first"; + return false; + } + if (server.empty()) { + DEBUG_PRINT("Server is empty, make sure to call Authenticate() first"); + result->error_message = + "Server is empty, make sure to call Authenticate() first"; + return false; + } + if (bucket.empty()) { + DEBUG_PRINT("No bucket selected, make sure to call SelectBucket() first"); + result->error_message = + "No bucket selected, make sure to call SelectBucket() first"; + return false; + } + brpc::Controller cntl; + bool request_created = false; + switch (op_type) { + case CouchbaseOperations::GET: + request_created = + request->getRequest(key, collection_name, channel, server, bucket); + break; + case CouchbaseOperations::UPSERT: + request_created = request->upsertRequest( + key, value, 0, 0, 0, collection_name, channel, server, bucket); + break; + case CouchbaseOperations::ADD: + request_created = request->addRequest( + key, value, 0, 0, 0, collection_name, channel, server, bucket); + break; + case CouchbaseOperations::APPEND: + request_created = request->appendRequest( + key, value, 0, 0, 0, collection_name, channel, server, bucket); + break; + case 
CouchbaseOperations::PREPEND: + request_created = request->prependRequest( + key, value, 0, 0, 0, collection_name, channel, server, bucket); + break; + case CouchbaseOperations::DELETE: + request_created = + request->deleteRequest(key, collection_name, channel, server, bucket); + break; + default: + DEBUG_PRINT("Unsupported operation type"); + result->success = false; + result->value = ""; + result->error_message = "Unsupported operation type"; + return false; + } + if (!request_created) { + DEBUG_PRINT("CollectionID does not exist." << op_type); + result->success = false; + result->value = ""; + result->error_message = + "CollectionID does not exist." + std::to_string(op_type); + result->status_code = 0x88; // using 0x88 as the only possible failure code + // that indicates the collectionID is not found + return false; + } + channel->CallMethod(NULL, &cntl, request, response, NULL); + if (cntl.Failed()) { + DEBUG_PRINT("Failed to perform operation on key: " + << key << " to Couchbase: " << cntl.ErrorText()); + result->success = false; + result->value = ""; + result->error_message = cntl.ErrorText(); + return false; + } + if (op_type == CouchbaseOperations::GET) { + std::string value; + uint32_t flags = 0; + uint64_t cas = 0; + if (response->popGet(&value, &flags, &cas) == false) { + result->success = false; + result->value = ""; + result->error_message = response->lastError(); + result->status_code = response->_status_code; + if (result->status_code == 0x88) { + DEBUG_PRINT( + "CollectionID does not exist on server, need to refresh collection " + "manifest from server"); + // could have called sendRequest recursively, + // but if somehow the collectionID keeps on chaning, it would lead to + // infinite recursion and stack overflow in the end. so we retry once + // here instead and return failure if it still fails. + + // (0x88) unknown collection, this means that the collection_manifest + // has been updated on the server side. 
The collectionID present in the + // local cache/global cache is no longer valid. This can happen if a + // collection is deleted and recreated with the same name. + if (!request->metadata_tracking->refreshCollectionManifest( + channel, server, bucket, + request->local_collection_manifest_cache)) { + DEBUG_PRINT("Failed to refresh collection manifest"); + result->error_message = "Failed to refresh collection manifest"; + } else { + DEBUG_PRINT("Successfully refreshed collection manifest"); + // retry the request; + request->Clear(); + response->Clear(); + cntl.Reset(); + if (!request->getRequest(key, collection_name, channel, server, + bucket)) { + DEBUG_PRINT("CollectionID does not exist."); + result->success = false; + result->value = ""; + result->error_message = "CollectionID does not exist."; + result->status_code = + 0x88; // using 0x88 as the only possible failure code that + // indicates the collectionID is not found + return false; + } + channel->CallMethod(NULL, &cntl, request, response, NULL); + if (cntl.Failed()) { + DEBUG_PRINT("Failed to perform operation on key: " + << key << " to Couchbase: " << cntl.ErrorText()); + result->success = false; + result->value = ""; + result->error_message = cntl.ErrorText(); + return false; // return on failure + } + if (response->popGet(&value, &flags, &cas) == false) { + result->success = false; + result->value = ""; + result->error_message = response->lastError(); + result->status_code = response->_status_code; + return false; // return on failure + } + // Successfully got the value after retry + result->success = true; + result->value = value; + result->status_code = 0; + return true; + } + } + return false; + } + // Successfully got the value + result->success = true; + result->value = value; + result->status_code = 0; + return true; + } else { + uint64_t cas_value = 0; + // pop response on the basis of operation type + bool pop_success = false; + switch (op_type) { + case CouchbaseOperations::UPSERT: + 
pop_success = response->popUpsert(&cas_value); + break; + case CouchbaseOperations::ADD: + pop_success = response->popAdd(&cas_value); + break; + case CouchbaseOperations::APPEND: + pop_success = response->popAppend(&cas_value); + break; + case CouchbaseOperations::PREPEND: + pop_success = response->popPrepend(&cas_value); + break; + case CouchbaseOperations::DELETE: + pop_success = response->popDelete(); + break; + default: + DEBUG_PRINT("Unsupported operation type in response pop"); + result->success = false; + result->value = ""; + result->error_message = "Unsupported operation type in response pop"; + return false; + } + if (!pop_success) { + result->success = false; + result->value = ""; + result->error_message = response->lastError(); + result->status_code = response->_status_code; + if (result->status_code == 0x88) { + // (0x88) unknown collection, this typically means that the + // collection_manifest has been updated on the server side. and the + // client have a stale copy of collection manifest. In this case, we + // need to refresh the collection manifest and retry the operation. + if (!request->metadata_tracking->refreshCollectionManifest( + channel, server, bucket, + request->local_collection_manifest_cache)) { + DEBUG_PRINT("Failed to refresh collection manifest"); + result->error_message = "Failed to refresh collection manifest"; + return false; + } + // could have called sendRequest recursively, + // but if somehow the collectionID keeps on chaning, it would lead to + // infinite recursion and stack overflow in the end. so we retry once + // here instead and return failure if it still fails. 
+ DEBUG_PRINT("Successfully refreshed collection manifest"); + // retry the request; + request->Clear(); + response->Clear(); + switch (op_type) { + case CouchbaseOperations::UPSERT: + request->upsertRequest(key, value, 0, 0, 0, collection_name, + channel, server, bucket); + break; + case CouchbaseOperations::ADD: + request->addRequest(key, value, 0, 0, 0, collection_name, channel, + server, bucket); + break; + case CouchbaseOperations::APPEND: + request->appendRequest(key, value, 0, 0, 0, collection_name, + channel, server, bucket); + break; + case CouchbaseOperations::PREPEND: + request->prependRequest(key, value, 0, 0, 0, collection_name, + channel, server, bucket); + break; + case CouchbaseOperations::DELETE: + request->deleteRequest(key, collection_name, channel, server, + bucket); + break; + default: + DEBUG_PRINT("Unsupported operation type in response pop"); + result->success = false; + result->value = ""; + result->error_message = + "Unsupported operation type in response pop"; + return false; + } + channel->CallMethod(NULL, &cntl, request, response, NULL); + if (cntl.Failed()) { + DEBUG_PRINT("Failed to perform operation on key: " + << key << " to Couchbase: " << cntl.ErrorText()); + result->success = false; + result->value = ""; + result->error_message = cntl.ErrorText(); + return false; // return on failure + } + pop_success = false; + switch (op_type) { + case CouchbaseOperations::UPSERT: + pop_success = response->popUpsert(&cas_value); + break; + case CouchbaseOperations::ADD: + pop_success = response->popAdd(&cas_value); + break; + case CouchbaseOperations::APPEND: + pop_success = response->popAppend(&cas_value); + break; + case CouchbaseOperations::PREPEND: + pop_success = response->popPrepend(&cas_value); + break; + case CouchbaseOperations::DELETE: + pop_success = response->popDelete(); + break; + default: + DEBUG_PRINT("Unsupported operation type in response pop"); + result->success = false; + result->value = ""; + result->error_message = + 
"Unsupported operation type in response pop"; + return false; + } + if (!pop_success) { + result->success = false; + result->value = ""; + result->error_message = response->lastError(); + result->status_code = response->_status_code; + return false; // return on failure + } + // Successfully performed the operation after retry + result->success = true; + result->value = ""; + result->status_code = 0; + return true; + } + return false; + } + // Successfully performed the operation + // Note: For operations other than GET, we don't have a value to return + // so we return empty std::string for value. + result->success = true; + result->value = ""; + result->status_code = 0; + return true; + } +} +CouchbaseOperations::Result CouchbaseOperations::get(const std::string& key, + std::string collection_name) { + // create CouchbaseRequest and CouchbaseResponse objects and then using the + // channel which is created for this thread in authenticate() use it to call() + CouchbaseRequest request(&local_bucket_to_collection_manifest_); + CouchbaseResponse response; + brpc::Controller cntl; + CouchbaseOperations::Result result; + sendRequest(CouchbaseOperations::GET, key, "", collection_name, &result, + channel_, server_address_, selected_bucket_, &request, &response); + return result; +} + +bool CouchbaseOperations::CouchbaseRequest::getLocalCachedCollectionId( + const std::string& bucket, const std::string& scope, const std::string& collection, + uint8_t* collection_id) { + if (bucket.empty() || scope.empty() || collection.empty()) { + DEBUG_PRINT("Bucket, scope, and collection names must be non-empty"); + return false; + } + auto it = local_collection_manifest_cache->find(bucket); + if (it != local_collection_manifest_cache->end()) { + CouchbaseManifestManager::CollectionManifest& manifest = it->second; + if (manifest.scope_to_collection_id_map.find(scope) != + manifest.scope_to_collection_id_map.end()) { + auto& collection_map = manifest.scope_to_collection_id_map[scope]; + 
if (collection_map.find(collection) != collection_map.end()) { + *collection_id = collection_map[collection]; + return true; + } else { + DEBUG_PRINT("Collection name not found in local cache: " << collection); + return false; + } + } else { + DEBUG_PRINT("Scope name not found in local cache: " << scope); + return false; + } + } else { + DEBUG_PRINT("Bucket name not found in local cache: " << bucket); + return false; + } +} + +CouchbaseOperations::Result CouchbaseOperations::upsert( + const std::string& key, const std::string& value, std::string collection_name) { + CouchbaseRequest request(&local_bucket_to_collection_manifest_); + CouchbaseResponse response; + brpc::Controller cntl; + CouchbaseOperations::Result result; + sendRequest(CouchbaseOperations::UPSERT, key, value, collection_name, &result, + channel_, server_address_, selected_bucket_, &request, &response); + return result; +} + +CouchbaseOperations::Result CouchbaseOperations::delete_( + const std::string& key, std::string collection_name) { + CouchbaseRequest request(&local_bucket_to_collection_manifest_); + CouchbaseResponse response; + brpc::Controller cntl; + CouchbaseOperations::Result result; + if (!sendRequest(CouchbaseOperations::DELETE, key, "", collection_name, + &result, channel_, server_address_, selected_bucket_, + &request, &response)) { + return result; + } + return result; +} + +CouchbaseOperations::Result CouchbaseOperations::add(const std::string& key, + const std::string& value, + std::string collection_name) { + CouchbaseRequest request(&local_bucket_to_collection_manifest_); + CouchbaseResponse response; + brpc::Controller cntl; + CouchbaseOperations::Result result; + sendRequest(CouchbaseOperations::ADD, key, value, collection_name, &result, + channel_, server_address_, selected_bucket_, &request, &response); + return result; +} + +CouchbaseOperations::Result CouchbaseOperations::authenticate( + const std::string& username, const std::string& password, + const std::string& 
server_address, const std::string& bucket_name) { + return authenticateAll(username, password, server_address, bucket_name, false, + ""); +} + +CouchbaseOperations::Result CouchbaseOperations::authenticateSSL( + const std::string& username, const std::string& password, + const std::string& server_address, const std::string& bucket_name, + std::string path_to_cert) { + return authenticateAll(username, password, server_address, bucket_name, true, + path_to_cert); +} + +CouchbaseOperations::Result CouchbaseOperations::authenticateAll( + const std::string& username, const std::string& password, + const std::string& server_address, const std::string& bucket_name, bool enable_ssl, + std::string path_to_cert) { + // Create a channel to the Couchbase server + brpc::ChannelOptions options; + options.protocol = brpc::PROTOCOL_COUCHBASE; + options.connection_type = "single"; + options.timeout_ms = 1000; // 1 second + options.max_retry = 3; + + // CRITICAL: Set unique connection_group to prevent connection sharing + // Each CouchbaseOperations instance connected to same bucket gets its own + // connection group + options.connection_group = server_address + bucket_name; + + // enable_ssl + if (enable_ssl) { + brpc::ChannelSSLOptions* ssl_options = options.mutable_ssl_options(); + ssl_options->sni_name = server_address; + ssl_options->verify.verify_depth = + 1; // Enable certificate verification, to disable SSL set it to 0 + ssl_options->verify.ca_file_path = + path_to_cert; // Path to your downloaded TLS certificate + } + CouchbaseOperations::Result result; + brpc::Channel* new_channel = new brpc::Channel(); + if (new_channel->Init(server_address.c_str(), &options) != 0) { + DEBUG_PRINT("Failed to initialize Couchbase channel to " << server_address); + delete new_channel; + result.success = false; + result.value = ""; + result.error_message = "Failed to initialize Couchbase channel"; + return result; + } + // Create a CouchbaseRequest and CouchbaseResponse for authentication + 
CouchbaseRequest request; + CouchbaseResponse response; + brpc::Controller cntl; + if (request.authenticateRequest(username.c_str(), password.c_str()) == + false) { + DEBUG_PRINT("Failed to create Authenticate request for user: " << username); + delete new_channel; + result.success = false; + return result; + } + new_channel->CallMethod(NULL, &cntl, &request, &response, NULL); + if (cntl.Failed()) { + DEBUG_PRINT("Failed to access Couchbase: " << cntl.ErrorText()); + delete new_channel; + result.success = false; + result.value = ""; + result.error_message = cntl.ErrorText(); + return result; + } + uint64_t cas; + if (response.popHello(&cas) == false) { + DEBUG_PRINT("Failed to receive HELO response from Couchbase: " + << response.lastError()); + delete new_channel; + result.success = false; + result.value = ""; + result.error_message = response.lastError(); + result.status_code = response._status_code; + return result; + } + if (response.popAuthenticate(&cas) == false) { + DEBUG_PRINT("Failed to authenticate user: " << username << " to Couchbase: " + << response.lastError()); + result.success = false; + result.value = ""; + result.error_message = response.lastError(); + result.status_code = response._status_code; + return result; + } + // Successfully authenticated + channel_ = new_channel; + this->server_address_ = server_address; + result.success = true; + result.status_code = 0; + + DEBUG_PRINT("Instance " << reinterpret_cast(this) + << " authenticated with unique connection_group:" + << server_address_ + bucket_name); + + // select the bucket + result = selectBucket(bucket_name); + return result; +} + +CouchbaseOperations::Result CouchbaseOperations::selectBucket( + const std::string& bucket_name) { + CouchbaseRequest request(&local_bucket_to_collection_manifest_); + CouchbaseResponse response; + brpc::Controller cntl; + CouchbaseOperations::Result result; + if (request.selectBucketRequest(bucket_name.c_str()) == false) { + DEBUG_PRINT( + "Failed to create 
Select Bucket request for bucket: " << bucket_name); + result.success = false; + result.value = ""; + return result; + } + channel_->CallMethod(NULL, &cntl, &request, &response, NULL); + if (cntl.Failed()) { + DEBUG_PRINT("Failed to select bucket: " + << bucket_name << " from Couchbase: " << cntl.ErrorText()); + result.success = false; + result.value = ""; + result.error_message = cntl.ErrorText(); + return result; + } + if (response.popSelectBucket(NULL) == false) { + result.success = false; + result.value = ""; + result.error_message = response.lastError(); + result.status_code = response._status_code; + return result; + } + // Successfully selected the bucket + selected_bucket_ = bucket_name; + result.success = true; + result.value = ""; + result.status_code = 0; + + // fetch the collection manifest for this bucket and store it in local cache + if (request.local_collection_manifest_cache->find(bucket_name) == + request.local_collection_manifest_cache->end()) { + // only fetch if not already present in the local cache + CouchbaseManifestManager::CollectionManifest manifest; + if (!common_metadata_tracking.getBucketToCollectionManifest( + server_address_, bucket_name, &manifest)) { + DEBUG_PRINT("Collection manifest for bucket: " + << bucket_name + << " not found in global cache, the local cache"); + + // manifest for this bucket/server is not cached yet, will fetch it from + // server now. refresh will also update the local cache with the fetched + // manifest + request.metadata_tracking->refreshCollectionManifest( + channel_, server_address_, bucket_name, + request.local_collection_manifest_cache); + // We simply try once to prefetch the manifest, before any collection + // operation. If it fails, it will be lazily updated when a collection + // operation is performed. 
+ } else { + // update the local cache with the cache manifest from global + // cache(common_metadata_tracking) + DEBUG_PRINT("Updated local cache collection manifest for bucket: " + << bucket_name); + (*request.local_collection_manifest_cache)[bucket_name] = manifest; + } + } else { + DEBUG_PRINT("Collection manifest for bucket: " + << bucket_name << " already present in local cache"); + } + DEBUG_PRINT("Bucket selected successfully " << bucket_name); + return result; +} + +CouchbaseOperations::Result CouchbaseOperations::append( + const std::string& key, const std::string& value, std::string collection_name) { + CouchbaseRequest request(&local_bucket_to_collection_manifest_); + CouchbaseResponse response; + brpc::Controller cntl; + CouchbaseOperations::Result result; + sendRequest(CouchbaseOperations::APPEND, key, value, collection_name, &result, + channel_, server_address_, selected_bucket_, &request, &response); + return result; +} + +CouchbaseOperations::Result CouchbaseOperations::prepend( + const std::string& key, const std::string& value, std::string collection_name) { + CouchbaseRequest request(&local_bucket_to_collection_manifest_); + CouchbaseResponse response; + brpc::Controller cntl; + CouchbaseOperations::Result result; + sendRequest(CouchbaseOperations::PREPEND, key, value, collection_name, + &result, channel_, server_address_, selected_bucket_, &request, + &response); + return result; +} + +CouchbaseOperations::Result CouchbaseOperations::version() { + CouchbaseRequest request; + CouchbaseResponse response; + brpc::Controller cntl; + CouchbaseOperations::Result result; + if (request.versionRequest() == false) { + DEBUG_PRINT("Failed to create Version request"); + result.success = false; + result.value = ""; + return result; + } + channel_->CallMethod(NULL, &cntl, &request, &response, NULL); + if (cntl.Failed()) { + DEBUG_PRINT("Failed to get version from Couchbase: " << cntl.ErrorText()); + result.success = false; + result.value = ""; + 
result.error_message = cntl.ErrorText(); + return result; + } + std::string version; + if (response.popVersion(&version) == false) { + result.success = false; + result.value = ""; + result.error_message = response.lastError(); + result.status_code = response._status_code; + return result; + } + // Successfully got version + result.success = true; + result.value = version; + result.status_code = 0; + return result; +} + +bool CouchbaseOperations::beginPipeline() { + if (pipeline_active) { + DEBUG_PRINT("Pipeline already active. Call clearPipeline() first."); + return false; + } + + // Clear any previous state + while (!pipeline_operations_queue.empty()) { + pipeline_operations_queue.pop(); + } + pipeline_request_couchbase_req.Clear(); + + pipeline_active = true; + return true; +} + +bool CouchbaseOperations::pipelineRequest(operation_type op_type, + const std::string& key, + const std::string& value, + std::string collection_name) { + if (!pipeline_active) { + DEBUG_PRINT("Pipeline not active. 
Call beginPipeline() first."); + return false; + } + + switch (op_type) { + case GET: + if (pipeline_request_couchbase_req.getRequest( + key, collection_name, channel_, server_address_, + selected_bucket_) == false) { + return false; + } + pipeline_operations_queue.push(GET); + break; + case UPSERT: + if (pipeline_request_couchbase_req.upsertRequest( + key, value, 0, 0, 0, collection_name, channel_, server_address_, + selected_bucket_) == false) { + return false; + } + pipeline_operations_queue.push(UPSERT); + break; + case ADD: + if (pipeline_request_couchbase_req.addRequest( + key, value, 0, 0, 0, collection_name, channel_, server_address_, + selected_bucket_) == false) { + return false; + } + pipeline_operations_queue.push(ADD); + break; + case APPEND: + if (pipeline_request_couchbase_req.appendRequest( + key, value, 0, 0, 0, collection_name, channel_, server_address_, + selected_bucket_) == false) { + return false; + } + pipeline_operations_queue.push(APPEND); + break; + case PREPEND: + if (pipeline_request_couchbase_req.prependRequest( + key, value, 0, 0, 0, collection_name, channel_, server_address_, + selected_bucket_) == false) { + return false; + } + pipeline_operations_queue.push(PREPEND); + break; + case DELETE: + if (pipeline_request_couchbase_req.deleteRequest( + key, collection_name, channel_, server_address_, + selected_bucket_) == false) { + return false; + } + pipeline_operations_queue.push(DELETE); + break; + default: + DEBUG_PRINT("Invalid operation type for pipelining"); + return false; + } + return true; +} +std::vector CouchbaseOperations::executePipeline() { + std::vector results; + + if (!pipeline_active || pipeline_operations_queue.empty()) { + DEBUG_PRINT("No pipeline active or no operations queued"); + return results; + } + + brpc::Controller cntl; + channel_->CallMethod(NULL, &cntl, &pipeline_request_couchbase_req, + &pipeline_response_couchbase_resp, NULL); + + if (cntl.Failed()) { + DEBUG_PRINT("Pipeline execution failed: " << 
cntl.ErrorText()); + // Create failure results for all operations + size_t op_count = pipeline_operations_queue.size(); + results.reserve(op_count); + + CouchbaseOperations::Result failure_result; + failure_result.success = false; + failure_result.error_message = cntl.ErrorText(); + + for (size_t i = 0; i < op_count; ++i) { + results.push_back(failure_result); + } + + clearPipeline(); + return results; + } + + // Process each operation in the order they were added + CouchbaseOperations::CouchbaseResponse* response = + &pipeline_response_couchbase_resp; + while (!pipeline_operations_queue.empty()) { + CouchbaseOperations::Result result; + operation_type op_type = pipeline_operations_queue.front(); + pipeline_operations_queue.pop(); + switch (op_type) { + case GET: { + std::string value; + uint32_t flags = 0; + uint64_t cas = 0; + if (response->popGet(&value, &flags, &cas) == false) { + result.success = false; + result.value = ""; + result.error_message = response->lastError(); + result.status_code = response->_status_code; + } else { + result.success = true; + result.value = value; + } + results.push_back(result); + break; + } + case UPSERT: { + if (response->popUpsert(NULL) == false) { + result.success = false; + result.value = ""; + result.error_message = response->lastError(); + result.status_code = response->_status_code; + } else { + result.success = true; + result.value = ""; + } + results.push_back(result); + break; + } + case ADD: { + if (response->popAdd(NULL) == false) { + result.success = false; + result.value = ""; + result.error_message = response->lastError(); + result.status_code = response->_status_code; + } else { + result.success = true; + result.value = ""; + } + results.push_back(result); + break; + } + case APPEND: { + uint64_t cas_value; + if (response->popAppend(&cas_value) == false) { + result.success = false; + result.value = ""; + result.error_message = response->lastError(); + result.status_code = response->_status_code; + } else { + 
result.success = true; + result.value = ""; + } + results.push_back(result); + break; + } + case PREPEND: { + uint64_t cas_value; + if (response->popPrepend(&cas_value) == false) { + result.success = false; + result.value = ""; + result.error_message = response->lastError(); + result.status_code = response->_status_code; + } else { + result.success = true; + result.value = ""; + } + results.push_back(result); + break; + } + case DELETE: { + if (response->popDelete() == false) { + result.success = false; + result.value = ""; + result.error_message = response->lastError(); + result.status_code = response->_status_code; + } else { + result.success = true; + result.value = ""; + } + results.push_back(result); + break; + } + default: + DEBUG_PRINT("Invalid operation type in pipeline response processing"); + result.success = false; + result.value = ""; + result.error_message = "Invalid operation type"; + results.push_back(result); + break; + } + } + + pipeline_active = false; + pipeline_request_couchbase_req.Clear(); + + return results; +} + +bool CouchbaseOperations::clearPipeline() { + while (!pipeline_operations_queue.empty()) { + pipeline_operations_queue.pop(); + } + pipeline_request_couchbase_req.Clear(); + pipeline_active = false; + return true; +} +} // namespace brpc \ No newline at end of file diff --git a/src/brpc/couchbase.h b/src/brpc/couchbase.h new file mode 100644 index 0000000000..3e52f608d3 --- /dev/null +++ b/src/brpc/couchbase.h @@ -0,0 +1,517 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_COUCHBASE_H +#define BRPC_COUCHBASE_H + +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "brpc/nonreflectable_message.h" +#include "brpc/pb_compat.h" +#include "butil/iobuf.h" +#include "butil/strings/string_piece.h" + +namespace brpc { + +// Forward declarations for friend functions +class InputMessageBase; +class Controller; +namespace policy { +void ProcessCouchbaseResponse(InputMessageBase* msg); +void SerializeCouchbaseRequest(butil::IOBuf* buf, Controller* cntl, + const google::protobuf::Message* request); +} // namespace policy + +// Simple C++11 compatible reader-writer lock +class ReaderWriterLock { + private: + std::mutex mutex_; + std::condition_variable reader_cv_; + std::condition_variable writer_cv_; + std::atomic reader_count_; + std::atomic writer_active_; + std::atomic waiting_writers_; + + public: + ReaderWriterLock() + : reader_count_(0), writer_active_(false), waiting_writers_(0) {} + + void lock_shared() { + std::unique_lock lock(mutex_); + reader_cv_.wait(lock, [this] { + return !writer_active_.load() && waiting_writers_.load() == 0; + }); + reader_count_.fetch_add(1); + } + + void unlock_shared() { + reader_count_.fetch_sub(1); + if (reader_count_.load() == 0) { + std::lock_guard lock(mutex_); + writer_cv_.notify_one(); + } + } + + void lock() { + std::unique_lock lock(mutex_); + waiting_writers_.fetch_add(1); + writer_cv_.wait(lock, [this] { + return !writer_active_.load() && reader_count_.load() == 0; + }); + waiting_writers_.fetch_sub(1); + 
writer_active_.store(true); + } + + void unlock() { + writer_active_.store(false); + std::lock_guard lock(mutex_); + writer_cv_.notify_one(); + reader_cv_.notify_all(); + } +}; + +// RAII helper classes +class SharedLock { + private: + ReaderWriterLock& lock_; + + public: + explicit SharedLock(ReaderWriterLock& lock) : lock_(lock) { + lock_.lock_shared(); + } + ~SharedLock() { lock_.unlock_shared(); } +}; + +class UniqueLock { + private: + ReaderWriterLock& lock_; + + public: + explicit UniqueLock(ReaderWriterLock& lock) : lock_(lock) { lock_.lock(); } + ~UniqueLock() { lock_.unlock(); } +}; + +// manager +class CouchbaseManifestManager { + public: + struct CollectionManifest { + std::string uid; // uid of the manifest, it can be used to track if the manifest + // is updated + std::unordered_map> + scope_to_collection_id_map; // scope -> (collection -> collection_id) + }; + + private: + std::unordered_map> + bucket_to_collection_manifest_; + ReaderWriterLock rw_bucket_to_collection_manifest_mutex_; + + public: + CouchbaseManifestManager() {} + ~CouchbaseManifestManager() { bucket_to_collection_manifest_.clear(); } + bool setBucketToCollectionManifest(std::string server, std::string bucket, + CollectionManifest manifest); + + bool getBucketToCollectionManifest(std::string server, std::string bucket, + CollectionManifest* manifest); + bool getManifestToCollectionId(CollectionManifest* manifest, std::string scope, + std::string collection, uint8_t* collection_id); + + bool jsonToCollectionManifest(const std::string& json, + CollectionManifest* manifest); + bool refreshCollectionManifest( + brpc::Channel* channel, const std::string& server, const std::string& bucket, + std::unordered_map* local_cache = nullptr); +} static common_metadata_tracking; +class CouchbaseOperations { + public: + enum operation_type { + GET = 1, + UPSERT = 2, + ADD = 3, + REPLACE = 4, + APPEND = 5, + PREPEND = 6, + DELETE = 7 + }; + struct Result { + bool success; + std::string error_message; + 
std::string value; + uint16_t status_code; // 0x00 if success + }; + Result get(const std::string& key, std::string collection_name = "_default"); + Result upsert(const std::string& key, const std::string& value, + std::string collection_name = "_default"); + Result add(const std::string& key, const std::string& value, + std::string collection_name = "_default"); + // Warning: Not tested + // Result replace(const std::string& key, const std::string& value, std::string + // collection_name = "_default"); + Result append(const std::string& key, const std::string& value, + std::string collection_name = "_default"); + Result prepend(const std::string& key, const std::string& value, + std::string collection_name = "_default"); + Result delete_(const std::string& key, std::string collection_name = "_default"); + // Warning: Not tested + // Result Increment(const string& key, uint64_t delta, uint64_t initial_value, + // uint32_t exptime, string collection_name = "_default"); Result + // Decrement(const string& key, uint64_t delta, uint64_t initial_value, + // uint32_t exptime, string collection_name = "_default"); Result Touch(const + // string& key, uint32_t exptime, string collection_name = "_default"); Result + // Flush(uint32_t timeout = 0); + Result version(); + Result authenticateSSL(const std::string& username, const std::string& password, + const std::string& server_address, + const std::string& bucket_name, std::string path_to_cert = ""); + Result authenticate(const std::string& username, const std::string& password, + const std::string& server_address, const std::string& bucket_name); + Result selectBucket(const std::string& bucket_name); + + // Pipeline management + bool beginPipeline(); + bool pipelineRequest(operation_type op_type, const std::string& key, + const std::string& value = "", + std::string collection_name = "_default"); + std::vector executePipeline(); // Return by value instead of pointer + bool clearPipeline(); + + // Pipeline status + bool 
isPipelineActive() const { return pipeline_active; } + size_t getPipelineSize() const { return pipeline_operations_queue.size(); } + + CouchbaseOperations() + : pipeline_request_couchbase_req(&local_bucket_to_collection_manifest_), + pipeline_active(false) {} + ~CouchbaseOperations() {} + bool getLocalCachedCollectionId(const std::string& bucket, const std::string& scope, + const std::string& collection, uint8_t* coll_id); + + private: + CouchbaseOperations::Result authenticateAll(const std::string& username, + const std::string& password, + const std::string& server_address, + const std::string& bucket_name, + bool enable_ssl, + std::string path_to_cert); + friend void policy::ProcessCouchbaseResponse(InputMessageBase* msg); + friend void policy::SerializeCouchbaseRequest( + butil::IOBuf* buf, Controller* cntl, + const google::protobuf::Message* request); + brpc::Channel* channel_; + std::string server_address_; + std::string selected_bucket_; + + std::unordered_map + local_bucket_to_collection_manifest_; + + public: + // these classes have been made public so that normal user can also create + // advanced bRPC programs as per their requirements. 
+ class CouchbaseRequest : public NonreflectableMessage { + public: + static brpc::CouchbaseManifestManager* metadata_tracking; + int _pipelined_count; + butil::IOBuf _buf; + mutable int _cached_size_; + void sharedCtor(); + void sharedDtor(); + void setCachedSize(int size) const; + bool getOrDelete(uint8_t command, const butil::StringPiece& key, + uint8_t coll_id = 0); + bool counter(uint8_t command, const butil::StringPiece& key, uint64_t delta, + uint64_t initial_value, uint32_t exptime); + + bool store(uint8_t command, const butil::StringPiece& key, + const butil::StringPiece& value, uint32_t flags, + uint32_t exptime, uint64_t cas_value, uint8_t coll_id = 0); + uint32_t hashCrc32(const char* key, size_t key_length); + + public: + std::unordered_map* + local_collection_manifest_cache; + + CouchbaseRequest( + std::unordered_map* + local_cache_reference) + : NonreflectableMessage() { + metadata_tracking = &common_metadata_tracking; + local_collection_manifest_cache = local_cache_reference; + sharedCtor(); + } + CouchbaseRequest() : NonreflectableMessage() { + metadata_tracking = &common_metadata_tracking; + sharedCtor(); + } + ~CouchbaseRequest() { sharedDtor(); } + CouchbaseRequest(const CouchbaseRequest& from) + : NonreflectableMessage() { + metadata_tracking = &common_metadata_tracking; + sharedCtor(); + MergeFrom(from); + } + + inline CouchbaseRequest& operator=(const CouchbaseRequest& from) { + if (this != &from) { + MergeFrom(from); + } + return *this; + } + + bool selectBucketRequest(const butil::StringPiece& bucket_name); + bool authenticateRequest(const butil::StringPiece& username, + const butil::StringPiece& password); + bool helloRequest(); + + // Using GetCollectionManifest instead of fetching collection ID directly + // bool GetCollectionId(const butil::StringPiece& scope_name, + // const butil::StringPiece& collection_name); + + bool getScopeId(const butil::StringPiece& scope_name); + + bool getCollectionManifest(); + + bool 
getLocalCachedCollectionId(const std::string& bucket, const std::string& scope, + const std::string& collection, uint8_t* coll_id); + + bool getCachedOrFetchCollectionId( + std::string collection_name, uint8_t* coll_id, + brpc::CouchbaseManifestManager* metadata_tracking, + brpc::Channel* channel, const std::string& server, + const std::string& selected_bucket, + std::unordered_map* + local_cache); + + // Collection-aware document operations + bool getRequest(const butil::StringPiece& key, + std::string collection_name = "_default", + brpc::Channel* channel = nullptr, const std::string& server = "", + const std::string& bucket = ""); + + bool upsertRequest(const butil::StringPiece& key, + const butil::StringPiece& value, uint32_t flags, + uint32_t exptime, uint64_t cas_value, + std::string collection_name = "_default", + brpc::Channel* channel = nullptr, + const std::string& server = "", const std::string& bucket = ""); + + bool addRequest(const butil::StringPiece& key, + const butil::StringPiece& value, uint32_t flags, + uint32_t exptime, uint64_t cas_value, + std::string collection_name = "_default", + brpc::Channel* channel = nullptr, const std::string& server = "", + const std::string& bucket = ""); + + bool appendRequest(const butil::StringPiece& key, + const butil::StringPiece& value, uint32_t flags, + uint32_t exptime, uint64_t cas_value, + std::string collection_name = "_default", + brpc::Channel* channel = nullptr, + const std::string& server = "", const std::string& bucket = ""); + + bool prependRequest(const butil::StringPiece& key, + const butil::StringPiece& value, uint32_t flags, + uint32_t exptime, uint64_t cas_value, + std::string collection_name = "_default", + brpc::Channel* channel = nullptr, + const std::string& server = "", const std::string& bucket = ""); + + bool deleteRequest(const butil::StringPiece& key, + std::string collection_name = "_default", + brpc::Channel* channel = nullptr, + const std::string& server = "", const std::string& 
bucket = ""); + + bool versionRequest(); + + int pipelinedCount() const { return _pipelined_count; } + + butil::IOBuf& rawBuffer() { return _buf; } + const butil::IOBuf& rawBuffer() const { + return _buf; + } // used in couchbase_protocol serialization. + void Swap(CouchbaseRequest* other); + void MergeFrom(const CouchbaseRequest& from) override; + void Clear() override; + bool IsInitialized() const PB_527_OVERRIDE; + }; + + class CouchbaseResponse : public NonreflectableMessage { + public: + static brpc::CouchbaseManifestManager* metadata_tracking; + + private: + std::string _err; + butil::IOBuf _buf; + mutable int _cached_size_; + bool popCounter(uint8_t command, uint64_t* new_value, uint64_t* cas_value); + bool popStore(uint8_t command, uint64_t* cas_value); + + void sharedCtor(); + void sharedDtor(); + void setCachedSize(int size) const; + + public: + uint16_t _status_code; + + CouchbaseResponse() : NonreflectableMessage() { + sharedCtor(); + } + ~CouchbaseResponse() { sharedDtor(); } + CouchbaseResponse(const CouchbaseResponse& from) + : NonreflectableMessage() { + metadata_tracking = &common_metadata_tracking; + sharedCtor(); + MergeFrom(from); + } + inline CouchbaseResponse& operator=(const CouchbaseResponse& from) { + if (this != &from) { + MergeFrom(from); + } + return *this; + } + + // the status codes are from Couchbase Binary Protocol documentation, + // for original reference of status codes visit + // https://github.com/couchbase/kv_engine/blob/master/include/mcbp/protocol/status.h + enum Status { + STATUS_SUCCESS = 0x00, + STATUS_KEY_ENOENT = 0x01, + STATUS_KEY_EEXISTS = 0x02, + STATUS_E2BIG = 0x03, + STATUS_EINVAL = 0x04, + STATUS_NOT_STORED = 0x05, + STATUS_DELTA_BADVAL = 0x06, + STATUS_VBUCKET_BELONGS_TO_ANOTHER_SERVER = 0x07, + STATUS_AUTH_ERROR = 0x20, + STATUS_AUTH_CONTINUE = 0x21, + STATUS_ERANGE = 0x22, + STATUS_ROLLBACK = 0x23, + STATUS_EACCESS = 0x24, + STATUS_NOT_INITIALIZED = 0x25, + STATUS_UNKNOWN_COMMAND = 0x81, + STATUS_ENOMEM = 0x82, 
+ STATUS_NOT_SUPPORTED = 0x83, + STATUS_EINTERNAL = 0x84, + STATUS_EBUSY = 0x85, + STATUS_ETMPFAIL = 0x86, + STATUS_UNKNOWN_COLLECTION = 0x88, + STATUS_NO_COLLECTIONS_MANIFEST = 0x89, + STATUS_CANNOT_APPLY_COLLECTIONS_MANIFEST = 0x8a, + STATUS_COLLECTIONS_MANIFEST_IS_AHEAD = 0x8b, + STATUS_UNKNOWN_SCOPE = 0x8c, + STATUS_DCP_STREAM_ID_INVALID = 0x8d, + STATUS_DURABILITY_INVALID_LEVEL = 0xa0, + STATUS_DURABILITY_IMPOSSIBLE = 0xa1, + STATUS_SYNC_WRITE_IN_PROGRESS = 0xa2, + STATUS_SYNC_WRITE_AMBIGUOUS = 0xa3, + STATUS_SYNC_WRITE_RE_COMMIT_IN_PROGRESS = 0xa4, + STATUS_SUBDOC_PATH_NOT_FOUND = 0xc0, + STATUS_SUBDOC_PATH_MISMATCH = 0xc1, + STATUS_SUBDOC_PATH_EINVAL = 0xc2, + STATUS_SUBDOC_PATH_E2BIG = 0xc3, + STATUS_SUBDOC_DOC_E2DEEP = 0xc4, + STATUS_SUBDOC_VALUE_CANTINSERT = 0xc5, + STATUS_SUBDOC_DOC_NOT_JSON = 0xc6, + STATUS_SUBDOC_NUM_E2BIG = 0xc7, + STATUS_SUBDOC_DELTA_E2BIG = 0xc8, + STATUS_SUBDOC_PATH_EEXISTS = 0xc9, + STATUS_SUBDOC_VALUE_E2DEEP = 0xca, + STATUS_SUBDOC_INVALID_COMBO = 0xcb, + STATUS_SUBDOC_MULTI_PATH_FAILURE = 0xcc, + STATUS_SUBDOC_SUCCESS_DELETED = 0xcd, + STATUS_SUBDOC_XATTR_INVALID_FLAG_COMBO = 0xce, + STATUS_SUBDOC_XATTR_INVALID_KEY_COMBO = 0xcf, + STATUS_SUBDOC_XATTR_UNKNOWN_MACRO = 0xd0, + STATUS_SUBDOC_XATTR_UNKNOWN_VATTR = 0xd1, + STATUS_SUBDOC_XATTR_CANT_MODIFY_VATTR = 0xd2, + STATUS_SUBDOC_MULTI_PATH_FAILURE_DELETED = 0xd3, + STATUS_SUBDOC_INVALID_XATTR_ORDER = 0xd4, + STATUS_SUBDOC_XATTR_UNKNOWN_VATTR_MACRO = 0xd5, + STATUS_SUBDOC_CAN_ONLY_REVIVE_DELETED_DOCUMENTS = 0xd6, + STATUS_SUBDOC_DELETED_DOCUMENT_CANT_HAVE_VALUE = 0xd7, + STATUS_XATTR_EINVAL = 0xe0 + }; + const char* couchbaseBinaryCommandToString(uint8_t cmd); + void MergeFrom(const CouchbaseResponse& from) override; + void Clear() override; + bool IsInitialized() const PB_527_OVERRIDE; + + butil::IOBuf& rawBuffer() { return _buf; } + static const char* statusStr(Status); + + // Helper method to format error messages with status codes + static std::string 
formatErrorMessage(uint16_t status_code, + const std::string& operation, + const std::string& error_msg = ""); + + // Add methods to handle response parsing + void swap(CouchbaseResponse* other); + bool popGet(butil::IOBuf* value, uint32_t* flags, uint64_t* cas_value); + bool popGet(std::string* value, uint32_t* flags, uint64_t* cas_value); + const std::string& lastError() const { return _err; } + bool popUpsert(uint64_t* cas_value); + bool popAdd(uint64_t* cas_value); + // Warning: Not tested + // bool popReplace(uint64_t* cas_value); + bool popAppend(uint64_t* cas_value); + bool popPrepend(uint64_t* cas_value); + bool popSelectBucket(uint64_t* cas_value); + bool popAuthenticate(uint64_t* cas_value); + bool popHello(uint64_t* cas_value); + + // Collection-related response methods + bool popCollectionId(uint8_t* collection_id); + + bool popManifest(std::string* manifest_json); + + bool popDelete(); + // Warning: Not tested + // bool popFlush(); + // bool popIncrement(uint64_t* new_value, uint64_t* cas_value); + // bool popDecrement(uint64_t* new_value, uint64_t* cas_value); + // bool popTouch(); + bool popVersion(std::string* version); + }; + + friend bool sendRequest(CouchbaseOperations::operation_type op_type, + const std::string& key, const std::string& value, + std::string collection_name, + CouchbaseOperations::Result* result, + brpc::Channel* channel, const std::string& server, + const std::string& bucket, CouchbaseRequest* request, + CouchbaseResponse* response); + + // Pipeline management - per instance + std::queue pipeline_operations_queue; + CouchbaseRequest pipeline_request_couchbase_req; + CouchbaseResponse pipeline_response_couchbase_resp; + bool pipeline_active; +}; + +} // namespace brpc \ No newline at end of file diff --git a/src/brpc/global.cpp b/src/brpc/global.cpp index 0196b6d008..82ec32e9e5 100644 --- a/src/brpc/global.cpp +++ b/src/brpc/global.cpp @@ -75,6 +75,7 @@ #include "brpc/policy/ubrpc2pb_protocol.h" #include 
"brpc/policy/sofa_pbrpc_protocol.h" #include "brpc/policy/memcache_binary_protocol.h" +#include "brpc/policy/couchbase_protocol.h" #include "brpc/policy/streaming_rpc_protocol.h" #include "brpc/policy/mongo_protocol.h" #include "brpc/policy/redis_protocol.h" @@ -518,6 +519,16 @@ static void GlobalInitializeOrDieImpl() { exit(1); } + Protocol couchbase_protocol = { ParseCouchbaseMessage, + SerializeCouchbaseRequest, + PackCouchbaseRequest, + NULL, ProcessCouchbaseResponse, + NULL, NULL, GetCouchbaseMethodName, + CONNECTION_TYPE_ALL, "couchbase" }; + if (RegisterProtocol(PROTOCOL_COUCHBASE, couchbase_protocol) != 0) { + exit(1); + } + Protocol redis_protocol = { ParseRedisMessage, SerializeRedisRequest, PackRedisRequest, diff --git a/src/brpc/options.proto b/src/brpc/options.proto index 34001d7bb8..4ad97aa828 100644 --- a/src/brpc/options.proto +++ b/src/brpc/options.proto @@ -64,6 +64,7 @@ enum ProtocolType { PROTOCOL_CDS_AGENT = 24; // Client side only PROTOCOL_ESP = 25; // Client side only PROTOCOL_H2 = 26; + PROTOCOL_COUCHBASE = 27; } enum CompressType { diff --git a/src/brpc/policy/couchbase_protocol.cpp b/src/brpc/policy/couchbase_protocol.cpp new file mode 100644 index 0000000000..a014581ed5 --- /dev/null +++ b/src/brpc/policy/couchbase_protocol.cpp @@ -0,0 +1,236 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/policy/couchbase_protocol.h"
+
+#include <gflags/gflags.h>
+#include <google/protobuf/descriptor.h>  // MethodDescriptor
+#include <google/protobuf/message.h>     // Message
+
+#include "brpc/compress.h"    // ParseFromCompressedData
+#include "brpc/controller.h"  // Controller
+#include "brpc/couchbase.h"   // CouchbaseRequest, CouchbaseResponse
+#include "brpc/details/controller_private_accessor.h"
+#include "brpc/details/server_private_accessor.h"
+#include "brpc/policy/most_common_message.h"
+#include "brpc/server.h"  // Server
+#include "brpc/socket.h"  // Socket
+#include "brpc/span.h"
+#include "butil/containers/flat_map.h"
+#include "butil/iobuf.h"    // butil::IOBuf
+#include "butil/logging.h"  // LOG()
+#include "butil/sys_byteorder.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+DECLARE_bool(enable_rpcz);
+
+namespace policy {
+
+// The parser below reads headers straight out of the input buffer, so both
+// header structs must be exactly 24 bytes.
+BAIDU_CASSERT(sizeof(CouchbaseRequestHeader) == 24, must_match);
+BAIDU_CASSERT(sizeof(CouchbaseResponseHeader) == 24, must_match);
+
+// Bitmap with one bit per opcode (8 * 64 = 512 bits, of which the first 256
+// are used). A set bit marks a command whose responses
+// ParseCouchbaseMessage() accepts. Filled exactly once through
+// `supported_cmd_map_once'.
+static uint64_t supported_cmd_map[8];
+static pthread_once_t supported_cmd_map_once = PTHREAD_ONCE_INIT;
+
+// pthread_once() callback: clears the bitmap and sets the bit of every
+// opcode this client understands.
+static void InitSupportedCommandMap() {
+  butil::bit_array_clear(supported_cmd_map, 256);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_GET);
+  butil::bit_array_set(supported_cmd_map, CB_HELLO_SELECT_FEATURES);
+  butil::bit_array_set(supported_cmd_map, CB_SELECT_BUCKET);
+  butil::bit_array_set(supported_cmd_map, CB_GET_SCOPE_ID);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_SET);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_ADD);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_REPLACE);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_DELETE);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_INCREMENT);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_DECREMENT);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_FLUSH);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_VERSION);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_NOOP);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_APPEND);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_PREPEND);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_STAT);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_TOUCH);
+  butil::bit_array_set(supported_cmd_map, CB_BINARY_SASL_AUTH);
+  // Collection management commands
+  butil::bit_array_set(supported_cmd_map, CB_GET_COLLECTIONS_MANIFEST);
+  butil::bit_array_set(supported_cmd_map, CB_COLLECTIONS_GET_CID);
+  butil::bit_array_set(supported_cmd_map, CB_COLLECTIONS_GET_SCOPE_ID);
+}
+
+// Returns true if `command' was registered by InitSupportedCommandMap().
+// The map is initialized on first use.
+inline bool IsSupportedCommand(uint8_t command) {
+  pthread_once(&supported_cmd_map_once, InitSupportedCommandMap);
+  return butil::bit_array_get(supported_cmd_map, command);
+}
+
+// Cuts Couchbase binary-protocol responses out of `source'.
+//
+// Each complete response (24-byte header + body) is appended to the
+// MostCommonMessage kept as the socket's parsing context: first the header
+// converted to host byte order, then the raw body. Responses are accumulated
+// until as many have been gathered as the PipelinedInfo popped from `socket'
+// announces (pi.count); the accumulated message is then returned.
+//
+// Returns PARSE_ERROR_NOT_ENOUGH_DATA when a full packet is not buffered yet
+// (and also after dropping a packet that cannot be handled), and
+// PARSE_ERROR_TRY_OTHERS when the first byte is not CB_MAGIC_RESPONSE.
+ParseResult ParseCouchbaseMessage(butil::IOBuf* source, Socket* socket,
+                                  bool /*read_eof*/, const void* /*arg*/) {
+  while (1) {
+    const uint8_t* p_cbmagic = (const uint8_t*)source->fetch1();
+    if (NULL == p_cbmagic) {
+      return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+    if (*p_cbmagic != (uint8_t)CB_MAGIC_RESPONSE) {
+      return MakeParseError(PARSE_ERROR_TRY_OTHERS);
+    }
+    // Scratch space handed to fetch(); sized from the header type instead of
+    // a literal 24 so it cannot drift from the struct.
+    char buf[sizeof(CouchbaseResponseHeader)];
+    const uint8_t* p = (const uint8_t*)source->fetch(buf, sizeof(buf));
+    if (NULL == p) {
+      return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+    const CouchbaseResponseHeader* header = (const CouchbaseResponseHeader*)p;
+    uint32_t total_body_length = butil::NetToHost32(header->total_body_length);
+    if (source->size() < sizeof(*header) + total_body_length) {
+      return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+
+    if (!IsSupportedCommand(header->command)) {
+      // `command' is a uint8_t: stream it as a number, otherwise operator<<
+      // prints it as a (possibly unprintable) character.
+      LOG(WARNING) << "Not support command=" << (int)header->command;
+      source->pop_front(sizeof(*header) + total_body_length);
+      return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+
+    PipelinedInfo pi;
+    if (!socket->PopPipelinedInfo(&pi)) {
+      LOG(WARNING) << "No corresponding PipelinedInfo in socket, drop";
+      source->pop_front(sizeof(*header) + total_body_length);
+      return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+    // Reuse the message left in the parsing context by a previous iteration
+    // or call, if any.
+    MostCommonMessage* msg =
+        static_cast<MostCommonMessage*>(socket->parsing_context());
+    if (msg == NULL) {
+      msg = MostCommonMessage::Get();
+      socket->reset_parsing_context(msg);
+    }
+
+    // endianness conversions.
+    const CouchbaseResponseHeader local_header = {
+        header->magic,
+        header->command,
+        butil::NetToHost16(header->key_length),
+        header->extras_length,
+        header->data_type,
+        butil::NetToHost16(header->status),
+        total_body_length,
+        butil::NetToHost32(header->opaque),
+        butil::NetToHost64(header->cas_value),
+    };
+    // `header' may point into `source'; everything needed from it has been
+    // copied into `local_header' before the bytes are popped.
+    msg->meta.append(&local_header, sizeof(local_header));
+    source->pop_front(sizeof(*header));
+    source->cutn(&msg->meta, total_body_length);
+    if (++msg->pi.count >= pi.count) {
+      CHECK_EQ(msg->pi.count, pi.count);
+      msg = static_cast<MostCommonMessage*>(socket->release_parsing_context());
+      msg->pi = pi;
+      return MakeMessage(msg);
+    } else {
+      // More responses of this pipelined batch are expected: hand the info
+      // back so the next iteration pops it again.
+      socket->GivebackPipelinedInfo(pi);
+    }
+  }
+}
+
+// Delivers a message produced by ParseCouchbaseMessage() to the Controller
+// that is waiting on msg->pi.id_wait.
+//
+// The accumulated bytes (host-order headers + bodies) are moved into the
+// rawBuffer() of the user's CouchbaseResponse. The RPC is failed with
+// ERESPONSE if the response object is missing, is not a CouchbaseResponse, or
+// the number of gathered responses differs from the request's pipelined
+// count.
+void ProcessCouchbaseResponse(InputMessageBase* msg_base) {
+  const int64_t start_parse_us = butil::cpuwide_time_us();
+  DestroyingPtr<MostCommonMessage> msg(
+      static_cast<MostCommonMessage*>(msg_base));
+
+  const bthread_id_t cid = msg->pi.id_wait;
+  Controller* cntl = NULL;
+  const int rc = bthread_id_lock(cid, (void**)&cntl);
+  if (rc != 0) {
+    // EINVAL / EPERM are not logged.
+    // NOTE(review): presumably these mean the RPC already ended - confirm.
+    LOG_IF(ERROR, rc != EINVAL && rc != EPERM)
+        << "Fail to lock correlation_id=" << cid << ": " << berror(rc);
+    return;
+  }
+
+  ControllerPrivateAccessor accessor(cntl);
+  Span* span = accessor.span();
+  if (span) {
+    span->set_base_real_us(msg->base_real_us());
+    span->set_received_us(msg->received_us());
+    span->set_response_size(msg->meta.length());
+    span->set_start_parse_us(start_parse_us);
+  }
+  const int saved_error = cntl->ErrorCode();
+  if (cntl->response() == NULL) {
+    cntl->SetFailed(ERESPONSE, "response is NULL!");
+  } else if (cntl->response()->GetDescriptor() !=
+             CouchbaseOperations::CouchbaseResponse::descriptor()) {
+    cntl->SetFailed(ERESPONSE, "Must be CouchbaseResponse");
+  } else {
+    // We work around ParseFrom of pb which is just a placeholder.
+    ((CouchbaseOperations::CouchbaseResponse*)cntl->response())->rawBuffer() =
+        msg->meta.movable();
+    if (msg->pi.count != accessor.pipelined_count()) {
+      cntl->SetFailed(ERESPONSE,
+                      "pipelined_count=%d of response does "
+                      "not equal request's=%d",
+                      msg->pi.count, accessor.pipelined_count());
+    }
+  }
+  // Unlocks correlation_id inside. Reverts the controller's error code if
+  // the version check of `cid' fails.
+  msg.reset();  // optional, just release resource ASAP
+  accessor.OnResponse(cid, saved_error);
+}
+
+// Copies the already-encoded bytes of a CouchbaseRequest into `buf' and
+// records the request's pipelined count on the controller.
+// Fails the RPC with EREQUEST if `request' is NULL or is not a
+// CouchbaseRequest.
+void SerializeCouchbaseRequest(butil::IOBuf* buf, Controller* cntl,
+                               const google::protobuf::Message* request) {
+  if (request == NULL) {
+    return cntl->SetFailed(EREQUEST, "request is NULL");
+  }
+  if (request->GetDescriptor() !=
+      CouchbaseOperations::CouchbaseRequest::descriptor()) {
+    return cntl->SetFailed(EREQUEST, "Must be CouchbaseRequest");
+  }
+  const CouchbaseOperations::CouchbaseRequest* mr =
+      (const CouchbaseOperations::CouchbaseRequest*)request;
+  // We work around SerializeTo of pb which is just a placeholder.
+  *buf = mr->rawBuffer();
+  ControllerPrivateAccessor(cntl).set_pipelined_count(mr->pipelinedCount());
+}
+
+// Writes the bytes to send into `buf'.
+// With an authenticator: the credential it generates (failing the RPC with
+// EREQUEST if generation fails or yields an empty string).
+// Without one: the serialized `request'.
+void PackCouchbaseRequest(butil::IOBuf* buf, SocketMessage**,
+                          uint64_t /*correlation_id*/,
+                          const google::protobuf::MethodDescriptor*,
+                          Controller* cntl, const butil::IOBuf& request,
+                          const Authenticator* auth) {
+  if (auth) {
+    std::string auth_str;
+    if (auth->GenerateCredential(&auth_str) != 0) {
+      return cntl->SetFailed(EREQUEST, "Fail to generate credential");
+    }
+    if (auth_str.empty()) {
+      return cntl->SetFailed(EREQUEST, "Empty auth_str");
+    }
+    buf->append(auth_str);
+    // NOTE(review): on this path `request' is never appended and the
+    // pipelined count is not adjusted for the extra auth response - confirm
+    // that sending the credential alone is intended.
+  } else {
+    buf->append(request);
+  }
+}
+
+// Every Couchbase call reports the same method name, "Couchbase".
+const std::string& GetCouchbaseMethodName(
+    const google::protobuf::MethodDescriptor*, const Controller*) {
+  const static std::string CouchbaseD_STR = "Couchbase";
+  return CouchbaseD_STR;
+}
+
+}  // namespace policy
+}  // namespace brpc
diff --git a/src/brpc/policy/couchbase_protocol.h b/src/brpc/policy/couchbase_protocol.h
new file mode 100644
index 0000000000..15367def0b
--- /dev/null
+++ b/src/brpc/policy/couchbase_protocol.h
@@ -0,0 +1,173 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef BRPC_POLICY_COUCHBASE_BINARY_PROTOCOL_H
+#define BRPC_POLICY_COUCHBASE_BINARY_PROTOCOL_H
+
+#include "brpc/protocol.h"
+
+namespace brpc {
+namespace policy {
+
+// Values of the `magic' byte that starts every packet header.
+enum CouchbaseMagic { CB_MAGIC_REQUEST = 0x80, CB_MAGIC_RESPONSE = 0x81 };
+
+// Definition of the data types in the packet
+// https://github.com/couchbase/kv_engine/blob/master/docs/BinaryProtocol.md
+enum CouchbaseBinaryDataType { CB_BINARY_RAW_BYTES = 0x00 };
+
+enum CouchbaseJsonDataType { CB_JSON = 0x01 };
+
+// Definition of the different command opcodes (the `command' byte of the
+// packet header).
+// https://github.com/couchbase/kv_engine/blob/master/docs/BinaryProtocol.md
+//
+// Note: CB_GET_SCOPE_ID and CB_COLLECTIONS_GET_SCOPE_ID are two names for the
+// same opcode (0xbc).
+enum CouchbaseBinaryCommand {
+  CB_HELLO_SELECT_FEATURES = 0x1f,
+  CB_SELECT_BUCKET = 0x89,
+  CB_GET_SCOPE_ID = 0xBC,
+  CB_BINARY_GET = 0x00,
+  CB_BINARY_SET = 0x01,
+  CB_BINARY_ADD = 0x02,
+  CB_BINARY_REPLACE = 0x03,
+  CB_BINARY_DELETE = 0x04,
+  CB_BINARY_INCREMENT = 0x05,
+  CB_BINARY_DECREMENT = 0x06,
+  CB_BINARY_QUIT = 0x07,
+  CB_BINARY_FLUSH = 0x08,
+  CB_BINARY_GETQ = 0x09,
+  CB_BINARY_NOOP = 0x0a,
+  CB_BINARY_VERSION = 0x0b,
+  CB_BINARY_GETK = 0x0c,
+  CB_BINARY_GETKQ = 0x0d,
+  CB_BINARY_APPEND = 0x0e,
+  CB_BINARY_PREPEND = 0x0f,
+  CB_BINARY_STAT = 0x10,
+  CB_BINARY_SETQ = 0x11,
+  CB_BINARY_ADDQ = 0x12,
+  CB_BINARY_REPLACEQ = 0x13,
+  CB_BINARY_DELETEQ = 0x14,
+  CB_BINARY_INCREMENTQ = 0x15,
+  CB_BINARY_DECREMENTQ = 0x16,
+  CB_BINARY_QUITQ = 0x17,
+  CB_BINARY_FLUSHQ = 0x18,
+  CB_BINARY_APPENDQ = 0x19,
+  CB_BINARY_PREPENDQ = 0x1a,
+  CB_BINARY_TOUCH = 0x1c,
+  CB_BINARY_GAT = 0x1d,
+  CB_BINARY_GATQ = 0x1e,
+  CB_BINARY_GATK = 0x23,
+  CB_BINARY_GATKQ = 0x24,
+
+  // SASL authentication commands
+  CB_BINARY_SASL_LIST_MECHS = 0x20,
+  CB_BINARY_SASL_AUTH = 0x21,
+  CB_BINARY_SASL_STEP = 0x22,
+
+  // Collection Management Commands (Couchbase 7.0+)
+  CB_GET_CLUSTER_CONFIG = 0xb5,
+  CB_GET_COLLECTIONS_MANIFEST = 0xba,
+  CB_COLLECTIONS_GET_CID = 0xbb,
+  CB_COLLECTIONS_GET_SCOPE_ID = 0xbc,
+
+};
+
+// Header that precedes every request packet. The field order and widths
+// mirror the wire layout, so do not reorder or resize members.
+struct CouchbaseRequestHeader {
+  // Magic number identifying the packet (See Couchbase Binary
+  // Protocol#Magic_Byte)
+  uint8_t magic;
+
+  // Command code (See Couchbase Binary Protocol#Command_opcodes)
+  uint8_t command;
+
+  // Length in bytes of the text key that follows the command extras
+  uint16_t key_length;
+
+  // Length in bytes of the command extras
+  uint8_t extras_length;
+
+  // Reserved for future use (See Couchbase Binary Protocol#Data_Type)
+  uint8_t data_type;
+
+  // The virtual bucket for this command
+  uint16_t vbucket_id;
+
+  // Length in bytes of extra + key + value
+  uint32_t total_body_length;
+
+  // Will be copied back to you in the response
+  uint32_t opaque;
+
+  // Data version check
+  uint64_t cas_value;
+};
+
+// Header that precedes every response packet. Same layout as
+// CouchbaseRequestHeader except that the `vbucket_id' slot carries `status'.
+// Do not reorder or resize members.
+struct CouchbaseResponseHeader {
+  // Magic number identifying the packet (See Couchbase Binary
+  // Protocol#Magic_Byte)
+  uint8_t magic;
+
+  // Command code (See Couchbase Binary Protocol#Command_opcodes)
+  uint8_t command;
+
+  // Length in bytes of the text key that follows the command extras
+  uint16_t key_length;
+
+  // Length in bytes of the command extras
+  uint8_t extras_length;
+
+  // Reserved for future use (See Couchbase Binary Protocol#Data_Type)
+  uint8_t data_type;
+
+  // Status of the response (non-zero on error)
+  uint16_t status;
+
+  // Length in bytes of extra + key + value
+  uint32_t total_body_length;
+
+  // Will be copied back to you in the response
+  uint32_t opaque;
+
+  // Data version check
+  uint64_t cas_value;
+};
+
+// Parse couchbase messages out of `source'.
+ParseResult ParseCouchbaseMessage(butil::IOBuf* source, Socket* socket,
+                                  bool read_eof, const void* arg);
+
+// Actions to a couchbase response.
+void ProcessCouchbaseResponse(InputMessageBase* msg);
+
+// Serialize a couchbase request into `buf'.
+void SerializeCouchbaseRequest(butil::IOBuf* buf, Controller* cntl,
+                               const google::protobuf::Message* request);
+
+// Pack `request' to `method' into `buf'.
+void PackCouchbaseRequest(butil::IOBuf* buf, SocketMessage**,
+                          uint64_t correlation_id,
+                          const google::protobuf::MethodDescriptor* method,
+                          Controller* controller, const butil::IOBuf& request,
+                          const Authenticator* auth);
+
+// There is no server-side Couchbase implementation in brpc, so
+// `void ProcessCouchbaseRequest(InputMessageBase* msg);' is intentionally
+// neither declared nor implemented.
+
+const std::string& GetCouchbaseMethodName(
+    const google::protobuf::MethodDescriptor*, const Controller*);
+
+}  // namespace policy
+}  // namespace brpc
+
+#endif  // BRPC_POLICY_COUCHBASE_BINARY_PROTOCOL_H
diff --git a/src/brpc/proto_base.proto b/src/brpc/proto_base.proto
index b278ddb6bf..3fcdda09fa 100644
--- a/src/brpc/proto_base.proto
+++ b/src/brpc/proto_base.proto
@@ -25,6 +25,9 @@ message RedisResponseBase {}
 
 message EspMessageBase {}
 
+message CouchbaseRequestBase {}
+message CouchbaseResponseBase {}
+
 message MemcacheRequestBase {}
 message MemcacheResponseBase {}
 
diff --git a/test/brpc_couchbase_unittest.cpp b/test/brpc_couchbase_unittest.cpp
new file mode 100644
index 0000000000..dacc9a09ce
--- /dev/null
+++ b/test/brpc_couchbase_unittest.cpp
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <brpc/couchbase.h>
+#include <gflags/gflags.h>
+#include <gtest/gtest.h>
+
+namespace brpc {
+DECLARE_int32(idle_timeout_second);
+}
+
+int main(int argc, char* argv[]) {
+  // The idle-timeout flag is forced to 0 for the whole test run.
+  brpc::FLAGS_idle_timeout_second = 0;
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
+namespace {
+
+// Unit Tests - No Server Required
+class CouchbaseUnitTest : public testing::Test {};
+
+// Smoke test: every request builder can be invoked on one request object.
+// The trailing EXPECT_TRUE(true) asserts nothing about the encoded bytes;
+// the test only fails if a builder crashes.
+// TODO: assert on the produced buffer once the expected encoding is pinned.
+TEST_F(CouchbaseUnitTest, RequestBuilders) {
+  brpc::CouchbaseOperations::CouchbaseRequest req;
+  req.Clear();
+  req.helloRequest();
+  req.authenticateRequest("user", "pass");
+  req.selectBucketRequest("bucket");
+  req.addRequest("key", "value", 0, 0, 0);
+  req.getRequest("key");
+  req.upsertRequest("key", "value", 0, 0, 0);
+  req.deleteRequest("key");
+  EXPECT_TRUE(true);
+}
+
+// Result is a plain value holder: every field can be written and read back,
+// first with non-empty values and then with empty/zero ones.
+TEST_F(CouchbaseUnitTest, ResultStruct) {
+  brpc::CouchbaseOperations::Result result;
+
+  result.success = true;
+  result.error_message = "Test";
+  result.value = R"({"test": "data"})";
+  result.status_code = 0x01;
+
+  EXPECT_TRUE(result.success);
+  EXPECT_EQ("Test", result.error_message);
+  EXPECT_EQ(R"({"test": "data"})", result.value);
+  EXPECT_EQ(0x01, result.status_code);
+
+  result.success = false;
+  result.error_message = "";
+  result.value = "";
+  result.status_code = 0x00;
+
+  EXPECT_FALSE(result.success);
+  EXPECT_TRUE(result.error_message.empty());
+  EXPECT_TRUE(result.value.empty());
+  EXPECT_EQ(0x00, result.status_code);
+}
+
+// Smoke test for unusual addRequest() arguments: empty key, empty value,
+// long key, long value, punctuation in the key, JSON value. As above, only
+// a crash makes this test fail.
+TEST_F(CouchbaseUnitTest, EdgeCases) {
+  brpc::CouchbaseOperations::CouchbaseRequest req;
+  req.addRequest("", "value", 0, 0, 0);
+  req.addRequest("key", "", 0, 0, 0);
+  req.addRequest(std::string(1000, 'x'), "val", 0, 0, 0);
+  req.addRequest("key", std::string(10000, 'x'), 0, 0, 0);
+  req.addRequest("test::special::!!!", "val", 0, 0, 0);
+  req.addRequest("key", R"({"unicode":"123"})", 0, 0, 0);
+  EXPECT_TRUE(true);
+}
+
+}  // namespace
\ No newline at end of file
From 227639903220437fbe1efa8ab760616d75d0e174 Mon Sep 17 00:00:00 2001
From: Bright Chen
Date: Sat, 29 Nov 2025 23:01:14 +0800
Subject: [PATCH 02/84]
Support dynamic expansion of RDMA block pool (#3155) --- src/brpc/rdma/block_pool.cpp | 151 +++++++++++++++++---------------- src/brpc/rdma/block_pool.h | 3 +- src/butil/memory/scope_guard.h | 2 + 3 files changed, 81 insertions(+), 75 deletions(-) diff --git a/src/brpc/rdma/block_pool.cpp b/src/brpc/rdma/block_pool.cpp index 826fc5d181..24907a194a 100644 --- a/src/brpc/rdma/block_pool.cpp +++ b/src/brpc/rdma/block_pool.cpp @@ -25,10 +25,9 @@ #include "butil/iobuf.h" #include "butil/object_pool.h" #include "butil/thread_local.h" -#include "bthread/bthread.h" +#include "butil/memory/scope_guard.h" #include "brpc/rdma/block_pool.h" - namespace brpc { namespace rdma { @@ -98,6 +97,8 @@ struct GlobalInfo { std::vector idle_size[BLOCK_SIZE_COUNT]; int region_num[BLOCK_SIZE_COUNT]; butil::Mutex extend_lock; + std::vector expansion_list[BLOCK_SIZE_COUNT]; + std::vector expansion_size[BLOCK_SIZE_COUNT]; }; static GlobalInfo* g_info = NULL; @@ -129,36 +130,20 @@ uint32_t GetRegionId(const void* buf) { return r->id; } -// When both rdma_memory_pool_max_regions and rdma_memory_pool_buckets are -// greater than 1, dynamic memory expansion may cause concurrent modification -// issues in the memory linked list due to lock contention problems. To address -// this, we increase the region_num count for each block_type. 
Dynamic memory -// expansion is only permitted when both of the following conditions are met: -// rdma_memory_pool_buckets equals 1 -// g_info->region_num[block_type] is less than 1 -static bool CanExtendBlockRuntime(int block_type) { - return FLAGS_rdma_memory_pool_buckets == 1 || - g_info->region_num[block_type] < 1; -} - -static void* ExtendBlockPoolImpl(void* region_base, size_t region_size, - int block_type) { - if (CanExtendBlockRuntime(block_type) == false) { - LOG(INFO) << "Runtime extend memory only support one bucket or region " - "num is zero for per block_type"; +static void* ExtendBlockPoolImpl(void* region_base, size_t region_size, int block_type) { + auto region_base_guard = butil::MakeScopeGuard([region_base]() { free(region_base); - errno = ENOMEM; - return NULL; - } + }); + if (g_region_num == FLAGS_rdma_memory_pool_max_regions) { - LOG(INFO) << "Memory pool reaches max regions"; - free(region_base); + LOG_EVERY_SECOND(ERROR) << "Memory pool reaches max regions"; errno = ENOMEM; return NULL; } + uint32_t id = g_cb(region_base, region_size); if (id == 0) { - free(region_base); + errno = EINVAL; return NULL; } @@ -170,7 +155,7 @@ static void* ExtendBlockPoolImpl(void* region_base, size_t region_size, for (size_t j = 0; j < i; ++j) { butil::return_object(node[j]); } - free(region_base); + errno = ENOMEM; return NULL; } } @@ -184,12 +169,15 @@ static void* ExtendBlockPoolImpl(void* region_base, size_t region_size, for (size_t i = 0; i < g_buckets; ++i) { node[i]->start = (void*)(region->start + i * (region_size / g_buckets)); node[i]->len = region_size / g_buckets; - node[i]->next = g_info->idle_list[block_type][i]; - g_info->idle_list[block_type][i] = node[i]; - g_info->idle_size[block_type][i] += node[i]->len; + node[i]->next = g_info->expansion_list[block_type][i]; + g_info->expansion_list[block_type][i] = node[i]; + g_info->expansion_size[block_type][i] += node[i]->len; } g_info->region_num[block_type]++; + // `region_base' is inuse, cannot be 
freed. + region_base_guard.dismiss(); + return region_base; } @@ -203,7 +191,7 @@ static void* ExtendBlockPool(size_t region_size, int block_type) { if (FLAGS_rdma_memory_pool_user_specified_memory) { LOG_EVERY_SECOND(ERROR) << "Fail to extend new region, " "rdma_memory_pool_user_specified_memory is " - "true, ExtendBlockPool is disabled"; + "true, ExtendBlockPool is disabled"; return NULL; } @@ -222,24 +210,27 @@ static void* ExtendBlockPool(size_t region_size, int block_type) { return ExtendBlockPoolImpl(region_base, region_size, block_type); } -void* ExtendBlockPoolByUser(void* region_base, size_t region_size, - int block_type) { - if (FLAGS_rdma_memory_pool_user_specified_memory == false) { +void* ExtendBlockPoolByUser(void* region_base, size_t region_size, int block_type) { + auto region_base_guard = butil::MakeScopeGuard([region_base]() { + free(region_base); + }); + + if (!FLAGS_rdma_memory_pool_user_specified_memory) { LOG_EVERY_SECOND(ERROR) << "User extend memory is disabled"; return NULL; } if (reinterpret_cast(region_base) % 4096 != 0) { LOG_EVERY_SECOND(ERROR) << "region_base must be 4096 aligned"; + errno = EINVAL; return NULL; } - uint64_t index = butil::fast_rand() % g_buckets; - BAIDU_SCOPED_LOCK(*g_info->lock[block_type][index]); - BAIDU_SCOPED_LOCK(g_info->extend_lock); region_size = region_size * BYTES_IN_MB / g_block_size[block_type] / g_buckets; region_size *= g_block_size[block_type] * g_buckets; + region_base_guard.dismiss(); + BAIDU_SCOPED_LOCK(g_info->extend_lock); return ExtendBlockPoolImpl(region_base, region_size, block_type); } @@ -316,6 +307,14 @@ bool InitBlockPool(RegisterCallback cb) { return false; } } + g_info->expansion_list[i].resize(g_buckets, NULL); + if (g_info->expansion_list[i].size() != g_buckets) { + return false; + } + g_info->expansion_size[i].resize(g_buckets, 0); + if (g_info->expansion_size[i].size() != g_buckets) { + return false; + } } g_dump_mutex = new butil::Mutex; @@ -332,66 +331,74 @@ bool 
InitBlockPool(RegisterCallback cb) { return false; } +static void MoveExpansionList2EmptyIdleList(int block_type, size_t index) { + CHECK(NULL == g_info->idle_list[block_type][index]); + + g_info->idle_list[block_type][index] = g_info->expansion_list[block_type][index]; + g_info->idle_size[block_type][index] += g_info->expansion_size[block_type][index]; + g_info->expansion_list[block_type][index] = NULL; + g_info->expansion_size[block_type][index] = 0; +} + static void* AllocBlockFrom(int block_type) { bool locked = false; if (BAIDU_UNLIKELY(g_dump_enable)) { g_dump_mutex->lock(); locked = true; } + BUTIL_SCOPE_EXIT { + if (locked) { + g_dump_mutex->unlock(); + } + }; + void* ptr = NULL; - if (block_type == 0 && tls_idle_list != NULL){ + if (0 == block_type && NULL != tls_idle_list) { CHECK(tls_idle_num > 0); IdleNode* n = tls_idle_list; tls_idle_list = n->next; ptr = n->start; butil::return_object(n); tls_idle_num--; - if (locked) { - g_dump_mutex->unlock(); - } return ptr; } - uint64_t index = butil::fast_rand() % g_buckets; + size_t index = butil::fast_rand() % g_buckets; BAIDU_SCOPED_LOCK(*g_info->lock[block_type][index]); IdleNode* node = g_info->idle_list[block_type][index]; - if (!node) { + if (NULL == node) { BAIDU_SCOPED_LOCK(g_info->extend_lock); node = g_info->idle_list[block_type][index]; - if (!node) { - // There is no block left, extend a new region - if (!ExtendBlockPool(FLAGS_rdma_memory_pool_increase_size_mb, - block_type)) { + if (NULL == node && NULL != g_info->expansion_list[block_type][index]) { + MoveExpansionList2EmptyIdleList(block_type, index); + node = g_info->idle_list[block_type][index]; + } + if (NULL == node) { + // There is no block left, extend a new region. + if (!ExtendBlockPool(FLAGS_rdma_memory_pool_increase_size_mb, block_type)) { LOG_EVERY_SECOND(ERROR) << "Fail to extend new region. " << "You can set the size of memory pool larger. 
" << "Refer to the help message of these flags: " << "rdma_memory_pool_initial_size_mb, " << "rdma_memory_pool_increase_size_mb, " << "rdma_memory_pool_max_regions."; - if (locked) { - g_dump_mutex->unlock(); - } return NULL; } + MoveExpansionList2EmptyIdleList(block_type, index); node = g_info->idle_list[block_type][index]; } } - if (node) { - ptr = node->start; - if (node->len > g_block_size[block_type]) { - node->start = (char*)node->start + g_block_size[block_type]; - node->len -= g_block_size[block_type]; - } else { - g_info->idle_list[block_type][index] = node->next; - butil::return_object(node); - } - g_info->idle_size[block_type][index] -= g_block_size[block_type]; + CHECK(NULL != node); + + ptr = node->start; + if (node->len > g_block_size[block_type]) { + node->start = (char*)node->start + g_block_size[block_type]; + node->len -= g_block_size[block_type]; } else { - if (locked) { - g_dump_mutex->unlock(); - } - return NULL; + g_info->idle_list[block_type][index] = node->next; + butil::return_object(node); } + g_info->idle_size[block_type][index] -= g_block_size[block_type]; // Move more blocks from global list to tls list if (block_type == 0) { @@ -417,9 +424,6 @@ static void* AllocBlockFrom(int block_type) { } } - if (locked) { - g_dump_mutex->unlock(); - } return ptr; } @@ -482,6 +486,12 @@ int DeallocBlock(void* buf) { g_dump_mutex->lock(); locked = true; } + BUTIL_SCOPE_EXIT { + if (locked) { + g_dump_mutex->unlock(); + } + }; + if (block_type == 0 && tls_idle_num < (uint32_t)FLAGS_rdma_memory_pool_tls_cache_num) { if (!tls_inited) { tls_inited = true; @@ -494,9 +504,6 @@ int DeallocBlock(void* buf) { tls_idle_num++; node->next = tls_idle_list; tls_idle_list = node; - if (locked) { - g_dump_mutex->unlock(); - } return 0; } @@ -527,9 +534,6 @@ int DeallocBlock(void* buf) { g_info->idle_list[block_type][index] = node; g_info->idle_size[block_type][index] += node->len; } - if (locked) { - g_dump_mutex->unlock(); - } return 0; } @@ -557,7 +561,8 @@ void 
DumpMemoryPoolInfo(std::ostream& os) { for (int i = 0; i < BLOCK_SIZE_COUNT; ++i) { os << "\tFor block size " << GetBlockSize(i) << ":\n"; for (size_t j = 0; j < g_buckets; ++j) { - os << "\t\tBucket " << j << ": " << g_info->idle_size[i][j] << "\n"; + os << "\t\tBucket " << j << ": {" << g_info->idle_size[i][j] + << ", " << g_info->expansion_size[i][j] << "}\n"; } } os << "Thread Local Cache Info:\n"; diff --git a/src/brpc/rdma/block_pool.h b/src/brpc/rdma/block_pool.h index 00a310824f..f9018e5ecc 100644 --- a/src/brpc/rdma/block_pool.h +++ b/src/brpc/rdma/block_pool.h @@ -80,8 +80,7 @@ bool InitBlockPool(RegisterCallback cb); // FLAGS_rdma_memory_pool_user_specified_memory is true, user is responsibility // of extending memory blocks , this ensuring flexibility for advanced use // cases. -void* ExtendBlockPoolByUser(void* region_base, size_t region_size, - int block_type); +void* ExtendBlockPoolByUser(void* region_base, size_t region_size, int block_type); // Allocate a buf with length at least @a size (require: size>0) // Return the address allocated, NULL if failed and errno is set. diff --git a/src/butil/memory/scope_guard.h b/src/butil/memory/scope_guard.h index 7d72a560d2..377819b5db 100644 --- a/src/butil/memory/scope_guard.h +++ b/src/butil/memory/scope_guard.h @@ -104,4 +104,6 @@ operator+(ScopeExitHelper, Callback&& callback) { auto BRPC_ANONYMOUS_VARIABLE(SCOPE_EXIT) = \ ::butil::internal::ScopeExitHelper() + [&]() noexcept +#define BUTIL_SCOPE_EXIT BRPC_SCOPE_EXIT + #endif // BUTIL_SCOPED_GUARD_H From 71a3b47a29eca9d3c1dbf4c9a5ee1195393b6b25 Mon Sep 17 00:00:00 2001 From: Regal <141622927+ZhengweiZhu@users.noreply.github.com> Date: Tue, 2 Dec 2025 14:09:32 +0800 Subject: [PATCH 03/84] Support naming bthread to help debug (#3158) The bthread name is shown when checking bthread status by curl ip:port/bthreads/xxx, which helps to debug when bthread trace is not enabled. 
--- src/brpc/acceptor.cpp | 1 + src/brpc/controller.cpp | 1 + src/brpc/event_dispatcher_epoll.cpp | 1 + src/brpc/event_dispatcher_kqueue.cpp | 1 + src/brpc/global.cpp | 4 +++- src/brpc/input_messenger.cpp | 17 +++++++++-------- src/brpc/periodic_task.cpp | 4 +++- src/brpc/rdma/rdma_endpoint.cpp | 9 +++++++-- src/brpc/server.cpp | 1 + src/brpc/socket.cpp | 13 ++++++++++--- src/brpc/socket_map.cpp | 4 +++- src/bthread/bthread.cpp | 7 +++++++ src/bthread/fd.cpp | 4 +++- src/bthread/task_group.cpp | 3 ++- src/bthread/types.h | 14 +++++++++----- 15 files changed, 61 insertions(+), 23 deletions(-) diff --git a/src/brpc/acceptor.cpp b/src/brpc/acceptor.cpp index 616c1a3044..fd6564c987 100644 --- a/src/brpc/acceptor.cpp +++ b/src/brpc/acceptor.cpp @@ -78,6 +78,7 @@ int Acceptor::StartAccept(int listened_fd, int idle_timeout_sec, if (idle_timeout_sec > 0) { bthread_attr_t tmp = BTHREAD_ATTR_NORMAL; tmp.tag = _bthread_tag; + bthread_attr_set_name(&tmp, "CloseIdleConnections"); if (bthread_start_background(&_close_idle_tid, &tmp, CloseIdleConnections, this) != 0) { LOG(FATAL) << "Fail to start bthread"; return -1; diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index d4dbab951b..b30a13476e 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -743,6 +743,7 @@ void Controller::OnVersionedRPCReturned(const CompletionInfo& info, bthread_t bt; bthread_attr_t attr = (FLAGS_usercode_in_pthread ? 
BTHREAD_ATTR_PTHREAD : BTHREAD_ATTR_NORMAL); + bthread_attr_set_name(&attr, "RunEndRPC"); _tmp_completion_info = info; if (bthread_start_background(&bt, &attr, RunEndRPC, this) != 0) { LOG(FATAL) << "Fail to start bthread"; diff --git a/src/brpc/event_dispatcher_epoll.cpp b/src/brpc/event_dispatcher_epoll.cpp index 5a4a6370f7..5a6c23b0e5 100644 --- a/src/brpc/event_dispatcher_epoll.cpp +++ b/src/brpc/event_dispatcher_epoll.cpp @@ -78,6 +78,7 @@ int EventDispatcher::Start(const bthread_attr_t* thread_attr) { // Only event dispatcher thread has flag BTHREAD_GLOBAL_PRIORITY. bthread_attr_t epoll_thread_attr = _thread_attr | BTHREAD_NEVER_QUIT | BTHREAD_GLOBAL_PRIORITY; + bthread_attr_set_name(&epoll_thread_attr, "EventDispatcher::RunThis"); // Polling thread uses the same attr for consumer threads (NORMAL right // now). Previously, we used small stack (32KB) which may be overflowed diff --git a/src/brpc/event_dispatcher_kqueue.cpp b/src/brpc/event_dispatcher_kqueue.cpp index 48b2814798..f73e62004f 100644 --- a/src/brpc/event_dispatcher_kqueue.cpp +++ b/src/brpc/event_dispatcher_kqueue.cpp @@ -78,6 +78,7 @@ int EventDispatcher::Start(const bthread_attr_t* thread_attr) { // Only event dispatcher thread has flag BTHREAD_GLOBAL_PRIORITY. bthread_attr_t kqueue_thread_attr = _thread_attr | BTHREAD_NEVER_QUIT | BTHREAD_GLOBAL_PRIORITY; + bthread_attr_set_name(&kqueue_thread_attr, "EventDispatcher::RunThis"); // Polling thread uses the same attr for consumer threads (NORMAL right // now). Previously, we used small stack (32KB) which may be overflowed diff --git a/src/brpc/global.cpp b/src/brpc/global.cpp index 82ec32e9e5..c561d927d7 100644 --- a/src/brpc/global.cpp +++ b/src/brpc/global.cpp @@ -646,7 +646,9 @@ static void GlobalInitializeOrDieImpl() { // We never join GlobalUpdate, let it quit with the process. 
bthread_t th; - CHECK(bthread_start_background(&th, NULL, GlobalUpdate, NULL) == 0) + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "GlobalUpdate"); + CHECK(bthread_start_background(&th, &attr, GlobalUpdate, NULL) == 0) << "Fail to start GlobalUpdate"; } diff --git a/src/brpc/input_messenger.cpp b/src/brpc/input_messenger.cpp index 45ececbc66..1b8a86f2c6 100644 --- a/src/brpc/input_messenger.cpp +++ b/src/brpc/input_messenger.cpp @@ -197,6 +197,13 @@ static void QueueMessage(InputMessageBase* to_run_msg, if (!to_run_msg) { return; } + +#if BRPC_WITH_RDMA + if (rdma::FLAGS_rdma_disable_bthread) { + ProcessInputMessage(to_run_msg); + return; + } +#endif // Create bthread for last_msg. The bthread is not scheduled // until bthread_flush() is called (in the worse case). @@ -207,14 +214,8 @@ static void QueueMessage(InputMessageBase* to_run_msg, BTHREAD_ATTR_NORMAL) | BTHREAD_NOSIGNAL; tmp.keytable_pool = keytable_pool; tmp.tag = bthread_self_tag(); - -#if BRPC_WITH_RDMA - if (rdma::FLAGS_rdma_disable_bthread) { - ProcessInputMessage(to_run_msg); - return; - } -#endif - + bthread_attr_set_name(&tmp, "ProcessInputMessage"); + if (!FLAGS_usercode_in_coroutine && bthread_start_background( &th, &tmp, ProcessInputMessage, to_run_msg) == 0) { ++*num_bthread_created; diff --git a/src/brpc/periodic_task.cpp b/src/brpc/periodic_task.cpp index 27ea3ec310..3ba9c0ec59 100644 --- a/src/brpc/periodic_task.cpp +++ b/src/brpc/periodic_task.cpp @@ -38,8 +38,10 @@ static void* PeriodicTaskThread(void* arg) { static void RunPeriodicTaskThread(void* arg) { bthread_t th = 0; + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "PeriodicTaskThread"); int rc = bthread_start_background( - &th, &BTHREAD_ATTR_NORMAL, PeriodicTaskThread, arg); + &th, &attr, PeriodicTaskThread, arg); if (rc != 0) { LOG(ERROR) << "Fail to start PeriodicTaskThread"; static_cast(arg)->OnDestroyingTask(); diff --git a/src/brpc/rdma/rdma_endpoint.cpp 
b/src/brpc/rdma/rdma_endpoint.cpp index 1d502a98f7..5176756510 100644 --- a/src/brpc/rdma/rdma_endpoint.cpp +++ b/src/brpc/rdma/rdma_endpoint.cpp @@ -250,7 +250,9 @@ void RdmaConnect::StartConnect(const Socket* socket, _done = done; _data = data; bthread_t tid; - if (bthread_start_background(&tid, &BTHREAD_ATTR_NORMAL, + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "RdmaProcessHandshakeAtClient"); + if (bthread_start_background(&tid, &attr, RdmaEndpoint::ProcessHandshakeAtClient, socket->_rdma_ep) < 0) { LOG(FATAL) << "Fail to start handshake bthread"; } else { @@ -309,7 +311,9 @@ void RdmaEndpoint::OnNewDataFromTcp(Socket* m) { ep->_state = S_HELLO_WAIT; SocketUniquePtr s; m->ReAddress(&s); - if (bthread_start_background(&tid, &BTHREAD_ATTR_NORMAL, + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "RdmaProcessHandshakeAtServer"); + if (bthread_start_background(&tid, &attr, ProcessHandshakeAtServer, ep) < 0) { ep->_state = UNINIT; LOG(FATAL) << "Fail to start handshake bthread"; @@ -1616,6 +1620,7 @@ int RdmaEndpoint::PollingModeInitialize(bthread_tag_t tag, auto attr = FLAGS_rdma_disable_bthread ? 
BTHREAD_ATTR_PTHREAD : BTHREAD_ATTR_NORMAL; attr.tag = tag; + bthread_attr_set_name(&attr, "RdmaPolling"); pollers[i].callback = callback; pollers[i].init_fn = init_fn; pollers[i].release_fn = release_fn; diff --git a/src/brpc/server.cpp b/src/brpc/server.cpp index cd83053a42..a82817be8b 100644 --- a/src/brpc/server.cpp +++ b/src/brpc/server.cpp @@ -1236,6 +1236,7 @@ int Server::StartInternal(const butil::EndPoint& endpoint, CHECK_EQ(INVALID_BTHREAD, _derivative_thread); bthread_attr_t tmp = BTHREAD_ATTR_NORMAL; tmp.tag = _options.bthread_tag; + bthread_attr_set_name(&tmp, "UpdateDerivedVars"); if (bthread_start_background(&_derivative_thread, &tmp, UpdateDerivedVars, this) != 0) { LOG(ERROR) << "Fail to create _derivative_thread"; diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index 73ea309a71..ec5300987c 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -1491,8 +1491,10 @@ void Socket::AfterAppConnected(int err, void* data) { // requests are not setup yet. check the comment on Setup() in Write() req->Setup(s); bthread_t th; + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "KeepWrite"); if (bthread_start_background( - &th, &BTHREAD_ATTR_NORMAL, KeepWrite, req) != 0) { + &th, &attr, KeepWrite, req) != 0) { PLOG(WARNING) << "Fail to start KeepWrite"; KeepWrite(req); } @@ -1532,7 +1534,9 @@ int Socket::KeepWriteIfConnected(int fd, int err, void* data) { bthread_t th; std::unique_ptr thrd_func(brpc::NewCallback( Socket::CheckConnectedAndKeepWrite, fd, err, data)); - if ((err = bthread_start_background(&th, &BTHREAD_ATTR_NORMAL, + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "CheckConnectedAndKeepWrite"); + if ((err = bthread_start_background(&th, &attr, RunClosure, thrd_func.get())) == 0) { thrd_func.release(); return 0; @@ -1705,6 +1709,8 @@ int Socket::StartWrite(WriteRequest* req, const WriteOptions& opt) { int saved_errno = 0; bthread_t th; + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + 
bthread_attr_set_name(&attr, "KeepWrite"); SocketUniquePtr ptr_for_keep_write; ssize_t nw = 0; int ret = 0; @@ -1779,7 +1785,7 @@ int Socket::StartWrite(WriteRequest* req, const WriteOptions& opt) { KEEPWRITE_IN_BACKGROUND: ReAddress(&ptr_for_keep_write); req->set_socket(ptr_for_keep_write.release()); - if (bthread_start_background(&th, &BTHREAD_ATTR_NORMAL, + if (bthread_start_background(&th, &attr, KeepWrite, req) != 0) { LOG(FATAL) << "Fail to start KeepWrite"; KeepWrite(req); @@ -2266,6 +2272,7 @@ int Socket::OnInputEvent(void* user_data, uint32_t events, bthread_attr_t attr = thread_attr; attr.keytable_pool = p->_keytable_pool; attr.tag = bthread_self_tag(); + bthread_attr_set_name(&attr, "ProcessEvent"); if (FLAGS_usercode_in_coroutine) { ProcessEvent(p); #if BRPC_WITH_RDMA diff --git a/src/brpc/socket_map.cpp b/src/brpc/socket_map.cpp index c5c94bc747..14bea71db5 100644 --- a/src/brpc/socket_map.cpp +++ b/src/brpc/socket_map.cpp @@ -190,7 +190,9 @@ int SocketMap::Init(const SocketMapOptions& options) { } if (_options.idle_timeout_second_dynamic != NULL || _options.idle_timeout_second > 0) { - if (bthread_start_background(&_close_idle_thread, NULL, + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "RunWatchConnections"); + if (bthread_start_background(&_close_idle_thread, &attr, RunWatchConnections, this) != 0) { LOG(FATAL) << "Fail to start bthread"; return -1; diff --git a/src/bthread/bthread.cpp b/src/bthread/bthread.cpp index 085d814df6..ac49f269d9 100644 --- a/src/bthread/bthread.cpp +++ b/src/bthread/bthread.cpp @@ -669,3 +669,10 @@ uint64_t bthread_cpu_clock_ns(void) { } } // extern "C" + +void bthread_attr_set_name(bthread_attr_t* attr, const char* name) { + if (attr) { + strncpy(attr->name, name, BTHREAD_NAME_MAX_LENGTH); + attr->name[BTHREAD_NAME_MAX_LENGTH] = '\0'; + } +} diff --git a/src/bthread/fd.cpp b/src/bthread/fd.cpp index b65dca4838..17ca63dcfe 100644 --- a/src/bthread/fd.cpp +++ b/src/bthread/fd.cpp @@ -141,8 
+141,10 @@ class EpollThread { PLOG(FATAL) << "Fail to epoll_create/kqueue"; return -1; } + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "EpollThread::run_this"); if (bthread_start_background( - &_tid, NULL, EpollThread::run_this, this) != 0) { + &_tid, &attr, EpollThread::run_this, this) != 0) { close(_epfd); _epfd = -1; LOG(FATAL) << "Fail to create epoll bthread"; diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp index 40daaa1ace..c577b64b16 100644 --- a/src/bthread/task_group.cpp +++ b/src/bthread/task_group.cpp @@ -49,7 +49,7 @@ namespace bthread { static const bthread_attr_t BTHREAD_ATTR_TASKGROUP = { - BTHREAD_STACKTYPE_UNKNOWN, 0, NULL, BTHREAD_TAG_INVALID }; + BTHREAD_STACKTYPE_UNKNOWN, 0, NULL, BTHREAD_TAG_INVALID, {0} }; DEFINE_bool(show_bthread_creation_in_vars, false, "When this flags is on, The time " "from bthread creation to first run will be recorded and shown in /vars"); @@ -1141,6 +1141,7 @@ void print_task(std::ostream& os, bthread_t tid, bool enable_trace, << "\nattr={stack_type=" << attr.stack_type << " flags=" << attr.flags << " specified_tag=" << attr.tag + << " name=" << attr.name << " keytable_pool=" << attr.keytable_pool << "}\nhas_tls=" << has_tls << "\nuptime_ns=" << butil::cpuwide_time_ns() - cpuwide_start_ns diff --git a/src/bthread/types.h b/src/bthread/types.h index a09c2e3817..86148c938b 100644 --- a/src/bthread/types.h +++ b/src/bthread/types.h @@ -97,12 +97,14 @@ typedef struct { size_t nfree; } bthread_keytable_pool_stat_t; +static const size_t BTHREAD_NAME_MAX_LENGTH = 31; // Attributes for thread creation. 
typedef struct bthread_attr_t { bthread_stacktype_t stack_type; bthread_attrflags_t flags; bthread_keytable_pool_t* keytable_pool; bthread_tag_t tag; + char name[BTHREAD_NAME_MAX_LENGTH + 1]; // do not use std::string to keep POD #if defined(__cplusplus) void operator=(unsigned stacktype_and_flags) { @@ -120,6 +122,8 @@ typedef struct bthread_attr_t { #endif // __cplusplus } bthread_attr_t; +void bthread_attr_set_name(bthread_attr_t* attr, const char* name); + // bthreads started with this attribute will run on stack of worker pthread and // all bthread functions that would block the bthread will block the pthread. // The bthread will not allocate its own stack, simply occupying a little meta @@ -127,22 +131,22 @@ typedef struct bthread_attr_t { // obvious drawback is that you need more worker pthreads when you have a lot // of such bthreads. static const bthread_attr_t BTHREAD_ATTR_PTHREAD = -{ BTHREAD_STACKTYPE_PTHREAD, 0, NULL, BTHREAD_TAG_INVALID }; +{ BTHREAD_STACKTYPE_PTHREAD, 0, NULL, BTHREAD_TAG_INVALID, {0} }; // bthreads created with following attributes will have different size of // stacks. Default is BTHREAD_ATTR_NORMAL. static const bthread_attr_t BTHREAD_ATTR_SMALL = {BTHREAD_STACKTYPE_SMALL, 0, NULL, - BTHREAD_TAG_INVALID}; + BTHREAD_TAG_INVALID, {0}}; static const bthread_attr_t BTHREAD_ATTR_NORMAL = {BTHREAD_STACKTYPE_NORMAL, 0, NULL, - BTHREAD_TAG_INVALID}; + BTHREAD_TAG_INVALID, {0}}; static const bthread_attr_t BTHREAD_ATTR_LARGE = {BTHREAD_STACKTYPE_LARGE, 0, NULL, - BTHREAD_TAG_INVALID}; + BTHREAD_TAG_INVALID, {0}}; // bthreads created with this attribute will print log when it's started, // context-switched, finished. 
static const bthread_attr_t BTHREAD_ATTR_DEBUG = { BTHREAD_STACKTYPE_NORMAL, BTHREAD_LOG_START_AND_FINISH | BTHREAD_LOG_CONTEXT_SWITCH, NULL, - BTHREAD_TAG_INVALID}; + BTHREAD_TAG_INVALID, {0}}; static const size_t BTHREAD_EPOLL_THREAD_NUM = 1; static const bthread_t BTHREAD_ATOMIC_INIT = 0; From 1b4b29c89b360a8a803a71d868a32f25d9e1f1e9 Mon Sep 17 00:00:00 2001 From: Jay Date: Sun, 7 Dec 2025 22:27:06 +0800 Subject: [PATCH 04/84] butil: fix undefined behaviors (#3135) There are two kinds of problems: 1. signed number overflow is undefined behavior; 2. vsnprintfT may return E2BIG instead of EOVERFLOW. --- src/butil/fast_rand.cpp | 24 +++++++++++---- src/butil/numerics/safe_conversions.h | 25 ++++++++++++++++ .../strings/string_number_conversions.cc | 29 ++----------------- src/butil/strings/stringprintf.cc | 2 +- 4 files changed, 46 insertions(+), 34 deletions(-) diff --git a/src/butil/fast_rand.cpp b/src/butil/fast_rand.cpp index 36e0e83105..cef4585428 100644 --- a/src/butil/fast_rand.cpp +++ b/src/butil/fast_rand.cpp @@ -23,6 +23,7 @@ #include "butil/macros.h" #include "butil/time.h" // gettimeofday_us() #include "butil/fast_rand.h" +#include "butil/numerics/safe_conversions.h" // safe_abs namespace butil { @@ -110,20 +111,31 @@ int64_t fast_rand_in_64(int64_t min, int64_t max) { if (need_init(_tls_seed)) { init_fast_rand_seed(&_tls_seed); } - if (min >= max) { + if (BAIDU_UNLIKELY(min >= max)) { if (min == max) { return min; } - const int64_t tmp = min; - min = max; - max = tmp; + std::swap(min, max); + } + uint64_t range; + if (min >= 0) { + // Always safe to do subtraction. 
+ range = (uint64_t)(max - min) + 1; + return min + (int64_t)fast_rand_impl(range, &_tls_seed); + } + + uint64_t abs_min = safe_abs(min); + if (max >= 0) { + range = abs_min + (uint64_t)(max) + 1; + } else { + range = abs_min - safe_abs(max) + 1; } - int64_t range = max - min + 1; if (range == 0) { // max = INT64_MAX, min = INT64_MIN return (int64_t)xorshift128_next(&_tls_seed); } - return min + (int64_t)fast_rand_impl(max - min + 1, &_tls_seed); + uint64_t r = fast_rand_impl(range, &_tls_seed); + return r >= abs_min ? (int64_t)(r - abs_min) : -((int64_t)(abs_min - r)); } uint64_t fast_rand_in_u64(uint64_t min, uint64_t max) { diff --git a/src/butil/numerics/safe_conversions.h b/src/butil/numerics/safe_conversions.h index 677aa4af0a..9a48811714 100644 --- a/src/butil/numerics/safe_conversions.h +++ b/src/butil/numerics/safe_conversions.h @@ -58,6 +58,31 @@ inline Dst saturated_cast(Src value) { return static_cast(value); } +inline uint64_t safe_abs(uint64_t x) { + return x; +} + +inline uint64_t safe_abs(int64_t x) { + return (x >= 0) ? (uint64_t)x : ((~(uint64_t)(x)) + 1); +} + +inline uint32_t safe_abs(uint32_t x) { + return x; +} + +inline uint32_t safe_abs(int32_t x) { + return (uint32_t)safe_abs((int64_t)x); +} + +#if defined(__APPLE__) +inline unsigned long safe_abs(unsigned long x) { + return x; +} +inline unsigned long safe_abs(long x) { + return (x >= 0) ? 
(unsigned long)x : ((~(unsigned long)(x)) + 1); +} +#endif + } // namespace butil #endif // BUTIL_SAFE_CONVERSIONS_H_ diff --git a/src/butil/strings/string_number_conversions.cc b/src/butil/strings/string_number_conversions.cc index 29645dec89..bcf3f49ce4 100644 --- a/src/butil/strings/string_number_conversions.cc +++ b/src/butil/strings/string_number_conversions.cc @@ -12,6 +12,7 @@ #include #include "butil/logging.h" +#include "butil/numerics/safe_conversions.h" // safe_abs #include "butil/scoped_clear_errno.h" #include "butil/strings/utf_string_conversions.h" #include "butil/third_party/dmg_fp/dmg_fp.h" @@ -22,30 +23,6 @@ namespace { template struct IntToStringT { - // This is to avoid a compiler warning about unary minus on unsigned type. - // For example, say you had the following code: - // template - // INT abs(INT value) { return value < 0 ? -value : value; } - // Even though if INT is unsigned, it's impossible for value < 0, so the - // unary minus will never be taken, the compiler will still generate a - // warning. We do a little specialization dance... - template - struct ToUnsignedT {}; - - template - struct ToUnsignedT { - static UINT2 ToUnsigned(INT2 value) { - return static_cast(value); - } - }; - - template - struct ToUnsignedT { - static UINT2 ToUnsigned(INT2 value) { - return static_cast(value < 0 ? -value : value); - } - }; - // This set of templates is very similar to the above templates, but // for testing whether an integer is negative. template @@ -74,9 +51,7 @@ struct IntToStringT { STR outbuf(kOutputBufSize, 0); bool is_neg = TestNegT::TestNeg(value); - // Even though is_neg will never be true when INT is parameterized as - // unsigned, even the presence of the unary operation causes a warning. 
- UINT res = ToUnsignedT::ToUnsigned(value); + UINT res = safe_abs(value); typename STR::iterator it(outbuf.end()); do { diff --git a/src/butil/strings/stringprintf.cc b/src/butil/strings/stringprintf.cc index 3f40e7247d..0ca6366cc8 100644 --- a/src/butil/strings/stringprintf.cc +++ b/src/butil/strings/stringprintf.cc @@ -78,7 +78,7 @@ static void StringAppendVT(StringType* dst, // wrong and no amount of buffer-doubling is going to fix it. return; #else - if (errno != 0 && errno != EOVERFLOW) + if (errno != 0 && errno != EOVERFLOW && errno != E2BIG) return; // Try doubling the buffer size. mem_length *= 2; From d7752d17fd7da35034a8d5096b3ee987848100e3 Mon Sep 17 00:00:00 2001 From: koarz <66543806+koarz@users.noreply.github.com> Date: Mon, 8 Dec 2025 10:54:52 +0800 Subject: [PATCH 05/84] feat: support more ssl verify mode (#3141) * feat: support more ssl verify mode * 1 --- src/brpc/details/ssl_helper.cpp | 15 +++++++++++++-- src/brpc/ssl_options.cpp | 5 ++++- src/brpc/ssl_options.h | 12 ++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/brpc/details/ssl_helper.cpp b/src/brpc/details/ssl_helper.cpp index d33d0ee783..f38b16d6a5 100644 --- a/src/brpc/details/ssl_helper.cpp +++ b/src/brpc/details/ssl_helper.cpp @@ -17,6 +17,7 @@ +#include "brpc/ssl_options.h" #include #ifndef USE_MESALINK @@ -412,8 +413,18 @@ static int SetSSLOptions(SSL_CTX* ctx, const std::string& ciphers, // TODO: Verify the CNAME in certificate matches the requesting host if (verify.verify_depth > 0) { - SSL_CTX_set_verify(ctx, (SSL_VERIFY_PEER - | SSL_VERIFY_FAIL_IF_NO_PEER_CERT), NULL); + if (verify.verify_mode == VerifyMode::VERIFY_FAIL_IF_NO_PEER_CERT) { + SSL_CTX_set_verify(ctx, (SSL_VERIFY_PEER + | SSL_VERIFY_FAIL_IF_NO_PEER_CERT), NULL); + } else if (verify.verify_mode == VerifyMode::VERIFY_PEER) { + SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL); + } else if (verify.verify_mode == VerifyMode::VERIFY_NONE) { + SSL_CTX_set_verify(ctx, SSL_VERIFY_NONE, NULL); + } 
else { + // for forward compatibility + SSL_CTX_set_verify(ctx, (SSL_VERIFY_PEER + | SSL_VERIFY_FAIL_IF_NO_PEER_CERT), NULL); + } SSL_CTX_set_verify_depth(ctx, verify.verify_depth); std::string cafile = verify.ca_file_path; if (cafile.empty()) { diff --git a/src/brpc/ssl_options.cpp b/src/brpc/ssl_options.cpp index e3b8f5b184..748749ae82 100644 --- a/src/brpc/ssl_options.cpp +++ b/src/brpc/ssl_options.cpp @@ -20,7 +20,10 @@ namespace brpc { -VerifyOptions::VerifyOptions() : verify_depth(0) {} +VerifyOptions::VerifyOptions() + : verify_depth(0) + , verify_mode(VerifyMode::NOT_SET) +{} ChannelSSLOptions::ChannelSSLOptions() : ciphers("DEFAULT") diff --git a/src/brpc/ssl_options.h b/src/brpc/ssl_options.h index bbe9ccf1c3..8ddda248a6 100644 --- a/src/brpc/ssl_options.h +++ b/src/brpc/ssl_options.h @@ -41,6 +41,13 @@ struct CertInfo { std::vector sni_filters; }; +enum class VerifyMode { + NOT_SET, + VERIFY_NONE, + VERIFY_PEER, + VERIFY_FAIL_IF_NO_PEER_CERT, +}; + struct VerifyOptions { // Constructed with default options VerifyOptions(); @@ -50,6 +57,11 @@ struct VerifyOptions { // Default: 0 int verify_depth; + // Set ssl verify mode for openssl + // If VERIFY_FAIL_IF_NO_PEER_CERT, it will set `SSL_VERIFY_FAIL_IF_NO_PEER_CERT | SSL_VERIFY_PEER` + // Default: NOT_SET + VerifyMode verify_mode; + // Set the trusted CA file to verify the peer's certificate // If empty, use the system default CA files // Default: "" From 86e6a156d4c781ede05c1882dd769c35c5e6bc95 Mon Sep 17 00:00:00 2001 From: Dayuxiaoshui <158081477+Dayuxiaoshui@users.noreply.github.com> Date: Mon, 8 Dec 2025 10:55:28 +0800 Subject: [PATCH 06/84] Add RISC-V architecture support (#3125) This commit adds full support for RISC-V 64-bit architecture to brpc. 
Changes include: - Add RISC-V atomic operations implementation - Add RISC-V architecture detection in build system - Add RISC-V context switching (bthread support) - Add RISC-V clock cycle counter support (rdcycle) - Update CMake and Makefile for RISC-V compilation All core functionalities have been tested and verified in QEMU RISC-V environment, including: - Atomic operations (32-bit and 64-bit) - Memory barriers - Context switching - Clock cycle counting Co-authored-by: gong-flying --- CMakeLists.txt | 3 + Makefile | 4 + src/bthread/context.cpp | 88 ++++++++++ src/bthread/context.h | 3 + src/bthread/processor.h | 2 + src/bthread/task_group.cpp | 6 +- src/bthread/task_group.h | 2 +- src/butil/atomicops.h | 2 + src/butil/atomicops_internals_riscv_gcc.h | 192 ++++++++++++++++++++++ src/butil/build_config.h | 10 ++ src/butil/time.h | 7 + 11 files changed, 316 insertions(+), 3 deletions(-) create mode 100644 src/butil/atomicops_internals_riscv_gcc.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b5db489b3..6c6f985fc6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -170,6 +170,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") elseif((CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")) # segmentation fault in libcontext set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-gcse") + elseif((CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")) + # RISC-V specific optimizations + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=rv64gc") endif() if(NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0)) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-aligned-new") diff --git a/Makefile b/Makefile index 16a10ae2ea..abe029e360 100644 --- a/Makefile +++ b/Makefile @@ -44,6 +44,10 @@ ifeq ($(shell test $(GCC_VERSION) -ge 40400; echo $$?),0) CXXFLAGS+=-msse4 -msse4.2 endif endif +# RISC-V specific optimizations +ifeq ($(shell uname -m),riscv64) + CXXFLAGS+=-march=rv64gc +endif #not solved yet ifeq ($(CC),gcc) ifeq ($(shell test $(GCC_VERSION) -ge 70000; echo $$?),0) diff --git a/src/bthread/context.cpp 
b/src/bthread/context.cpp index bafa927d78..b7be731eae 100644 --- a/src/bthread/context.cpp +++ b/src/bthread/context.cpp @@ -900,3 +900,91 @@ __asm ( ); #endif + +#if defined(BTHREAD_CONTEXT_PLATFORM_linux_riscv64) && defined(BTHREAD_CONTEXT_COMPILER_gcc) +__asm ( +".text\n" +".align 3\n" +".global bthread_jump_fcontext\n" +".type bthread_jump_fcontext, %function\n" +"bthread_jump_fcontext:\n" +" addi sp, sp, -160\n" +" # save callee-saved registers\n" +" sd s0, 64(sp)\n" +" sd s1, 72(sp)\n" +" sd s2, 80(sp)\n" +" sd s3, 88(sp)\n" +" sd s4, 96(sp)\n" +" sd s5, 104(sp)\n" +" sd s6, 112(sp)\n" +" sd s7, 120(sp)\n" +" sd s8, 128(sp)\n" +" sd s9, 136(sp)\n" +" sd s10, 144(sp)\n" +" sd s11, 152(sp)\n" +" sd ra, 0(sp)\n" +" sd fp, 8(sp)\n" +" # save floating point registers\n" +" fsd fs0, 16(sp)\n" +" fsd fs1, 24(sp)\n" +" fsd fs2, 32(sp)\n" +" fsd fs3, 40(sp)\n" +" fsd fs4, 48(sp)\n" +" fsd fs5, 56(sp)\n" +" # store current stack pointer\n" +" sd sp, 0(a0)\n" +" # load new stack pointer\n" +" mv sp, a1\n" +" # restore floating point registers\n" +" fld fs0, 16(sp)\n" +" fld fs1, 24(sp)\n" +" fld fs2, 32(sp)\n" +" fld fs3, 40(sp)\n" +" fld fs4, 48(sp)\n" +" fld fs5, 56(sp)\n" +" # restore callee-saved registers\n" +" ld s0, 64(sp)\n" +" ld s1, 72(sp)\n" +" ld s2, 80(sp)\n" +" ld s3, 88(sp)\n" +" ld s4, 96(sp)\n" +" ld s5, 104(sp)\n" +" ld s6, 112(sp)\n" +" ld s7, 120(sp)\n" +" ld s8, 128(sp)\n" +" ld s9, 136(sp)\n" +" ld s10, 144(sp)\n" +" ld s11, 152(sp)\n" +" ld ra, 0(sp)\n" +" ld fp, 8(sp)\n" +" # restore stack pointer\n" +" addi sp, sp, 160\n" +" # return value in a0\n" +" mv a0, a2\n" +" # jump to new context\n" +" ret\n" +); + +__asm ( +".text\n" +".align 3\n" +".global bthread_make_fcontext\n" +".type bthread_make_fcontext, %function\n" +"bthread_make_fcontext:\n" +" # align stack to 16-byte boundary\n" +" andi a0, a0, -16\n" +" addi a0, a0, -160\n" +" # store function pointer at the top of stack\n" +" sd a2, 0(a0)\n" +" # store finish function address\n" +" la 
t0, finish\n" +" sd t0, 8(a0)\n" +" # return pointer to context data\n" +" ret\n" +"finish:\n" +" # exit with code 0\n" +" li a0, 0\n" +" # call exit\n" +" call _exit\n" +); +#endif diff --git a/src/bthread/context.h b/src/bthread/context.h index 8de85af626..149c7672a3 100644 --- a/src/bthread/context.h +++ b/src/bthread/context.h @@ -42,6 +42,9 @@ #elif __loongarch64 #define BTHREAD_CONTEXT_PLATFORM_linux_loongarch64 #define BTHREAD_CONTEXT_CALL_CONVENTION + #elif __riscv + #define BTHREAD_CONTEXT_PLATFORM_linux_riscv64 + #define BTHREAD_CONTEXT_CALL_CONVENTION #endif #elif defined(__MINGW32__) || defined (__MINGW64__) diff --git a/src/bthread/processor.h b/src/bthread/processor.h index f8939234e8..246c8b936f 100644 --- a/src/bthread/processor.h +++ b/src/bthread/processor.h @@ -28,6 +28,8 @@ # ifndef cpu_relax #if defined(ARCH_CPU_ARM_FAMILY) # define cpu_relax() asm volatile("yield\n": : :"memory") +#elif defined(ARCH_CPU_RISCV_FAMILY) +# define cpu_relax() asm volatile("fence.i\n": : :"memory") #elif defined(ARCH_CPU_LOONGARCH64_FAMILY) # define cpu_relax() asm volatile("nop\n": : :"memory"); #else diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp index c577b64b16..877a5d406e 100644 --- a/src/bthread/task_group.cpp +++ b/src/bthread/task_group.cpp @@ -101,7 +101,8 @@ AtomicInteger128::Value AtomicInteger128::load() const { #endif // __x86_64__ return {value[0], value[1]}; #else // __x86_64__ || __ARM_NEON - BAIDU_SCOPED_LOCK(_mutex); + // RISC-V and other architectures use mutex fallback + BAIDU_SCOPED_LOCK(const_cast(_mutex)); return _value; #endif // __x86_64__ || __ARM_NEON } @@ -114,7 +115,8 @@ void AtomicInteger128::store(Value value) { int64x2_t v = vld1q_s64(reinterpret_cast(&value)); vst1q_s64(reinterpret_cast(&_value), v); #else - BAIDU_SCOPED_LOCK(_mutex); + // RISC-V and other architectures use mutex fallback + BAIDU_SCOPED_LOCK(const_cast(_mutex)); _value = value; #endif // __x86_64__ || __ARM_NEON } diff --git 
a/src/bthread/task_group.h b/src/bthread/task_group.h index f3e1d402b2..54140c0dc2 100644 --- a/src/bthread/task_group.h +++ b/src/bthread/task_group.h @@ -73,7 +73,7 @@ class AtomicInteger128 { private: Value _value{}; - // Used to protect `_cpu_time_stat' when __x86_64__ and __ARM_NEON is not defined. + // Used to protect `_cpu_time_stat' when __x86_64__, __ARM_NEON, and __riscv is not defined. FastPthreadMutex _mutex; }; diff --git a/src/butil/atomicops.h b/src/butil/atomicops.h index cda1529f40..7ee383770f 100644 --- a/src/butil/atomicops.h +++ b/src/butil/atomicops.h @@ -157,6 +157,8 @@ Atomic64 Release_Load(volatile const Atomic64* ptr); #include "butil/atomicops_internals_mips_gcc.h" #elif defined(COMPILER_GCC) && defined(ARCH_CPU_LOONGARCH64_FAMILY) #include "butil/atomicops_internals_loongarch64_gcc.h" +#elif defined(COMPILER_GCC) && defined(ARCH_CPU_RISCV_FAMILY) +#include "butil/atomicops_internals_riscv_gcc.h" #else #error "Atomic operations are not supported on your platform" #endif diff --git a/src/butil/atomicops_internals_riscv_gcc.h b/src/butil/atomicops_internals_riscv_gcc.h new file mode 100644 index 0000000000..e7bd78bbed --- /dev/null +++ b/src/butil/atomicops_internals_riscv_gcc.h @@ -0,0 +1,192 @@ +// Copyright 2024 The Apache Software Foundation. All rights reserved. +// Use of this source code is governed by the Apache License, Version 2.0 +// that can be found in the LICENSE file. + +// This file is an internal atomic implementation, use butil/atomicops.h instead. +// RISC-V architecture specific atomic operations implementation using GCC intrinsics. 
+ +#ifndef BUTIL_ATOMICOPS_INTERNALS_RISCV_GCC_H_ +#define BUTIL_ATOMICOPS_INTERNALS_RISCV_GCC_H_ + +namespace butil { +namespace subtle { + +inline void MemoryBarrier() { + __asm__ __volatile__ ("fence" ::: "memory"); // NOLINT +} + +// RISC-V atomic operations using GCC built-in functions +// These are implemented using the standard GCC atomic built-ins which +// are supported on RISC-V since GCC 7.1+ + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) + return old_value; + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!__sync_bool_compare_and_swap(ptr, old_value, new_value)); + return old_value; +} + +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + return Barrier_AtomicIncrement(ptr, increment); +} + +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + for (;;) { + // Atomic exchange the old value with an incremented one. + Atomic32 old_value = *ptr; + Atomic32 new_value = old_value + increment; + if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) { + // The exchange took place as expected. + return new_value; + } + // Otherwise, *ptr changed mid-loop and we need to retry. + } +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + // Since NoBarrier_CompareAndSwap uses __sync_bool_compare_and_swap, which + // is a full memory barrier, none is needed here or below in Release. 
+ return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit versions of the operations. +// See the 32-bit versions for comments. + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) + return old_value; + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!__sync_bool_compare_and_swap(ptr, old_value, new_value)); + return old_value; +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + return Barrier_AtomicIncrement(ptr, increment); +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + for (;;) { + // Atomic exchange the old value with an incremented one. + Atomic64 old_value = *ptr; + Atomic64 new_value = old_value + increment; + if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) { + // The exchange took place as expected. 
+ return new_value; + } + // Otherwise, *ptr changed mid-loop and we need to retry. + } +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return *ptr; +} + +} // namespace butil::subtle +} // namespace butil + +#endif // BUTIL_ATOMICOPS_INTERNALS_RISCV_GCC_H_ diff --git a/src/butil/build_config.h b/src/butil/build_config.h index 5ddf38215e..18d449b44b 100644 --- a/src/butil/build_config.h +++ b/src/butil/build_config.h @@ -138,6 +138,16 @@ #define ARCH_CPU_LOONGARCH64 1 #define ARCH_CPU_64_BITS 1 #define ARCH_CPU_LITTLE_ENDIAN 1 +#elif defined(__riscv) +#define ARCH_CPU_RISCV_FAMILY 1 +#if defined(__riscv_xlen) && (__riscv_xlen == 64) +#define ARCH_CPU_RISCV64 1 +#define ARCH_CPU_64_BITS 1 +#else +#define ARCH_CPU_RISCV32 1 +#define ARCH_CPU_32_BITS 1 +#endif +#define ARCH_CPU_LITTLE_ENDIAN 1 #else #error Please add support for your architecture in butil/build_config.h #endif diff --git a/src/butil/time.h b/src/butil/time.h index 8b85699840..c57000ea99 100644 --- a/src/butil/time.h +++ b/src/butil/time.h @@ -254,6 +254,13 @@ inline uint64_t clock_cycles() { : "=r" 
(stable_counter), "=r" (counter_id) ); return stable_counter; +#elif defined(__riscv) + uint64_t cycles; + __asm__ __volatile__ ( + "rdcycle %0" + : "=r" (cycles) + ); + return cycles; #else #error "unsupported arch" #endif From 112a0dfd89aebe345aecb9b875c5020132f8d7b4 Mon Sep 17 00:00:00 2001 From: Zhilei Chen <2522134184@qq.com> Date: Thu, 11 Dec 2025 20:52:40 +0800 Subject: [PATCH 07/84] Add missed definition of RegisterFlagValidatorOrDie for uint32_t (#3171) --- src/brpc/reloadable_flags.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/brpc/reloadable_flags.cpp b/src/brpc/reloadable_flags.cpp index 958dc057ae..69fb4ad5f0 100644 --- a/src/brpc/reloadable_flags.cpp +++ b/src/brpc/reloadable_flags.cpp @@ -66,6 +66,10 @@ bool RegisterFlagValidatorOrDie(const int32_t* flag, bool (*validate_fn)(const char*, int32_t)) { return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn); } +bool RegisterFlagValidatorOrDie(const uint32_t* flag, + bool (*validate_fn)(const char*, uint32_t)) { + return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn); +} bool RegisterFlagValidatorOrDie(const int64_t* flag, bool (*validate_fn)(const char*, int64_t)) { return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn); From d1ad5d10465dc70ddc94443984a99ddb5271bcb7 Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Wed, 17 Dec 2025 20:32:01 +0800 Subject: [PATCH 08/84] Fix a redundant BeforeAdditionalRefReleased call (#3174) --- src/brpc/versioned_ref_with_id.h | 9 ++++----- test/bthread_work_stealing_queue_unittest.cpp | 9 ++------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/brpc/versioned_ref_with_id.h b/src/brpc/versioned_ref_with_id.h index 20e0106d93..f77d5afa83 100644 --- a/src/brpc/versioned_ref_with_id.h +++ b/src/brpc/versioned_ref_with_id.h @@ -504,10 +504,9 @@ int VersionedRefWithId::ReleaseAdditionalReference() { do { AdditionalRefStatus expect = ADDITIONAL_REF_USING; if (_additional_ref_status.compare_exchange_strong( - 
expect, ADDITIONAL_REF_RECYCLED, - butil::memory_order_relaxed, - butil::memory_order_relaxed)) { - BeforeAdditionalRefReleasedWrapper(); + expect, ADDITIONAL_REF_RECYCLED, + butil::memory_order_relaxed, + butil::memory_order_relaxed)) { WRAPPER_CALL(BeforeAdditionalRefReleased, static_cast(this)); return Dereference(); } @@ -591,7 +590,7 @@ void VersionedRefWithId::Revive(int32_t at_least_nref) { int32_t nref = NRefOfVRef(vref); if (nref < at_least_nref) { - // Set the status to REF_RECYLED since no one uses this socket + // Set the status to REF_RECYCLED since no one uses this socket _additional_ref_status.store( ADDITIONAL_REF_RECYCLED, butil::memory_order_relaxed); CHECK_EQ(1, nref); diff --git a/test/bthread_work_stealing_queue_unittest.cpp b/test/bthread_work_stealing_queue_unittest.cpp index 92fbb91095..a8b110371d 100644 --- a/test/bthread_work_stealing_queue_unittest.cpp +++ b/test/bthread_work_stealing_queue_unittest.cpp @@ -21,6 +21,7 @@ #include "butil/macros.h" #include "butil/scoped_lock.h" #include "bthread/work_stealing_queue.h" +#include "bthread/processor.h" namespace { typedef size_t value_type; @@ -39,13 +40,7 @@ void* steal_thread(void* arg) { if (q->steal(&val)) { stolen->push_back(val); } else { -#if defined(ARCH_CPU_ARM_FAMILY) - asm volatile("yield\n": : :"memory"); -#elif defined(ARCH_CPU_LOONGARCH64_FAMILY) - asm volatile("nop\n": : :"memory"); -#else - asm volatile("pause\n": : :"memory"); -#endif + cpu_relax(); } } return stolen; From ac8212c486622f6702cc1c70e56cff721d821fd8 Mon Sep 17 00:00:00 2001 From: Searion <33591311+Searion@users.noreply.github.com> Date: Thu, 18 Dec 2025 10:46:46 +0800 Subject: [PATCH 09/84] fix gdb_bthread_stack bthread_begin error (#3176) Co-authored-by: Haigang Xi --- tools/gdb_bthread_stack.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/gdb_bthread_stack.py b/tools/gdb_bthread_stack.py index acb6628a4e..2b18e0afed 100644 --- a/tools/gdb_bthread_stack.py +++ 
b/tools/gdb_bthread_stack.py @@ -55,8 +55,8 @@ status = False def get_bthread_num(): - root_agent = gdb.parse_and_eval("&(((((*bthread::g_task_control)._nbthreads)._combiner)._agents).root_)") - global_res = int(gdb.parse_and_eval("((*bthread::g_task_control)._nbthreads)._combiner._global_result")) + root_agent = gdb.parse_and_eval("&(((*(((*bthread::g_task_control)._nbthreads)._combiner._M_ptr))._agents).root_)") + global_res = int(gdb.parse_and_eval("(*(((*bthread::g_task_control)._nbthreads)._combiner._M_ptr))._global_result")) get_agent = "(*(('bvar::detail::AgentCombiner >::Agent' *){}))" last_node = root_agent long_type = gdb.lookup_type("long") From f8ae66475cdd64eee729b29ef588909cb0f6b1a2 Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Mon, 22 Dec 2025 19:00:47 +0800 Subject: [PATCH 10/84] Bugfix: SQ overflow (#3145) * Bugfix: The failure of ibv_post_send is caused by polling send CQE before recv CQE * Split send and recv comp channel * Use wr_id to update _sq_window_size * Send CQ and recv CQ share comp channel * Add IMM window * Deallocate polling cq * Update RDMA documents --- docs/cn/rdma.md | 48 +-- docs/en/rdma.md | 43 +-- src/brpc/rdma/rdma_endpoint.cpp | 525 +++++++++++++++++++------------- src/brpc/rdma/rdma_endpoint.h | 51 ++-- src/brpc/socket.cpp | 25 +- test/brpc_rdma_unittest.cpp | 6 +- 6 files changed, 405 insertions(+), 293 deletions(-) diff --git a/docs/cn/rdma.md b/docs/cn/rdma.md index 29b0b6fb43..e775459893 100644 --- a/docs/cn/rdma.md +++ b/docs/cn/rdma.md @@ -35,7 +35,7 @@ RDMA与TCP不同,不使用socket接口进行通信。但是在实现上仍然 brpc内部使用RDMA RC模式,每个RdmaEndpoint对应一个QP。RDMA连接建立依赖于前置TCP建连,TCP建连后双方交换必要参数,如GID、QPN等,再发起RDMA连接并实现数据传输。这个过程我们称为握手(参见RdmaEndpoint)。因为握手需要TCP连接,因此RdmaEndpoint所在的Socket类中,原本的TCP fd仍然有效。握手过程采用了brpc中已有的AppConnect逻辑。注意,握手用的TCP连接在后续数据传输阶段并不会收发数据,但仍保持为EST状态。一旦TCP连接中断,其上对应的RDMA连接同样会置错。 
-RdmaEndpoint数据传输逻辑的第一个重要特性是零拷贝。要发送的所有数据默认都存放在IOBuf的Block中,因此所发送的Block需要等到对端确认接收完成后才可以释放,这些Block的引用被存放于RdmaEndpoint::_sbuf中。而要实现接收零拷贝,则需要确保接受端所预提交的接收缓冲区必须直接在IOBuf的Block里面,被存放于RdmaEndpoint::_rbuf。注意,接收端预提交的每一段Block,有一个固定的大小(recv_block_size)。发送端发送时,一个请求最多只能有这么大,否则接收端则无法成功接收。 +RdmaEndpoint数据传输逻辑的第一个重要特性是零拷贝。要发送的所有数据默认都存放在IOBuf的Block中,因此所发送的Block需要等到发送CQE触发后才可以释放,这些Block的引用被存放于RdmaEndpoint::_sbuf中。而要实现接收零拷贝,则需要确保接受端所预提交的接收缓冲区必须直接在IOBuf的Block里面,被存放于RdmaEndpoint::_rbuf。注意,接收端预提交的每一段Block,有一个固定的大小(recv_block_size)。发送端发送时,一个请求最多只能有这么大,否则接收端则无法成功接收。 RdmaEndpoint数据传输逻辑的第二个重要特性是滑动窗口流控。这一流控机制是为了避免发送端持续在发送,其速度超过了接收端处理的速度。TCP传输中也有类似的逻辑,但是是由内核协议栈来实现的。RdmaEndpoint内实现了这一流控机制,通过接收端显式回复ACK来确认接收端处理完毕。为了减少ACK本身的开销,让ACK以立即数形式返回,可以被附在数据消息里。 @@ -52,26 +52,26 @@ RDMA支持事件驱动和轮询两种模式,默认是事件驱动模式,通 # 参数 可配置参数说明: -* rdma_trace_verbose: 日志中打印RDMA建连相关信息,默认false -* rdma_recv_zerocopy: 是否启用接收零拷贝,默认true -* rdma_zerocopy_min_size: 接收零拷贝最小的msg大小,默认512B -* rdma_recv_block_type: 为接收数据预准备的block类型,分为三类default(8KB)/large(64KB)/huge(2MB),默认为default -* rdma_prepared_qp_size: 程序启动预生成的QP的大小,默认128 -* rdma_prepared_qp_cnt: 程序启动预生成的QP的数量,默认1024 -* rdma_max_sge: 允许的最大发送SGList长度,默认为0,即采用硬件所支持的最大长度 -* rdma_sq_size: SQ大小,默认128 -* rdma_rq_size: RQ大小,默认128 -* rdma_cqe_poll_once: 从CQ中一次性poll出的CQE数量,默认32 -* rdma_gid_index: 使用本地GID表中的Index,默认为-1,即选用最大的可用GID Index -* rdma_port: 使用IB设备的port number,默认为1 -* rdma_device: 使用IB设备的名称,默认为空,即使用第一个active的设备 -* rdma_memory_pool_initial_size_mb: 内存池的初始大小,单位MB,默认1024 -* rdma_memory_pool_increase_size_mb: 内存池每次动态增长的大小,单位MB,默认1024 -* rdma_memory_pool_max_regions: 最大的内存池块数,默认16 -* rdma_memory_pool_buckets: 内存池中为避免竞争采用的bucket数目,默认为4 -* rdma_memory_pool_tls_cache_num: 内存池中thread local的缓存block数目,默认为128 -* rdma_use_polling: 是否使用RDMA的轮询模式,默认false -* rdma_poller_num: 轮询模式下的poller数目,默认1 -* rdma_poller_yield: 轮询模式下的poller是否主动放弃CPU,默认是false -* rdma_edisp_unsched: 让事件驱动器不可以被调度,默认是false -* rdma_disable_bthread: 禁用bthread,默认是false +* rdma_trace_verbose: 日志中打印RDMA建连相关信息,默认false。 +* rdma_recv_zerocopy: 
是否启用接收零拷贝,默认true。 +* rdma_zerocopy_min_size: 接收零拷贝最小的msg大小,默认512B。 +* rdma_recv_block_type: 为接收数据预准备的block类型,分为三类default(8KB)/large(64KB)/huge(2MB),默认为default。 +* rdma_prepared_qp_size: 程序启动预生成的QP的大小,默认128。 +* rdma_prepared_qp_cnt: 程序启动预生成的QP的数量,默认1024。 +* rdma_max_sge: 允许的最大发送SGList长度,默认为0,即采用硬件所支持的最大长度。 +* rdma_sq_size: SQ大小,默认128。 +* rdma_rq_size: RQ大小,默认128。 +* rdma_cqe_poll_once: 从CQ中一次性poll出的CQE数量,默认32。 +* rdma_gid_index: 使用本地GID表中的Index,默认为-1,即选用最大的可用GID Index。 +* rdma_port: 使用IB设备的port number,默认为1。 +* rdma_device: 使用IB设备的名称,默认为空,即使用第一个active的设备。 +* rdma_memory_pool_initial_size_mb: 内存池的初始大小,单位MB,默认1024。 +* rdma_memory_pool_increase_size_mb: 内存池每次动态增长的大小,单位MB,默认1024。 +* rdma_memory_pool_max_regions: 最大的内存池块数,默认3。 +* rdma_memory_pool_buckets: 内存池中为避免竞争采用的bucket数目,默认为4。 +* rdma_memory_pool_tls_cache_num: 内存池中thread local的缓存block数目,默认为128。 +* rdma_use_polling: 是否使用RDMA的轮询模式,默认false。 +* rdma_poller_num: 轮询模式下的poller数目,默认1。 +* rdma_poller_yield: 轮询模式下的poller是否主动放弃CPU,默认是false。 +* rdma_edisp_unsched: 让事件驱动器不可以被调度,默认是false。 +* rdma_disable_bthread: 禁用bthread,默认是false。 diff --git a/docs/en/rdma.md b/docs/en/rdma.md index c0e88ce9b2..99f1ecd781 100644 --- a/docs/en/rdma.md +++ b/docs/en/rdma.md @@ -35,7 +35,7 @@ RDMA does not use socket API like TCP. However, the brpc::Socket class is still brpc uses RDMA RC mode. Every RdmaEndpoint has its own QP. Before establishing RDMA connection, a TCP connection is necessary to exchange some information such as GID and QPN. We call this procedure handshake. Since handshake needs TCP connection, the TCP fd in the corresponding Socket is still valid. The handshake procedure is completed in the AppConnect way in brpc. The TCP connection will keep in EST state but not be used for data transmission after RDMA connection is established. Once the TCP connection is closed, the corresponding RDMA connection will be set error. -The first key feature in RdmaEndpoint data transmission is zero copy. 
All data which need to transmit is in the Blocks of IOBuf. Thus all the Blocks need to be released after the remote side completes the receiving. The reference of these Blocks are stored in RdmaEndpoint::_sbuf. In order to realize receiving zero copy, the receive side must post receive buffers in Blocks of IOBuf, which are stored in RdmaEndpoint::_rbuf. Note that all the Blocks posted in the receive side has a fixed size (recv_block_size). The transmit side can only send message smaller than that. Otherwise the receive side cannot receive data successfully. +The first key feature in RdmaEndpoint data transmission is zero copy. All data to be transmitted is stored in the Blocks of IOBuf. Thus the Blocks can only be released after the corresponding send CQEs are triggered. The references to these Blocks are stored in RdmaEndpoint::_sbuf. In order to realize receiving zero copy, the receive side must post receive buffers in Blocks of IOBuf, which are stored in RdmaEndpoint::_rbuf. Note that all the Blocks posted in the receive side have a fixed size (recv_block_size). The transmit side can only send messages no larger than that. Otherwise the receive side cannot receive data successfully. The second key feature in RdmaEndpoint data transmission is sliding window flow control. The flow control is to avoid fast transmit side overwhelming slow receive side. TCP has similar mechanism in kernel TCP stack. RdmaEndpoint implements this mechanism with explicit ACKs from receive side. to reduce the overhead of ACKs, the ACK number can be piggybacked in ordinary data message as immediate data. @@ -50,21 +50,26 @@ RDMA is hardware-related. 
It has some different concepts such as device, port, G # Parameters Configurable parameters: -* rdma_trace_verbose: to print RDMA connection information in log,default is false -* rdma_recv_zerocopy: enable zero copy in receive side,default is true -* rdma_zerocopy_min_size: the min message size for receive zero copy (in Byte),default is 512 -* rdma_recv_block_type: the block type used for receiving, can be default(8KB)/large(64KB)/huge(2MB),default is default -* rdma_prepared_qp_size: the size of QPs created at the beginning of the application,default is 128 -* rdma_prepared_qp_cnt: the number of QPs created at the beginning of the application,default is 1024 -* rdma_max_sge: the max length of sglist, default is 0, which is the max length allowed by the device -* rdma_sq_size: the size of SQ,default is 128 -* rdma_rq_size: the size of RQ,default is 128 -* rdma_cqe_poll_once: the number of CQE pooled from CQ once,default is 32 -* rdma_gid_index: the index of local GID table used,default is -1,which is the maximum GID index -* rdma_port: the port number used,default is 1 -* rdma_device: the IB device name,default is empty,which is the first active device -* rdma_memory_pool_initial_size_mb: the initial region size of RDMA memory pool (in MB),default is 1024 -* rdma_memory_pool_increase_size_mb: the step increase region size of RDMA memory pool (in MB),default is 1024 -* rdma_memory_pool_max_regions: the max number of regions in RDMA memory pool,default is 16 -* rdma_memory_pool_buckets: the number of buckets for avoiding mutex contention in RDMA memory pool,default is 4 -* rdma_memory_pool_tls_cache_num: the number of thread local cached blocks in RDMA memory pool,default is 128 +* rdma_trace_verbose: to print RDMA connection information in log,default is false. +* rdma_recv_zerocopy: enable zero copy in receive side,default is true. +* rdma_zerocopy_min_size: the min message size for receive zero copy (in Byte),default is 512. 
+* rdma_recv_block_type: the block type used for receiving, can be default(8KB)/large(64KB)/huge(2MB),default is default. +* rdma_prepared_qp_size: the size of QPs created at the beginning of the application,default is 128. +* rdma_prepared_qp_cnt: the number of QPs created at the beginning of the application,default is 1024. +* rdma_max_sge: the max length of sglist, default is 0, which is the max length allowed by the device. +* rdma_sq_size: the size of SQ,default is 128. +* rdma_rq_size: the size of RQ,default is 128. +* rdma_cqe_poll_once: the number of CQEs polled from CQ once,default is 32. +* rdma_gid_index: the index of local GID table used,default is -1,which is the maximum GID index. +* rdma_port: the port number used,default is 1. +* rdma_device: the IB device name,default is empty,which is the first active device. +* rdma_memory_pool_initial_size_mb: the initial region size of RDMA memory pool (in MB),default is 1024. +* rdma_memory_pool_increase_size_mb: the step increase region size of RDMA memory pool (in MB),default is 1024. +* rdma_memory_pool_max_regions: the max number of regions in RDMA memory pool,default is 3. +* rdma_memory_pool_buckets: the number of buckets for avoiding mutex contention in RDMA memory pool,default is 4. +* rdma_memory_pool_tls_cache_num: the number of thread local cached blocks in RDMA memory pool,default is 128. +* rdma_use_polling: Whether to use RDMA polling mode, default is false. +* rdma_poller_num: The number of pollers in polling mode, default is 1. +* rdma_poller_yield: Whether pollers in polling mode voluntarily relinquish the CPU, default is false. +* rdma_edisp_unsched: Prevents the event driver from being scheduled, default is false. +* rdma_disable_bthread: Disables bthread, default is false. 
diff --git a/src/brpc/rdma/rdma_endpoint.cpp b/src/brpc/rdma/rdma_endpoint.cpp index 5176756510..616ef33252 100644 --- a/src/brpc/rdma/rdma_endpoint.cpp +++ b/src/brpc/rdma/rdma_endpoint.cpp @@ -147,24 +147,21 @@ void HelloMessage::Deserialize(void* data) { qp_num = butil::NetToHost32(*(uint32_t*)((char*)current_pos + 16)); } -RdmaResource::RdmaResource() - : qp(NULL) - , cq(NULL) - , comp_channel(NULL) - , next(NULL) { } - RdmaResource::~RdmaResource() { - if (qp) { + if (NULL != qp) { IbvDestroyQp(qp); - qp = NULL; } - if (cq) { - IbvDestroyCq(cq); - cq = NULL; + if (NULL != polling_cq) { + IbvDestroyCq(polling_cq); + } + if (NULL != send_cq) { + IbvDestroyCq(send_cq); } - if (comp_channel) { + if (NULL != recv_cq) { + IbvDestroyCq(recv_cq); + } + if (NULL != comp_channel) { IbvDestroyCompChannel(comp_channel); - comp_channel = NULL; } } @@ -172,13 +169,11 @@ RdmaEndpoint::RdmaEndpoint(Socket* s) : _socket(s) , _state(UNINIT) , _resource(NULL) - , _cq_events(0) + , _send_cq_events(0) + , _recv_cq_events(0) , _cq_sid(INVALID_SOCKET_ID) , _sq_size(FLAGS_rdma_sq_size) , _rq_size(FLAGS_rdma_rq_size) - , _sbuf() - , _rbuf() - , _rbuf_data() , _remote_recv_block_size(0) , _accumulated_ack(0) , _unsolicited(0) @@ -189,7 +184,9 @@ RdmaEndpoint::RdmaEndpoint(Socket* s) , _rq_received(0) , _local_window_capacity(0) , _remote_window_capacity(0) - , _window_size(0) + , _sq_imm_window_size(0) + , _remote_rq_window_size(0) + , _sq_window_size(0) , _new_rq_wrs(0) { if (_sq_size < MIN_QP_SIZE) { @@ -215,22 +212,28 @@ RdmaEndpoint::~RdmaEndpoint() { void RdmaEndpoint::Reset() { DeallocateResources(); - _cq_events = 0; - _cq_sid = INVALID_SOCKET_ID; _state = UNINIT; + _resource = NULL; + _send_cq_events = 0; + _recv_cq_events = 0; + _cq_sid = INVALID_SOCKET_ID; _sbuf.clear(); _rbuf.clear(); _rbuf_data.clear(); + _remote_recv_block_size = 0; _accumulated_ack = 0; _unsolicited = 0; + _unsolicited_bytes = 0; _sq_current = 0; _sq_unsignaled = 0; - _local_window_capacity = 0; - 
_remote_window_capacity = 0; - _window_size.store(0, butil::memory_order_relaxed); - _new_rq_wrs = 0; _sq_sent = 0; _rq_received = 0; + _local_window_capacity = 0; + _remote_window_capacity = 0; + _sq_imm_window_size = 0; + _remote_rq_window_size.store(0, butil::memory_order_relaxed); + _sq_window_size.store(0, butil::memory_order_relaxed); + _new_rq_wrs.store(0, butil::memory_order_relaxed); } void RdmaConnect::StartConnect(const Socket* socket, @@ -255,6 +258,7 @@ void RdmaConnect::StartConnect(const Socket* socket, if (bthread_start_background(&tid, &attr, RdmaEndpoint::ProcessHandshakeAtClient, socket->_rdma_ep) < 0) { LOG(FATAL) << "Fail to start handshake bthread"; + Run(); } else { s.release(); } @@ -516,8 +520,12 @@ void* RdmaEndpoint::ProcessHandshakeAtClient(void* arg) { ep->_local_window_capacity = std::min(ep->_sq_size, remote_msg.rq_size) - RESERVED_WR_NUM; ep->_remote_window_capacity = - std::min(ep->_rq_size, remote_msg.sq_size) - RESERVED_WR_NUM, - ep->_window_size.store(ep->_local_window_capacity, butil::memory_order_relaxed); + std::min(ep->_rq_size, remote_msg.sq_size) - RESERVED_WR_NUM; + ep->_sq_imm_window_size = RESERVED_WR_NUM; + ep->_remote_rq_window_size.store( + ep->_local_window_capacity, butil::memory_order_relaxed); + ep->_sq_window_size.store( + ep->_local_window_capacity, butil::memory_order_relaxed); ep->_state = C_BRINGUP_QP; if (ep->BringUpQp(remote_msg.lid, remote_msg.gid, remote_msg.qp_num) < 0) { @@ -548,11 +556,11 @@ void* RdmaEndpoint::ProcessHandshakeAtClient(void* arg) { if (s->_rdma_state == Socket::RDMA_ON) { ep->_state = ESTABLISHED; LOG_IF(INFO, FLAGS_rdma_trace_verbose) - << "Handshake ends (use rdma) on " << s->description(); + << "Client handshake ends (use rdma) on " << s->description(); } else { ep->_state = FALLBACK_TCP; LOG_IF(INFO, FLAGS_rdma_trace_verbose) - << "Handshake ends (use tcp) on " << s->description(); + << "Client handshake ends (use tcp) on " << s->description(); } errno = 0; @@ -624,8 +632,12 @@ 
void* RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { ep->_local_window_capacity = std::min(ep->_sq_size, remote_msg.rq_size) - RESERVED_WR_NUM; ep->_remote_window_capacity = - std::min(ep->_rq_size, remote_msg.sq_size) - RESERVED_WR_NUM, - ep->_window_size.store(ep->_local_window_capacity, butil::memory_order_relaxed); + std::min(ep->_rq_size, remote_msg.sq_size) - RESERVED_WR_NUM; + ep->_sq_imm_window_size = RESERVED_WR_NUM; + ep->_remote_rq_window_size.store( + ep->_local_window_capacity, butil::memory_order_relaxed); + ep->_sq_window_size.store( + ep->_local_window_capacity, butil::memory_order_relaxed); ep->_state = S_ALLOC_QPCQ; if (ep->AllocateResources() < 0) { @@ -701,13 +713,13 @@ void* RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { s->_rdma_state = Socket::RDMA_ON; ep->_state = ESTABLISHED; LOG_IF(INFO, FLAGS_rdma_trace_verbose) - << "Handshake ends (use rdma) on " << s->description(); + << "Server handshake ends (use rdma) on " << s->description(); } } else { s->_rdma_state = Socket::RDMA_OFF; ep->_state = FALLBACK_TCP; LOG_IF(INFO, FLAGS_rdma_trace_verbose) - << "Handshake ends (use tcp) on " << s->description(); + << "Server handshake ends (use tcp) on " << s->description(); } ep->TryReadOnTcp(); @@ -720,7 +732,8 @@ bool RdmaEndpoint::IsWritable() const { return false; } - return _window_size.load(butil::memory_order_relaxed) > 0; + return _remote_rq_window_size.load(butil::memory_order_relaxed) > 0 && + _sq_window_size.load(butil::memory_order_relaxed) > 0; } // RdmaIOBuf inherits from IOBuf to provide a new function. 
@@ -790,13 +803,16 @@ ssize_t RdmaEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { size_t total_len = 0; size_t current = 0; - uint32_t window = 0; + uint32_t remote_rq_window_size = + _remote_rq_window_size.load(butil::memory_order_relaxed); + uint32_t sq_window_size = + _sq_window_size.load(butil::memory_order_relaxed); ibv_send_wr wr; int max_sge = GetRdmaMaxSge(); ibv_sge sglist[max_sge]; while (current < ndata) { - window = _window_size.load(butil::memory_order_relaxed); - if (window == 0) { + if (remote_rq_window_size == 0 || sq_window_size == 0) { + // There is no space left in SQ or remote RQ. if (total_len > 0) { break; } else { @@ -815,7 +831,7 @@ ssize_t RdmaEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { size_t sge_index = 0; while (sge_index < (uint32_t)max_sge && this_len < _remote_recv_block_size) { - if (data->size() == 0) { + if (data->empty()) { // The current IOBuf is empty, find next one ++current; if (current == ndata) { @@ -826,8 +842,7 @@ ssize_t RdmaEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { } ssize_t len = data->cut_into_sglist_and_iobuf( - sglist, &sge_index, to, max_sge, - _remote_recv_block_size - this_len); + sglist, &sge_index, to, max_sge, _remote_recv_block_size - this_len); if (len < 0) { return -1; } @@ -845,7 +860,7 @@ ssize_t RdmaEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { wr.imm_data = butil::HostToNet32(imm); // Avoid too much recv completion event to reduce the cpu overhead bool solicited = false; - if (window == 1 || current + 1 >= ndata) { + if (remote_rq_window_size == 1 || sq_window_size == 1 || current + 1 >= ndata) { // Only last message in the write queue or last message in the // current window will be flagged as solicited. 
solicited = true; @@ -878,6 +893,7 @@ ssize_t RdmaEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { // Refer to: // http::www.rdmamojo.com/2014/06/30/working-unsignaled-completions/ wr.send_flags |= IBV_SEND_SIGNALED; + wr.wr_id = _sq_unsignaled; _sq_unsignaled = 0; } @@ -886,9 +902,9 @@ ssize_t RdmaEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { if (err != 0) { // We use other way to guarantee the Send Queue is not full. // So we just consider this error as an unrecoverable error. - LOG(WARNING) << "Fail to ibv_post_send: " << berror(err) - << ", window=" << window - << ", sq_current=" << _sq_current; + std::ostringstream oss; + DebugInfo(oss, ", "); + LOG(WARNING) << "Fail to ibv_post_send: " << berror(err) << " " << oss.str(); errno = err; return -1; } @@ -898,18 +914,22 @@ ssize_t RdmaEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { _sq_current = 0; } - // Update _window_size. Note that _window_size will never be negative. + // Update `_remote_rq_window_size' and `_sq_window_size'. Note that + // `_remote_rq_window_size' and `_sq_window_size' will never be negative. // Because there is at most one thread can enter this function for each - // Socket, and the other thread of HandleCompletion can only add this - // counter. - _window_size.fetch_sub(1, butil::memory_order_relaxed); + // Socket, and the other thread of HandleCompletion can only add these + // counters. 
+ remote_rq_window_size = + _remote_rq_window_size.fetch_sub(1, butil::memory_order_relaxed) - 1; + sq_window_size = _sq_window_size.fetch_sub(1, butil::memory_order_relaxed) - 1; } return total_len; } int RdmaEndpoint::SendAck(int num) { - if (_new_rq_wrs.fetch_add(num, butil::memory_order_relaxed) > _remote_window_capacity / 2) { + if (_new_rq_wrs.fetch_add(num, butil::memory_order_relaxed) > _remote_window_capacity / 2 && + _sq_imm_window_size > 0) { return SendImm(_new_rq_wrs.exchange(0, butil::memory_order_relaxed)); } return 0; @@ -924,17 +944,24 @@ int RdmaEndpoint::SendImm(uint32_t imm) { memset(&wr, 0, sizeof(wr)); wr.opcode = IBV_WR_SEND_WITH_IMM; wr.imm_data = butil::HostToNet32(imm); - wr.send_flags |= IBV_SEND_SOLICITED; - wr.send_flags |= IBV_SEND_SIGNALED; + wr.send_flags |= IBV_SEND_SOLICITED | IBV_SEND_SIGNALED; + wr.wr_id = 0; ibv_send_wr* bad = NULL; int err = ibv_post_send(_resource->qp, &wr, &bad); if (err != 0) { + std::ostringstream oss; + DebugInfo(oss, ", "); // We use other way to guarantee the Send Queue is not full. // So we just consider this error as an unrecoverable error. - LOG(WARNING) << "Fail to ibv_post_send: " << berror(err); + LOG(WARNING) << "Fail to ibv_post_send: " << berror(err) << " " << oss.str(); return -1; } + + // `_sq_imm_window_size' will never be negative. + // Because IMM can only be sent if + // `_sq_imm_window_size` is greater than 0. + _sq_imm_window_size -= 1; return 0; } @@ -942,8 +969,30 @@ ssize_t RdmaEndpoint::HandleCompletion(ibv_wc& wc) { bool zerocopy = FLAGS_rdma_recv_zerocopy; switch (wc.opcode) { case IBV_WC_SEND: { // send completion - // Do nothing - break; + if (0 == wc.wr_id) { + _sq_imm_window_size += 1; + // If there are any unacknowledged recvs, send an ack. + SendAck(0); + return 0; + } + // Update SQ window. 
+ uint16_t wnd_to_update = wc.wr_id; + for (uint16_t i = 0; i < wnd_to_update; ++i) { + _sbuf[_sq_sent++].clear(); + if (_sq_sent == _sq_size - RESERVED_WR_NUM) { + _sq_sent = 0; + } + } + butil::subtle::MemoryBarrier(); + + _sq_window_size.fetch_add(wnd_to_update, butil::memory_order_relaxed); + if (_remote_rq_window_size.load(butil::memory_order_relaxed) >= + _local_window_capacity / 8) { + // Do not wake up writing thread right after polling IBV_WC_SEND. + // Otherwise the writing thread may switch to background too quickly. + _socket->WakeAsEpollOut(); + } + return 0; } case IBV_WC_RECV: { // recv completion // Please note that only the first wc.byte_len bytes is valid @@ -953,32 +1002,21 @@ ssize_t RdmaEndpoint::HandleCompletion(ibv_wc& wc) { } CHECK(_state != FALLBACK_TCP); if (zerocopy) { - butil::IOBuf tmp; - _rbuf[_rq_received].cutn(&tmp, wc.byte_len); - _socket->_read_buf.append(tmp); + _rbuf[_rq_received].cutn(&_socket->_read_buf, wc.byte_len); } else { // Copy data when the receive data is really small _socket->_read_buf.append(_rbuf_data[_rq_received], wc.byte_len); } } - if (wc.imm_data > 0) { - // Clear sbuf here because we ignore event wakeup for send completions - uint32_t acks = butil::NetToHost32(wc.imm_data); - uint32_t num = acks; - while (num > 0) { - _sbuf[_sq_sent++].clear(); - if (_sq_sent == _sq_size - RESERVED_WR_NUM) { - _sq_sent = 0; - } - --num; - } - butil::subtle::MemoryBarrier(); - + if (0 != (wc.wc_flags & IBV_WC_WITH_IMM) && wc.imm_data > 0) { // Update window + uint32_t acks = butil::NetToHost32(wc.imm_data); uint32_t wnd_thresh = _local_window_capacity / 8; - if (_window_size.fetch_add(acks, butil::memory_order_relaxed) >= wnd_thresh - || acks >= wnd_thresh) { - // Do not wake up writing thread right after _window_size > 0. 
+ uint32_t remote_rq_window_size = + _remote_rq_window_size.fetch_add(acks, butil::memory_order_relaxed); + if (_sq_window_size.load(butil::memory_order_relaxed) > 0 && + (remote_rq_window_size >= wnd_thresh || acks >= wnd_thresh)) { + // Do not wake up writing thread right after _remote_rq_window_size > 0. // Otherwise the writing thread may switch to background too quickly. _socket->WakeAsEpollOut(); } @@ -1050,71 +1088,73 @@ int RdmaEndpoint::PostRecv(uint32_t num, bool zerocopy) { return 0; } -static RdmaResource* AllocateQpCq(uint16_t sq_size, uint16_t rq_size) { - RdmaResource* res = new (std::nothrow) RdmaResource; - if (!res) { - return NULL; - } +static ibv_qp* AllocateQp(ibv_cq* send_cq, ibv_cq* recv_cq, uint32_t sq_size, uint32_t rq_size) { + ibv_qp_init_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.send_cq = send_cq; + attr.recv_cq = recv_cq; + attr.cap.max_send_wr = sq_size; + attr.cap.max_recv_wr = rq_size; + attr.cap.max_send_sge = GetRdmaMaxSge(); + attr.cap.max_recv_sge = 1; + attr.qp_type = IBV_QPT_RC; + return IbvCreateQp(GetRdmaPd(), &attr); +} +static RdmaResource* AllocateQpCq(uint16_t sq_size, uint16_t rq_size) { + std::unique_ptr resource(new RdmaResource); if (!FLAGS_rdma_use_polling) { - res->comp_channel = IbvCreateCompChannel(GetRdmaContext()); - if (!res->comp_channel) { + resource->comp_channel = IbvCreateCompChannel(GetRdmaContext()); + if (NULL == resource->comp_channel) { PLOG(WARNING) << "Fail to create comp channel for CQ"; - delete res; return NULL; } - butil::make_close_on_exec(res->comp_channel->fd); - if (butil::make_non_blocking(res->comp_channel->fd) < 0) { + if (butil::make_close_on_exec(resource->comp_channel->fd) < 0) { + PLOG(WARNING) << "Fail to set comp channel close-on-exec"; + return NULL; + } + if (butil::make_non_blocking(resource->comp_channel->fd) < 0) { PLOG(WARNING) << "Fail to set comp channel nonblocking"; - delete res; return NULL; } - res->cq = IbvCreateCq(GetRdmaContext(), 2 * 
FLAGS_rdma_prepared_qp_size, - NULL, res->comp_channel, GetRdmaCompVector()); - if (!res->cq) { - PLOG(WARNING) << "Fail to create CQ"; - delete res; + resource->send_cq = IbvCreateCq(GetRdmaContext(), FLAGS_rdma_prepared_qp_size, + NULL, resource->comp_channel, GetRdmaCompVector()); + if (NULL == resource->send_cq) { + PLOG(WARNING) << "Fail to create send CQ"; return NULL; } - } else { - res->cq = IbvCreateCq(GetRdmaContext(), 2 * FLAGS_rdma_prepared_qp_size, - NULL, NULL, 0); - if (!res->cq) { - PLOG(WARNING) << "Fail to create CQ"; - delete res; + + resource->recv_cq = IbvCreateCq(GetRdmaContext(), FLAGS_rdma_prepared_qp_size, + NULL, resource->comp_channel, GetRdmaCompVector()); + if (NULL == resource->recv_cq) { + PLOG(WARNING) << "Fail to create recv CQ"; return NULL; } - } - ibv_qp_init_attr attr; - memset(&attr, 0, sizeof(attr)); - attr.send_cq = res->cq; - attr.recv_cq = res->cq; - // NOTE: Since we hope to reduce send completion events, we set signaled - // send_wr every 1/4 of the total wnd. The wnd will increase when the ack - // is received, which means the receive side has already received the data - // in the corresponding send_wr. However, the ack does not mean the send_wr - // has been removed from SQ if it is set unsignaled. The reason is that - // the unsignaled send_wr is removed from SQ only after the CQE of next - // signaled send_wr is polled. Thus in a rare case, a new send_wr cannot be - // posted to SQ even in the wnd is not empty. In order to solve this - // problem, we enlarge the size of SQ to contain redundant 1/4 of the wnd, - // which is the maximum number of unsignaled send_wrs. 
- attr.cap.max_send_wr = sq_size * 5 / 4; /*NOTE*/ - attr.cap.max_recv_wr = rq_size; - attr.cap.max_send_sge = GetRdmaMaxSge(); - attr.cap.max_recv_sge = 1; - attr.qp_type = IBV_QPT_RC; - res->qp = IbvCreateQp(GetRdmaPd(), &attr); - if (!res->qp) { - PLOG(WARNING) << "Fail to create QP"; - delete res; - return NULL; + resource->qp = AllocateQp(resource->send_cq, resource->recv_cq, sq_size, rq_size); + if (NULL == resource->qp) { + PLOG(WARNING) << "Fail to create QP"; + return NULL; + } + } else { + resource->polling_cq = + IbvCreateCq(GetRdmaContext(), 2 * FLAGS_rdma_prepared_qp_size, NULL, NULL, 0); + if (NULL == resource->polling_cq) { + PLOG(WARNING) << "Fail to create polling CQ"; + return NULL; + } + resource->qp = AllocateQp(resource->polling_cq, + resource->polling_cq, + sq_size, rq_size); + if (NULL == resource->qp) { + PLOG(WARNING) << "Fail to create QP"; + return NULL; + } } - return res; + return resource.release(); } int RdmaEndpoint::AllocateResources() { @@ -1143,6 +1183,13 @@ int RdmaEndpoint::AllocateResources() { } if (!FLAGS_rdma_use_polling) { + if (0 != ReqNotifyCq(true)) { + return -1; + } + if (0 != ReqNotifyCq(false)) { + return -1; + } + SocketOptions options; options.user = this; options.keytable_pool = _socket->_keytable_pool; @@ -1150,13 +1197,6 @@ int RdmaEndpoint::AllocateResources() { options.on_edge_triggered_events = PollCq; if (Socket::Create(options, &_cq_sid) < 0) { PLOG(WARNING) << "Fail to create socket for cq"; - return -1; - } - - int err = ibv_req_notify_cq(_resource->cq, 1); - if (err != 0) { - LOG(WARNING) << "Fail to arm CQ comp channel: " << berror(err); - return -1; } } else { SocketOptions options; @@ -1263,6 +1303,15 @@ int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, uint32_t qp_num) { return 0; } +static void DeallocateCq(ibv_cq* cq) { + if (NULL == cq) { + return; + } + + int err = IbvDestroyCq(cq); + LOG_IF(WARNING, 0 != err) << "Fail to destroy CQ: " << berror(err); +} + void 
RdmaEndpoint::DeallocateResources() { if (!_resource) { return; @@ -1280,88 +1329,119 @@ void RdmaEndpoint::DeallocateResources() { move_to_rdma_resource_list = true; } } - int fd = -1; - if (_resource->comp_channel) { - fd = _resource->comp_channel->fd; + + if (NULL != _resource->send_cq) { + IbvAckCqEvents(_resource->send_cq, _send_cq_events); + } + if (NULL != _resource->recv_cq) { + IbvAckCqEvents(_resource->recv_cq, _recv_cq_events); } - int err; + + bool remove_consumer = true; if (!move_to_rdma_resource_list) { - if (_resource->qp) { - err = IbvDestroyQp(_resource->qp); - if (err != 0) { - LOG(WARNING) << "Fail to destroy QP: " << berror(err); - } + if (NULL != _resource->qp) { + int err = IbvDestroyQp(_resource->qp); + LOG_IF(WARNING, 0 != err) << "Fail to destroy QP: " << berror(err); _resource->qp = NULL; } - if (_resource->cq) { - IbvAckCqEvents(_resource->cq, _cq_events); - err = IbvDestroyCq(_resource->cq); - if (err != 0) { - PLOG(WARNING) << "Fail to destroy CQ: " << berror(err); - } - _resource->cq = NULL; - } - if (_resource->comp_channel) { - // destroy comp_channel will destroy this fd + + DeallocateCq(_resource->polling_cq); + DeallocateCq(_resource->send_cq); + DeallocateCq(_resource->recv_cq); + + if (NULL != _resource->comp_channel) { + // Destroy send_comp_channel will destroy this fd, // so that we should remove it from epoll fd first - _socket->_io_event.RemoveConsumer(fd); - fd = -1; - err = IbvDestroyCompChannel(_resource->comp_channel); - if (err != 0) { - LOG(WARNING) << "Fail to destroy CQ channel: " << berror(err); - } - _resource->comp_channel = NULL; + int fd = _resource->comp_channel->fd; + GetGlobalEventDispatcher(fd, _socket->_io_event.bthread_tag()).RemoveConsumer(fd); + remove_consumer = false; + int err = IbvDestroyCompChannel(_resource->comp_channel); + LOG_IF(WARNING, 0 != err) << "Fail to destroy CQ channel: " << berror(err); + } + + _resource->polling_cq = NULL; + _resource->send_cq = NULL; + _resource->recv_cq = NULL; + 
_resource->comp_channel = NULL; delete _resource; _resource = NULL; } - SocketUniquePtr s; - if (_cq_sid != INVALID_SOCKET_ID) { + if (INVALID_SOCKET_ID != _cq_sid) { + SocketUniquePtr s; if (Socket::Address(_cq_sid, &s) == 0) { - s->_user = NULL; // do not release user (this RdmaEndpoint) - if (fd >= 0) { - _socket->_io_event.RemoveConsumer(fd); + if (remove_consumer) { + s->_io_event.RemoveConsumer(s->_fd); } - s->_fd = -1; // already remove fd from epoll fd + s->_user = NULL; // Do not release user (this RdmaEndpoint). + s->_fd = -1; // Already remove fd from epoll fd. s->SetFailed(); } - _cq_sid = INVALID_SOCKET_ID; } if (move_to_rdma_resource_list) { - if (_resource->cq) { - IbvAckCqEvents(_resource->cq, _cq_events); - } BAIDU_SCOPED_LOCK(*g_rdma_resource_mutex); _resource->next = g_rdma_resource_list; g_rdma_resource_list = _resource; } - - _resource = NULL; } static const int MAX_CQ_EVENTS = 128; -int RdmaEndpoint::GetAndAckEvents() { - int events = 0; void* context = NULL; - while (1) { - if (IbvGetCqEvent(_resource->comp_channel, &_resource->cq, &context) != 0) { +int RdmaEndpoint::GetAndAckEvents(SocketUniquePtr& s) { + void* context = NULL; + ibv_cq* cq = NULL; + while (true) { + if (IbvGetCqEvent(_resource->comp_channel, &cq, &context) != 0) { if (errno != EAGAIN) { + const int saved_errno = errno; + PLOG(ERROR) << "Fail to get cq event from " << s->description(); + s->SetFailed(saved_errno, "Fail to get cq event from %s: %s", + s->description().c_str(), berror(saved_errno)); return -1; } break; } - ++events; + if (cq == _resource->send_cq) { + ++_send_cq_events; + } else if (cq == _resource->recv_cq) { + ++_recv_cq_events; + } else { + // Unexpected CQ event that does not belong to + // this endpoint's send/recv CQs. + LOG(WARNING) << "Unexpected CQ event from cq=" << cq + << " of " << s->description(); + // Acknowledge this single event immediately + // to avoid leaking unacknowledged events. 
+ IbvAckCqEvents(cq, 1); + } } - if (events == 0) { - return 0; + if (_send_cq_events >= MAX_CQ_EVENTS) { + IbvAckCqEvents(_resource->send_cq, _send_cq_events); + _send_cq_events = 0; + } + if (_recv_cq_events >= MAX_CQ_EVENTS) { + IbvAckCqEvents(_resource->recv_cq, _recv_cq_events); + _recv_cq_events = 0; } - _cq_events += events; - if (_cq_events >= MAX_CQ_EVENTS) { - IbvAckCqEvents(_resource->cq, _cq_events); - _cq_events = 0; + return 0; +} + +int RdmaEndpoint::ReqNotifyCq(bool send_cq) { + errno = ibv_req_notify_cq( + send_cq ? _resource->send_cq : _resource->recv_cq, + send_cq ? 0 : 1); + if (0 != errno) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to arm " << (send_cq ? "send" : "recv") + << " CQ comp channel from " << _socket->description(); + _socket->SetFailed(saved_errno, "Fail to arm %s CQ channel from %s: %s", + send_cq ? "send" : "recv", _socket->description().c_str(), + berror(saved_errno)); + return -1; } + return 0; } @@ -1377,14 +1457,17 @@ void RdmaEndpoint::PollCq(Socket* m) { } CHECK(ep == s->_rdma_ep); + bool send = false; + ibv_cq* cq = ep->_resource->recv_cq; + if (!FLAGS_rdma_use_polling) { - if (ep->GetAndAckEvents() < 0) { - const int saved_errno = errno; - PLOG(ERROR) << "Fail to get cq event: " << s->description(); - s->SetFailed(saved_errno, "Fail to get cq event from %s: %s", - s->description().c_str(), berror(saved_errno)); + if (ep->GetAndAckEvents(s) < 0) { return; } + } else { + // Polling is considered as non-send, so no need to change `send'. + // Only need to poll polling_cq. 
+ cq = ep->_resource->polling_cq; } int progress = Socket::PROGRESS_INIT; @@ -1392,7 +1475,7 @@ void RdmaEndpoint::PollCq(Socket* m) { InputMessenger::InputMessageClosure last_msg; ibv_wc wc[FLAGS_rdma_cqe_poll_once]; while (true) { - int cnt = ibv_poll_cq(ep->_resource->cq, FLAGS_rdma_cqe_poll_once, wc); + int cnt = ibv_poll_cq(cq, FLAGS_rdma_cqe_poll_once, wc); if (cnt < 0) { const int saved_errno = errno; PLOG(WARNING) << "Fail to poll cq: " << s->description(); @@ -1404,18 +1487,24 @@ void RdmaEndpoint::PollCq(Socket* m) { if (FLAGS_rdma_use_polling) { return; } + + if (!send) { + // It's send cq's turn. + send = true; + cq = ep->_resource->send_cq; + continue; + } + // `recv_cq' and `send_cq' have been polled. if (!notified) { // Since RDMA only provides one shot event, we have to call the // notify function every time. Because there is a possibility // that the event arrives after the poll but before the notify, // we should re-poll the CQ once after the notify to check if // there is an available CQE. - errno = ibv_req_notify_cq(ep->_resource->cq, 1); - if (errno != 0) { - const int saved_errno = errno; - PLOG(WARNING) << "Fail to arm CQ comp channel: " << s->description(); - s->SetFailed(saved_errno, "Fail to arm cq channel from %s: %s", - s->description().c_str(), berror(saved_errno)); + if (0 != ep->ReqNotifyCq(true)) { + return; + } + if (0 != ep->ReqNotifyCq(false)) { return; } notified = true; @@ -1424,11 +1513,14 @@ void RdmaEndpoint::PollCq(Socket* m) { if (!m->MoreReadEvents(&progress)) { break; } - if (ep->GetAndAckEvents() < 0) { - s->SetFailed(errno, "Fail to ack CQ event on %s", - s->description().c_str()); + + if (0 != ep->GetAndAckEvents(s)) { return; } + + // Restart polling from `recv_cq'. 
+ send = false; + cq = ep->_resource->recv_cq; notified = false; continue; } @@ -1437,7 +1529,7 @@ void RdmaEndpoint::PollCq(Socket* m) { ssize_t bytes = 0; for (int i = 0; i < cnt; ++i) { if (s->Failed()) { - continue; + return; } if (wc[i].status != IBV_WC_SUCCESS) { @@ -1453,11 +1545,15 @@ void RdmaEndpoint::PollCq(Socket* m) { const int saved_errno = errno; PLOG(WARNING) << "Fail to handle RDMA completion: " << s->description(); s->SetFailed(saved_errno, "Fail to handle rdma completion from %s: %s", - s->description().c_str(), berror(saved_errno)); + s->description().c_str(), berror(saved_errno)); } else if (nr > 0) { bytes += nr; } } + // Send CQE has no messages to process. + if (send) { + continue; + } // Just call PrcessNewMessage once for all of these CQEs. // Otherwise it may call too many bthread_flush to affect performance. @@ -1491,20 +1587,21 @@ std::string RdmaEndpoint::GetStateStr() const { } } -void RdmaEndpoint::DebugInfo(std::ostream& os) const { - os << "\nrdma_state=ON" - << "\nhandshake_state=" << GetStateStr() - << "\nrdma_window_size=" << _window_size.load(butil::memory_order_relaxed) - << "\nrdma_local_window_capacity=" << _local_window_capacity - << "\nrdma_remote_window_capacity=" << _remote_window_capacity - << "\nrdma_sbuf_head=" << _sq_current - << "\nrdma_sbuf_tail=" << _sq_sent - << "\nrdma_rbuf_head=" << _rq_received - << "\nrdma_unacked_rq_wr=" << _new_rq_wrs - << "\nrdma_received_ack=" << _accumulated_ack - << "\nrdma_unsolicited_sent=" << _unsolicited - << "\nrdma_unsignaled_sq_wr=" << _sq_unsignaled - << "\n"; +void RdmaEndpoint::DebugInfo(std::ostream& os, butil::StringPiece connector) const { + os << "rdma_state=ON" + << connector << "handshake_state=" << GetStateStr() + << connector << "rdma_sq_imm_window_size=" << _sq_imm_window_size + << connector << "rdma_remote_rq_window_size=" << _remote_rq_window_size.load(butil::memory_order_relaxed) + << connector << "rdma_sq_window_size=" << 
_sq_window_size.load(butil::memory_order_relaxed) + << connector << "rdma_local_window_capacity=" << _local_window_capacity + << connector << "rdma_remote_window_capacity=" << _remote_window_capacity + << connector << "rdma_sbuf_head=" << _sq_current + << connector << "rdma_sbuf_tail=" << _sq_sent + << connector << "rdma_rbuf_head=" << _rq_received + << connector << "rdma_unacked_rq_wr=" << _new_rq_wrs.load(butil::memory_order_relaxed) + << connector << "rdma_received_ack=" << _accumulated_ack + << connector << "rdma_unsolicited_sent=" << _unsolicited + << connector << "rdma_unsignaled_sq_wr=" << _sq_unsignaled; } int RdmaEndpoint::GlobalInitialize() { @@ -1515,6 +1612,8 @@ int RdmaEndpoint::GlobalInitialize() { } else if (FLAGS_rdma_recv_block_type == "huge") { g_rdma_recv_block_size = GetBlockSize(2) - IOBUF_BLOCK_HEADER_LEN; } else { + LOG(ERROR) << "rdma_recv_block_type incorrect " + << "(valid value: default/large/huge)"; errno = EINVAL; return -1; } @@ -1558,9 +1657,9 @@ void RdmaEndpoint::GlobalRelease() { std::vector RdmaEndpoint::_poller_groups; int RdmaEndpoint::PollingModeInitialize(bthread_tag_t tag, - std::function callback, - std::function init_fn, - std::function release_fn) { + std::function callback, + std::function init_fn, + std::function release_fn) { if (!FLAGS_rdma_use_polling) { return 0; } @@ -1642,7 +1741,7 @@ void RdmaEndpoint::PollingModeRelease(bthread_tag_t tag) { auto& running = group.running; running.store(false, std::memory_order_relaxed); for (int i = 0; i < FLAGS_rdma_poller_num; ++i) { - bthread_join(pollers[i].tid, nullptr); + bthread_join(pollers[i].tid, NULL); } } @@ -1651,7 +1750,7 @@ void RdmaEndpoint::PollerAddCqSid() { auto& group = _poller_groups[bthread_self_tag()]; auto& pollers = group.pollers; auto& poller = pollers[index]; - if (_cq_sid != INVALID_SOCKET_ID) { + if (INVALID_SOCKET_ID != _cq_sid) { poller.op_queue.Enqueue(CqSidOp{_cq_sid, CqSidOp::ADD}); } } @@ -1661,7 +1760,7 @@ void RdmaEndpoint::PollerRemoveCqSid() 
{ auto& group = _poller_groups[bthread_self_tag()]; auto& pollers = group.pollers; auto& poller = pollers[index]; - if (_cq_sid != INVALID_SOCKET_ID) { + if (INVALID_SOCKET_ID != _cq_sid) { poller.op_queue.Enqueue(CqSidOp{_cq_sid, CqSidOp::REMOVE}); } } diff --git a/src/brpc/rdma/rdma_endpoint.h b/src/brpc/rdma/rdma_endpoint.h index de7cd5f6d8..eb4714ef0d 100644 --- a/src/brpc/rdma/rdma_endpoint.h +++ b/src/brpc/rdma/rdma_endpoint.h @@ -54,26 +54,30 @@ class RdmaConnect : public AppConnect { private: void Run(); - void (*_done)(int, void*); - void* _data; + void (*_done)(int, void*){NULL}; + void* _data{NULL}; }; struct RdmaResource { - ibv_qp* qp; - ibv_cq* cq; - ibv_comp_channel* comp_channel; - RdmaResource* next; - RdmaResource(); + RdmaResource* next{NULL}; + ibv_qp* qp{NULL}; + // For polling mode. + ibv_cq* polling_cq{NULL}; + // For event mode. + ibv_cq* send_cq{NULL}; + ibv_cq* recv_cq{NULL}; + ibv_comp_channel* comp_channel{NULL}; + RdmaResource() = default; ~RdmaResource(); DISALLOW_COPY_AND_ASSIGN(RdmaResource); }; class BAIDU_CACHELINE_ALIGNMENT RdmaEndpoint : public SocketUser { friend class RdmaConnect; -friend class brpc::Socket; +friend class Socket; public: - RdmaEndpoint(Socket* s); - ~RdmaEndpoint(); + explicit RdmaEndpoint(Socket* s); + ~RdmaEndpoint() override; // Global initialization // Return 0 if success, -1 if failed and errno set @@ -92,7 +96,8 @@ friend class brpc::Socket; bool IsWritable() const; // For debug - void DebugInfo(std::ostream& os) const; + void DebugInfo(std::ostream& os, + butil::StringPiece connector = "\n") const; // Callback when there is new epollin event on TCP fd static void OnNewDataFromTcp(Socket* m); @@ -195,7 +200,10 @@ friend class brpc::Socket; int BringUpQp(uint16_t lid, ibv_gid gid, uint32_t qp_num); // Get event from comp channel and ack the events - int GetAndAckEvents(); + int GetAndAckEvents(SocketUniquePtr& s); + + // Request completion notification on a send/recv CQ. 
+ int ReqNotifyCq(bool send_cq); // Poll CQ and get the work completion static void PollCq(Socket* m); @@ -221,10 +229,11 @@ friend class brpc::Socket; // rdma resource RdmaResource* _resource; - // the number of events requiring ack - int _cq_events; + // The number of events requiring ack. + unsigned int _send_cq_events; + unsigned int _recv_cq_events; - // the SocketId which wrap the comp channel of CQ + // The SocketId which wrap the comp channel of CQ. SocketId _cq_sid; // Capacity of local Send Queue and local Recv Queue @@ -257,8 +266,12 @@ friend class brpc::Socket; uint16_t _local_window_capacity; // The capacity of remote window: min(local RQ, remote SQ) uint16_t _remote_window_capacity; + // The number of IMM WRs we can post to the local Send Queue. + uint16_t _sq_imm_window_size; + // The number of WRs we can send to remote side. + butil::atomic _remote_rq_window_size; // The number of WRs we can post to the local Send Queue - butil::atomic _window_size; + butil::atomic _sq_window_size; // The number of new WRs posted in the local Recv Queue butil::atomic _new_rq_wrs; @@ -282,9 +295,9 @@ friend class brpc::Socket; butil::MPSCQueue> op_queue; // Callback used for io_uring/spdk etc std::function callback; - // Init and Destory function - std::function init_fn; - std::function release_fn; + // Init and Destroy function + std::function init_fn; + std::function release_fn; }; // Poller group struct BAIDU_CACHELINE_ALIGNMENT PollerGroup { diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index ec5300987c..9490650b78 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -895,7 +895,7 @@ void Socket::BeforeRecycled() { const SocketId asid = _agent_socket_id.load(butil::memory_order_relaxed); if (asid != INVALID_SOCKET_ID) { SocketUniquePtr ptr; - if (Socket::Address(asid, &ptr) == 0) { + if (Address(asid, &ptr) == 0) { ptr->ReleaseAdditionalReference(); } } @@ -1319,7 +1319,7 @@ int Socket::Connect(const timespec* abstime, SocketOptions options; 
options.bthread_tag = _io_event.bthread_tag(); options.user = req; - if (Socket::Create(options, &connect_id) != 0) { + if (Create(options, &connect_id) != 0) { LOG(FATAL) << "Fail to create Socket"; delete req; return -1; @@ -1328,7 +1328,7 @@ int Socket::Connect(const timespec* abstime, // `connect_id'. We hold an additional reference here to // ensure `req' to be valid in this scope SocketUniquePtr s; - CHECK_EQ(0, Socket::Address(connect_id, &s)); + CHECK_EQ(0, Address(connect_id, &s)); // Add `sockfd' into epoll so that `HandleEpollOutRequest' will // be called with `req' when epoll event reaches @@ -1425,7 +1425,7 @@ int Socket::ConnectIfNot(const timespec* abstime, WriteRequest* req) { void Socket::WakeAsEpollOut() { _epollout_butex->fetch_add(1, butil::memory_order_release); - bthread::butex_wake_except(_epollout_butex, 0); + bthread::butex_wake_except(_epollout_butex, INVALID_BTHREAD); } int Socket::OnOutputEvent(void* user_data, uint32_t, @@ -1436,7 +1436,7 @@ int Socket::OnOutputEvent(void* user_data, uint32_t, // added into epoll, these sockets miss the signal inside // `SetFailed' and therefore must be signalled here using // `AddressFailedAsWell' to prevent waiting forever - if (Socket::AddressFailedAsWell(id, &s) < 0) { + if (AddressFailedAsWell(id, &s) < 0) { // Ignore recycled sockets return -1; } @@ -1456,7 +1456,7 @@ int Socket::OnOutputEvent(void* user_data, uint32_t, void Socket::HandleEpollOutTimeout(void* arg) { SocketId id = (SocketId)arg; SocketUniquePtr s; - if (Socket::Address(id, &s) != 0) { + if (Address(id, &s) != 0) { return; } EpollOutRequest* req = dynamic_cast(s->user()); @@ -1532,12 +1532,11 @@ int Socket::KeepWriteIfConnected(int fd, int err, void* data) { // Run ssl connect in a new bthread to avoid blocking // the current bthread (thus blocking the EventDispatcher) bthread_t th; - std::unique_ptr thrd_func(brpc::NewCallback( - Socket::CheckConnectedAndKeepWrite, fd, err, data)); + std::unique_ptr thrd_func( + 
NewCallback(CheckConnectedAndKeepWrite, fd, err, data)); bthread_attr_t attr = BTHREAD_ATTR_NORMAL; bthread_attr_set_name(&attr, "CheckConnectedAndKeepWrite"); - if ((err = bthread_start_background(&th, &attr, - RunClosure, thrd_func.get())) == 0) { + if ((err = bthread_start_background(&th, &attr, RunClosure, thrd_func.get())) == 0) { thrd_func.release(); return 0; } else { @@ -2323,7 +2322,7 @@ std::ostream& operator<<(std::ostream& os, const ObjectPtr& obj) { void Socket::DebugSocket(std::ostream& os, SocketId id) { SocketUniquePtr ptr; - int ret = Socket::AddressFailedAsWell(id, &ptr); + int ret = AddressFailedAsWell(id, &ptr); if (ret < 0) { os << "SocketId=" << id << " is invalid or recycled"; return; @@ -2920,7 +2919,7 @@ int Socket::GetShortSocket(SocketUniquePtr* short_socket) { opt.app_connect = _app_connect; opt.use_rdma = (_rdma_ep) ? true : false; if (get_client_side_messenger()->Create(opt, &id) != 0 || - Socket::Address(id, short_socket) != 0) { + Address(id, short_socket) != 0) { return -1; } (*short_socket)->ShareStats(this); @@ -2931,7 +2930,7 @@ int Socket::GetAgentSocket(SocketUniquePtr* out, bool (*checkfn)(Socket*)) { SocketId id = _agent_socket_id.load(butil::memory_order_relaxed); SocketUniquePtr tmp_sock; do { - if (Socket::Address(id, &tmp_sock) == 0) { + if (Address(id, &tmp_sock) == 0) { if (checkfn == NULL || checkfn(tmp_sock.get())) { out->swap(tmp_sock); return 0; diff --git a/test/brpc_rdma_unittest.cpp b/test/brpc_rdma_unittest.cpp index 066d01277e..ccb280f1c8 100644 --- a/test/brpc_rdma_unittest.cpp +++ b/test/brpc_rdma_unittest.cpp @@ -209,8 +209,7 @@ TEST_F(RdmaTest, client_hello_msg_invalid_magic_str) { uint8_t data[RDMA_HELLO_MSG_LEN]; memcpy(data, "PRPC", 4); // send as normal baidu_std protocol - memset(data + 4, 0, 32); - ASSERT_EQ(38, write(sockfd, data, 38)); + ASSERT_EQ(4, write(sockfd, data, 4)); usleep(100000); // wait for server to handle the msg ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); @@ 
-660,9 +659,6 @@ TEST_F(RdmaTest, client_send_data_on_tcp_after_ack_send) { ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); ASSERT_EQ(sizeof(flags), write(sockfd1, &flags, sizeof(flags))); usleep(100000); - ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); - close(sockfd1); - usleep(100000); // wait for server to handle the msg ASSERT_EQ(NULL, GetSocketFromServer(0)); butil::fd_guard sockfd2(socket(AF_INET, SOCK_STREAM, 0)); From 0690288d73e4b6ceac44a87994592b992e0bdeb7 Mon Sep 17 00:00:00 2001 From: yujiasheng <1846100778@qq.com> Date: Mon, 29 Dec 2025 21:45:19 +0800 Subject: [PATCH 11/84] fix: avoid SIGSEGV in read_proc_io during static initialization (#3184) Signed-off-by: jiasheng.yu Co-authored-by: jiasheng.yu --- src/bvar/default_variables.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/bvar/default_variables.cpp b/src/bvar/default_variables.cpp index 395d05e1f7..a84c3ab08b 100644 --- a/src/bvar/default_variables.cpp +++ b/src/bvar/default_variables.cpp @@ -22,6 +22,8 @@ #include // getrusage #include // dirent #include // setw +#include +#include #if defined(__APPLE__) #include #include @@ -430,7 +432,12 @@ static bool read_proc_io(ProcIO* s) { #if defined(OS_LINUX) butil::ScopedFILE fp("/proc/self/io", "r"); if (NULL == fp) { - PLOG_ONCE(WARNING) << "Fail to open /proc/self/io"; + static bool ever_printed_io_err = false; + if (!ever_printed_io_err) { + fprintf(stderr, "WARNING: Fail to open /proc/self/io, errno=%d. 
" + "I/O related bvars will be unavailable.\n", errno); + ever_printed_io_err = true; + } return false; } errno = 0; From 4a3c7869c67a8ced3e655efe54c3cee563640bcc Mon Sep 17 00:00:00 2001 From: Li Yin Date: Wed, 31 Dec 2025 10:48:43 +0800 Subject: [PATCH 12/84] Fix compile issue when the linking project is using C++23 (#3180) The root cause is unique_ptr has constexpr destructor since C++23 libcxx/include/__memory/unique_ptr.h:75:19: error: invalid application of 'sizeof' to an incomplete type 'brpc::RedisCommandHandler' 75 | static_assert(sizeof(_Tp) >= 0, "cannot delete an incomplete type"); | ^~~~~~~~~~~ libcxx/include/__memory/unique_ptr.h:290:7: note: in instantiation of member function 'std::default_delete::operator()' requested here 290 | __deleter_(__tmp); | ^ libcxx/include/__memory/unique_ptr.h:259:71: note: in instantiation of member function 'std::unique_ptr::reset' requested here 259 | _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 ~unique_ptr() { reset(); } | ^ src/brpc/redis.h:220:14: note: in instantiation of member function 'std::unique_ptr::~unique_ptr' requested here 220 | explicit RedisConnContext(const RedisService* rs) | ^ src/brpc/redis.h:190:7: note: forward declaration of 'brpc::RedisCommandHandler' 190 | class RedisCommandHandler; Co-authored-by: yin.li --- src/brpc/redis.cpp | 13 +++++++++---- src/brpc/redis.h | 5 +---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/brpc/redis.cpp b/src/brpc/redis.cpp index 9af036f857..b9c5363f3e 100644 --- a/src/brpc/redis.cpp +++ b/src/brpc/redis.cpp @@ -101,10 +101,10 @@ bool RedisRequest::AddCommand(const butil::StringPiece& command) { CHECK(st.ok()) << st; _has_error = true; return false; - } + } } -bool RedisRequest::AddCommandByComponents(const butil::StringPiece* components, +bool RedisRequest::AddCommandByComponents(const butil::StringPiece* components, size_t n) { if (_has_error) { return false; @@ -117,7 +117,7 @@ bool RedisRequest::AddCommandByComponents(const 
butil::StringPiece* components, CHECK(st.ok()) << st; _has_error = true; return false; - } + } } bool RedisRequest::AddCommandWithArgs(const char* fmt, ...) { @@ -356,7 +356,7 @@ bool RedisService::AddCommandHandler(const std::string& name, RedisCommandHandle _command_map[lcname] = handler; return true; } - + RedisCommandHandler* RedisService::FindCommandHandler(const butil::StringPiece& name) const { auto it = _command_map.find(name.as_string()); if (it != _command_map.end()) { @@ -371,6 +371,11 @@ RedisCommandHandler* RedisCommandHandler::NewTransactionHandler() { } // ========== impl of RedisConnContext ========== +RedisConnContext::RedisConnContext(const RedisService* rs) + : redis_service(rs) + , batched_size(0) + , session(nullptr) {} + RedisConnContext::~RedisConnContext() { } void RedisConnContext::Destroy() { diff --git a/src/brpc/redis.h b/src/brpc/redis.h index 50064519f7..c140baf34b 100644 --- a/src/brpc/redis.h +++ b/src/brpc/redis.h @@ -217,10 +217,7 @@ class RedisCommandParser; // This class is as parsing_context in socket. 
class RedisConnContext : public Destroyable { public: - explicit RedisConnContext(const RedisService* rs) - : redis_service(rs) - , batched_size(0) - , session(nullptr) {} + explicit RedisConnContext(const RedisService* rs); ~RedisConnContext(); // @Destroyable From e0e0090f5db699e22a34cfadbfa673ddd471e0b9 Mon Sep 17 00:00:00 2001 From: Xiaofeng Wang Date: Sun, 4 Jan 2026 11:34:25 +0800 Subject: [PATCH 13/84] Wrap absl::string_view as std::string to support protobuf v30+ (#3187) * Wrap absl::string_view as std::string to support protobuf v30+ Closes #3181 * remove unnecessary specialization for abs:string_view * keep path for consistency --- src/brpc/amf.cpp | 2 +- src/brpc/builtin/protobufs_service.cpp | 11 ++-- src/brpc/channel.cpp | 10 ++-- src/brpc/nshead_pb_service_adaptor.cpp | 10 ++-- src/brpc/policy/baidu_rpc_protocol.cpp | 19 ++++--- src/brpc/policy/http_rpc_protocol.cpp | 47 ++++++++------- src/brpc/policy/hulu_pbrpc_protocol.cpp | 10 +++- src/brpc/policy/mongo_protocol.cpp | 7 ++- src/brpc/policy/nshead_mcpack_protocol.cpp | 13 +++-- src/brpc/policy/sofa_pbrpc_protocol.cpp | 7 ++- src/brpc/policy/ubrpc2pb_protocol.cpp | 16 +++--- src/brpc/server.cpp | 17 +++--- src/brpc/server.h | 2 +- src/butil/strings/string_util.h | 20 +++++++ src/json2pb/json_to_pb.cpp | 20 ++++--- src/json2pb/pb_to_json.cpp | 16 +++--- src/json2pb/pb_to_json.h | 4 ++ src/json2pb/protobuf_map.cpp | 4 +- src/json2pb/protobuf_type_resolver.h | 5 +- src/mcpack2pb/generator.cpp | 66 +++++++++++----------- 20 files changed, 184 insertions(+), 122 deletions(-) diff --git a/src/brpc/amf.cpp b/src/brpc/amf.cpp index 023eaa9b91..b251ccf236 100644 --- a/src/brpc/amf.cpp +++ b/src/brpc/amf.cpp @@ -1000,7 +1000,7 @@ void WriteAMFObject(const google::protobuf::Message& message, continue; } } - const std::string& name = field->name(); + const auto& name = field->name(); if (name.size() >= 65536u) { LOG(ERROR) << "name is too long!"; return stream->set_bad(); diff --git 
a/src/brpc/builtin/protobufs_service.cpp b/src/brpc/builtin/protobufs_service.cpp index 2344e46134..99a3fea37d 100644 --- a/src/brpc/builtin/protobufs_service.cpp +++ b/src/brpc/builtin/protobufs_service.cpp @@ -17,13 +17,16 @@ #include // ServiceDescriptor + +#include "brpc/builtin/protobufs_service.h" + #include "brpc/controller.h" // Controller #include "brpc/server.h" // Server #include "brpc/closure_guard.h" // ClosureGuard #include "brpc/details/method_status.h"// MethodStatus -#include "brpc/builtin/protobufs_service.h" #include "brpc/builtin/common.h" +#include "butil/strings/string_util.h" namespace brpc { @@ -42,7 +45,7 @@ int ProtobufsService::Init() { } const google::protobuf::ServiceDescriptor* d = iter->second.service->GetDescriptor(); - _map[d->full_name()] = d->DebugString(); + _map[butil::EnsureString(d->full_name())] = d->DebugString(); const int method_count = d->method_count(); for (int j = 0; j < method_count; ++j) { const google::protobuf::MethodDescriptor* md = d->method(j); @@ -53,13 +56,13 @@ int ProtobufsService::Init() { while (!stack.empty()) { const google::protobuf::Descriptor* d = stack.back(); stack.pop_back(); - _map[d->full_name()] = d->DebugString(); + _map[butil::EnsureString(d->full_name())] = d->DebugString(); for (int i = 0; i < d->field_count(); ++i) { const google::protobuf::FieldDescriptor* f = d->field(i); if (f->type() == google::protobuf::FieldDescriptor::TYPE_MESSAGE || f->type() == google::protobuf::FieldDescriptor::TYPE_GROUP) { const google::protobuf::Descriptor* sub_d = f->message_type(); - if (sub_d != d && _map.find(sub_d->full_name()) == _map.end()) { + if (sub_d != d && _map.find(butil::EnsureString(sub_d->full_name())) == _map.end()) { stack.push_back(sub_d); } } diff --git a/src/brpc/channel.cpp b/src/brpc/channel.cpp index 0252e97d74..0fd43d7c9c 100644 --- a/src/brpc/channel.cpp +++ b/src/brpc/channel.cpp @@ -492,17 +492,17 @@ void Channel::CallMethod(const google::protobuf::MethodDescriptor* method, if 
(cntl->_sender == NULL && IsTraceable(Span::tls_parent())) { const int64_t start_send_us = butil::cpuwide_time_us(); - const std::string* method_name = NULL; + std::string method_name; if (_get_method_name) { - method_name = &_get_method_name(method, cntl); + method_name = butil::EnsureString(_get_method_name(method, cntl)); } else if (method) { - method_name = &method->full_name(); + method_name = butil::EnsureString(method->full_name()); } else { const static std::string NULL_METHOD_STR = "null-method"; - method_name = &NULL_METHOD_STR; + method_name = NULL_METHOD_STR; } Span* span = Span::CreateClientSpan( - *method_name, start_send_real_us - start_send_us); + method_name, start_send_real_us - start_send_us); span->set_log_id(cntl->log_id()); span->set_base_cid(correlation_id); span->set_protocol(_options.protocol); diff --git a/src/brpc/nshead_pb_service_adaptor.cpp b/src/brpc/nshead_pb_service_adaptor.cpp index ed3552f5a1..889f957540 100644 --- a/src/brpc/nshead_pb_service_adaptor.cpp +++ b/src/brpc/nshead_pb_service_adaptor.cpp @@ -19,8 +19,7 @@ #include // MethodDescriptor #include // Message -#include "butil/time.h" -#include "butil/iobuf.h" // butil::IOBuf +#include "brpc/nshead_pb_service_adaptor.h" #include "brpc/controller.h" // Controller #include "brpc/socket.h" // Socket @@ -28,9 +27,12 @@ #include "brpc/span.h" #include "brpc/details/server_private_accessor.h" #include "brpc/details/controller_private_accessor.h" -#include "brpc/nshead_pb_service_adaptor.h" #include "brpc/policy/most_common_message.h" +#include "butil/iobuf.h" // butil::IOBuf +#include "butil/strings/string_util.h" +#include "butil/time.h" + namespace brpc { @@ -126,7 +128,7 @@ void NsheadPbServiceAdaptor::ProcessNsheadRequest( google::protobuf::Service* svc = sp->service; const google::protobuf::MethodDescriptor* method = sp->method; ControllerPrivateAccessor(controller).set_method(method); - done->SetMethodName(method->full_name()); + 
done->SetMethodName(butil::EnsureString(method->full_name())); pbdone->pbreq.reset(svc->GetRequestPrototype(method).New()); pbdone->pbres.reset(svc->GetResponsePrototype(method).New()); diff --git a/src/brpc/policy/baidu_rpc_protocol.cpp b/src/brpc/policy/baidu_rpc_protocol.cpp index 5adf77b2c5..0dba01624a 100644 --- a/src/brpc/policy/baidu_rpc_protocol.cpp +++ b/src/brpc/policy/baidu_rpc_protocol.cpp @@ -21,10 +21,13 @@ #include #include #include -#include "butil/logging.h" // LOG() + #include "butil/iobuf.h" // butil::IOBuf -#include "butil/raw_pack.h" // RawPacker RawUnpacker +#include "butil/logging.h" // LOG() #include "butil/memory/scope_guard.h" +#include "butil/raw_pack.h" // RawPacker RawUnpacker +#include "butil/strings/string_util.h" + #include "json2pb/json_to_pb.h" #include "json2pb/pb_to_json.h" #include "brpc/controller.h" // Controller @@ -233,7 +236,7 @@ static bool SerializeResponse(const google::protobuf::Message& res, cntl.SetFailed(ERESPONSE, "Fail to serialize response=%s, " "ContentType=%s, CompressType=%s, ChecksumType=%s", - res.GetDescriptor()->full_name().c_str(), + butil::EnsureString(res.GetDescriptor()->full_name()).c_str(), ContentTypeToCStr(content_type), CompressTypeToCStr(compress_type), ChecksumTypeToCStr(checksum_type)); @@ -775,7 +778,7 @@ void ProcessRpcRequest(InputMessageBase* msg_base) { cntl->SetFailed( ELIMIT, "Rejected by %s's ConcurrencyLimiter, concurrency=%d", - mp->method->full_name().c_str(), rejected_cc); + butil::EnsureString(mp->method->full_name()).c_str(), rejected_cc); break; } } @@ -784,7 +787,7 @@ void ProcessRpcRequest(InputMessageBase* msg_base) { accessor.set_method(method); if (span) { - span->ResetServerSpanName(method->full_name()); + span->ResetServerSpanName(butil::EnsureString(method->full_name())); } if (!server->AcceptRequest(cntl.get())) { @@ -812,7 +815,7 @@ void ProcessRpcRequest(InputMessageBase* msg_base) { EREQUEST, "Fail to parse request=%s, ContentType=%s, " "CompressType=%s, 
ChecksumType=%s, request_size=%d", - messages->Request()->GetDescriptor()->full_name().c_str(), + butil::EnsureString(messages->Request()->GetDescriptor()->full_name()).c_str(), ContentTypeToCStr(content_type), CompressTypeToCStr(compress_type), ChecksumTypeToCStr(checksum_type), req_size); @@ -996,7 +999,7 @@ void ProcessRpcResponse(InputMessageBase* msg_base) { EREQUEST, "Fail to parse response=%s, ContentType=%s, " "CompressType=%s, ChecksumType=%s, request_size=%d", - cntl->response()->GetDescriptor()->full_name().c_str(), + butil::EnsureString(cntl->response()->GetDescriptor()->full_name()).c_str(), ContentTypeToCStr(content_type), CompressTypeToCStr(compress_type), ChecksumTypeToCStr(checksum_type), res_size); @@ -1033,7 +1036,7 @@ void SerializeRpcRequest(butil::IOBuf* request_buf, Controller* cntl, EREQUEST, "Fail to compress request=%s, " "ContentType=%s, CompressType=%s, ChecksumType=%s", - request->GetDescriptor()->full_name().c_str(), + butil::EnsureString(request->GetDescriptor()->full_name()).c_str(), ContentTypeToCStr(content_type), CompressTypeToCStr(compress_type), ChecksumTypeToCStr(checksum_type)); } diff --git a/src/brpc/policy/http_rpc_protocol.cpp b/src/brpc/policy/http_rpc_protocol.cpp index 872c2897cc..d0150a63fd 100644 --- a/src/brpc/policy/http_rpc_protocol.cpp +++ b/src/brpc/policy/http_rpc_protocol.cpp @@ -20,12 +20,16 @@ #include #include #include + #include "brpc/policy/http_rpc_protocol.h" -#include "butil/unique_ptr.h" // std::unique_ptr -#include "butil/string_splitter.h" // StringMultiSplitter + #include "butil/string_printf.h" -#include "butil/time.h" +#include "butil/string_splitter.h" // StringMultiSplitter +#include "butil/strings/string_util.h" #include "butil/sys_byteorder.h" +#include "butil/time.h" +#include "butil/unique_ptr.h" // std::unique_ptr + #include "json2pb/pb_to_json.h" // ProtoMessageToJson #include "json2pb/json_to_pb.h" // JsonToProtoMessage #include "brpc/compress.h" @@ -284,7 +288,7 @@ static bool 
JsonToProtoMessage(const butil::IOBuf& body, bool ok = json2pb::JsonToProtoMessage(&wrapper, message, options, &error); if (!ok) { cntl->SetFailed(error_code, "Fail to parse http json body as %s: %s", - message->GetDescriptor()->full_name().c_str(), + butil::EnsureString(message->GetDescriptor()->full_name()).c_str(), error.c_str()); } return ok; @@ -305,7 +309,7 @@ static bool ProtoMessageToJson(const google::protobuf::Message& message, bool ok = json2pb::ProtoMessageToJson(message, wrapper, options, &error); if (!ok) { cntl->SetFailed(error_code, "Fail to convert %s to json: %s", - message.GetDescriptor()->full_name().c_str(), + butil::EnsureString(message.GetDescriptor()->full_name()).c_str(), error.c_str()); } return ok; @@ -321,7 +325,7 @@ static bool ProtoJsonToProtoMessage(const butil::IOBuf& body, bool ok = json2pb::ProtoJsonToProtoMessage(&wrapper, message, options, &error); if (!ok) { cntl->SetFailed(error_code, "Fail to parse http proto-json body as %s: %s", - message->GetDescriptor()->full_name().c_str(), + butil::EnsureString(message->GetDescriptor()->full_name()).c_str(), error.c_str()); } return ok; @@ -337,7 +341,7 @@ static bool ProtoMessageToProtoJson(const google::protobuf::Message& message, bool ok = json2pb::ProtoMessageToProtoJson(message, wrapper, options, &error); if (!ok) { cntl->SetFailed(error_code, "Fail to convert %s to proto-json: %s", - message.GetDescriptor()->full_name().c_str(), error.c_str()); + butil::EnsureString(message.GetDescriptor()->full_name()).c_str(), error.c_str()); } return ok; } @@ -527,13 +531,13 @@ void ProcessHttpResponse(InputMessageBase* msg) { if (content_type == HTTP_CONTENT_PROTO) { if (!ParsePbFromIOBuf(cntl->response(), res_body)) { cntl->SetFailed(ERESPONSE, "Fail to parse content as %s", - cntl->response()->GetDescriptor()->full_name().c_str()); + butil::EnsureString(cntl->response()->GetDescriptor()->full_name()).c_str()); break; } } else if (content_type == HTTP_CONTENT_PROTO_TEXT) { if 
(!ParsePbTextFromIOBuf(cntl->response(), res_body)) { cntl->SetFailed(ERESPONSE, "Fail to parse proto-text content as %s", - cntl->response()->GetDescriptor()->full_name().c_str()); + butil::EnsureString(cntl->response()->GetDescriptor()->full_name()).c_str()); break; } } else if (content_type == HTTP_CONTENT_JSON) { @@ -612,13 +616,13 @@ void SerializeHttpRequest(butil::IOBuf* /*not used*/, if (!pbreq->SerializeToZeroCopyStream(&wrapper)) { cntl->request_attachment().clear(); return cntl->SetFailed(EREQUEST, "Fail to serialize %s", - pbreq->GetTypeName().c_str()); + butil::EnsureString(pbreq->GetTypeName()).c_str()); } } else if (content_type == HTTP_CONTENT_PROTO_TEXT) { if (!google::protobuf::TextFormat::Print(*pbreq, &wrapper)) { cntl->request_attachment().clear(); return cntl->SetFailed(EREQUEST, "Fail to print %s as proto-text", - pbreq->GetTypeName().c_str()); + butil::EnsureString(pbreq->GetTypeName()).c_str()); } } else if (content_type == HTTP_CONTENT_PROTO_JSON) { if (!ProtoMessageToProtoJson(*pbreq, &wrapper, cntl, EREQUEST)) { @@ -880,11 +884,13 @@ HttpResponseSender::~HttpResponseSender() { butil::IOBufAsZeroCopyOutputStream wrapper(&cntl->response_attachment()); if (content_type == HTTP_CONTENT_PROTO) { if (!res->SerializeToZeroCopyStream(&wrapper)) { - cntl->SetFailed(ERESPONSE, "Fail to serialize %s", res->GetTypeName().c_str()); + cntl->SetFailed(ERESPONSE, "Fail to serialize %s", + butil::EnsureString(res->GetTypeName()).c_str()); } } else if (content_type == HTTP_CONTENT_PROTO_TEXT) { if (!google::protobuf::TextFormat::Print(*res, &wrapper)) { - cntl->SetFailed(ERESPONSE, "Fail to print %s as proto-text", res->GetTypeName().c_str()); + cntl->SetFailed(ERESPONSE, "Fail to print %s as proto-text", + butil::EnsureString(res->GetTypeName()).c_str()); } } else if (content_type == HTTP_CONTENT_PROTO_JSON) { ProtoMessageToProtoJson(*res, &wrapper, cntl, ERESPONSE); @@ -1535,7 +1541,7 @@ void ProcessHttpRequest(InputMessageBase *msg) { 
cntl->request_attachment().swap(req_body); google::protobuf::Closure* done = new HttpResponseSenderAsDone(&resp_sender); if (span) { - span->ResetServerSpanName(md->full_name()); + span->ResetServerSpanName(butil::EnsureString(md->full_name())); span->set_start_callback_us(butil::cpuwide_time_us()); span->AsParent(); } @@ -1565,18 +1571,19 @@ void ProcessHttpRequest(InputMessageBase *msg) { // Switch to service-specific error. non_service_error.release(); MethodStatus* method_status = mp->status; + const std::string method_full_name = butil::EnsureString(mp->method->full_name()); resp_sender.set_method_status(method_status); if (method_status) { int rejected_cc = 0; if (!method_status->OnRequested(&rejected_cc)) { cntl->SetFailed(ELIMIT, "Rejected by %s's ConcurrencyLimiter, concurrency=%d", - mp->method->full_name().c_str(), rejected_cc); + method_full_name.c_str(), rejected_cc); return; } } if (span) { - span->ResetServerSpanName(mp->method->full_name()); + span->ResetServerSpanName(method_full_name); } // NOTE: accesses to builtin services are not counted as part of // concurrency, therefore are not limited by ServerOptions.max_concurrency. @@ -1616,6 +1623,8 @@ void ProcessHttpRequest(InputMessageBase *msg) { google::protobuf::Message* req = messages->Request(); google::protobuf::Message* res = messages->Response(); + const std::string request_full_name = butil::EnsureString(req->GetDescriptor()->full_name()); + if (__builtin_expect(!req || !res, 0)) { PLOG(FATAL) << "Fail to new req or res"; cntl->SetFailed("Fail to new req or res"); @@ -1632,7 +1641,7 @@ void ProcessHttpRequest(InputMessageBase *msg) { if (!req->IsInitialized()) { cntl->SetFailed(EREQUEST, "%s needs to be created from a" " non-empty json, it has required fields.", - req->GetDescriptor()->full_name().c_str()); + request_full_name.c_str()); return; } // else all fields of the request are optional. 
} else { @@ -1677,13 +1686,13 @@ void ProcessHttpRequest(InputMessageBase *msg) { if (content_type == HTTP_CONTENT_PROTO) { if (!ParsePbFromIOBuf(req, req_body)) { cntl->SetFailed(EREQUEST, "Fail to parse http body as %s", - req->GetDescriptor()->full_name().c_str()); + request_full_name.c_str()); return; } } else if (content_type == HTTP_CONTENT_PROTO_TEXT) { if (!ParsePbTextFromIOBuf(req, req_body)) { cntl->SetFailed(EREQUEST, "Fail to parse http proto-text body as %s", - req->GetDescriptor()->full_name().c_str()); + request_full_name.c_str()); return; } } else if (content_type == HTTP_CONTENT_PROTO_JSON) { diff --git a/src/brpc/policy/hulu_pbrpc_protocol.cpp b/src/brpc/policy/hulu_pbrpc_protocol.cpp index 02ec8efcad..bd0c496027 100644 --- a/src/brpc/policy/hulu_pbrpc_protocol.cpp +++ b/src/brpc/policy/hulu_pbrpc_protocol.cpp @@ -20,7 +20,10 @@ #include // Message #include #include + +#include "butil/strings/string_util.h" #include "butil/time.h" + #include "brpc/controller.h" // Controller #include "brpc/socket.h" // Socket #include "brpc/server.h" // Server @@ -469,17 +472,18 @@ void ProcessHuluRequest(InputMessageBase* msg_base) { // Switch to service-specific error. 
non_service_error.release(); method_status = sp->status; + const google::protobuf::MethodDescriptor* method = sp->method; + const std::string method_full_name = butil::EnsureString(method->full_name()); if (method_status) { int rejected_cc = 0; if (!method_status->OnRequested(&rejected_cc)) { cntl->SetFailed(ELIMIT, "Rejected by %s's ConcurrencyLimiter, concurrency=%d", - sp->method->full_name().c_str(), rejected_cc); + method_full_name.c_str(), rejected_cc); break; } } google::protobuf::Service* svc = sp->service; - const google::protobuf::MethodDescriptor* method = sp->method; accessor.set_method(method); if (!server->AcceptRequest(cntl.get())) { @@ -487,7 +491,7 @@ void ProcessHuluRequest(InputMessageBase* msg_base) { } if (span) { - span->ResetServerSpanName(method->full_name()); + span->ResetServerSpanName(method_full_name); } const int reqsize = msg->payload.length(); butil::IOBuf req_buf; diff --git a/src/brpc/policy/mongo_protocol.cpp b/src/brpc/policy/mongo_protocol.cpp index 82bb3e0b36..5df304e1bc 100644 --- a/src/brpc/policy/mongo_protocol.cpp +++ b/src/brpc/policy/mongo_protocol.cpp @@ -18,8 +18,11 @@ #include // MethodDescriptor #include // Message #include -#include "butil/time.h" + #include "butil/iobuf.h" // butil::IOBuf +#include "butil/strings/string_util.h" +#include "butil/time.h" + #include "brpc/controller.h" // Controller #include "brpc/socket.h" // Socket #include "brpc/server.h" // Server @@ -249,7 +252,7 @@ void ProcessMongoRequest(InputMessageBase* msg_base) { if (!method_status->OnRequested(&rejected_cc)) { mongo_done->cntl.SetFailed( ELIMIT, "Rejected by %s's ConcurrencyLimiter, concurrency=%d", - mp->method->full_name().c_str(), rejected_cc); + butil::EnsureString(mp->method->full_name()).c_str(), rejected_cc); break; } } diff --git a/src/brpc/policy/nshead_mcpack_protocol.cpp b/src/brpc/policy/nshead_mcpack_protocol.cpp index 40d38836c6..052fd0f3b7 100644 --- a/src/brpc/policy/nshead_mcpack_protocol.cpp +++ 
b/src/brpc/policy/nshead_mcpack_protocol.cpp @@ -20,8 +20,9 @@ #include // Message #include -#include "butil/time.h" #include "butil/iobuf.h" // butil::IOBuf +#include "butil/strings/string_util.h" +#include "butil/time.h" #include "brpc/controller.h" // Controller #include "brpc/socket.h" // Socket @@ -49,7 +50,7 @@ void NsheadMcpackAdaptor::ParseNsheadMeta( const google::protobuf::ServiceDescriptor* sd = service->GetDescriptor(); if (sd->method_count() == 0) { cntl->SetFailed(ENOMETHOD, "No method in service=%s", - sd->full_name().c_str()); + butil::EnsureString(sd->full_name()).c_str()); return; } const google::protobuf::MethodDescriptor* method = sd->method(0); @@ -59,7 +60,7 @@ void NsheadMcpackAdaptor::ParseNsheadMeta( void NsheadMcpackAdaptor::ParseRequestFromIOBuf( const NsheadMeta&, const NsheadMessage& raw_req, Controller* cntl, google::protobuf::Message* pb_req) const { - const std::string& msg_name = pb_req->GetDescriptor()->full_name(); + const std::string msg_name = butil::EnsureString(pb_req->GetDescriptor()->full_name()); mcpack2pb::MessageHandler handler = mcpack2pb::find_message_handler(msg_name); if (!handler.parse_from_iobuf(pb_req, raw_req.body)) { cntl->SetFailed(EREQUEST, "Fail to parse request message, " @@ -86,7 +87,7 @@ void NsheadMcpackAdaptor::SerializeResponseToIOBuf( return; } - const std::string& msg_name = pb_res->GetDescriptor()->full_name(); + const std::string msg_name = butil::EnsureString(pb_res->GetDescriptor()->full_name()); mcpack2pb::MessageHandler handler = mcpack2pb::find_message_handler(msg_name); if (!handler.serialize_to_iobuf(*pb_res, &raw_res->body, ::mcpack2pb::FORMAT_MCPACK_V2)) { @@ -124,7 +125,7 @@ void ProcessNsheadMcpackResponse(InputMessageBase* msg_base) { // silently ignore response. 
return; } - const std::string& msg_name = res->GetDescriptor()->full_name(); + const std::string msg_name = butil::EnsureString(res->GetDescriptor()->full_name()); mcpack2pb::MessageHandler handler = mcpack2pb::find_message_handler(msg_name); if (!handler.parse_from_iobuf(res, msg->payload)) { return cntl->CloseConnection("Fail to parse response message"); @@ -143,7 +144,7 @@ void SerializeNsheadMcpackRequest(butil::IOBuf* buf, Controller* cntl, "nshead_mcpack protocol doesn't support compression"); return; } - const std::string& msg_name = pb_req->GetDescriptor()->full_name(); + const std::string msg_name = butil::EnsureString(pb_req->GetDescriptor()->full_name()); mcpack2pb::MessageHandler handler = mcpack2pb::find_message_handler(msg_name); if (!handler.serialize_to_iobuf(*pb_req, buf, ::mcpack2pb::FORMAT_MCPACK_V2)) { cntl->SetFailed(EREQUEST, "Fail to serialize %s", msg_name.c_str()); diff --git a/src/brpc/policy/sofa_pbrpc_protocol.cpp b/src/brpc/policy/sofa_pbrpc_protocol.cpp index 9ee772dcff..2fb33ed578 100644 --- a/src/brpc/policy/sofa_pbrpc_protocol.cpp +++ b/src/brpc/policy/sofa_pbrpc_protocol.cpp @@ -20,7 +20,10 @@ #include // Message #include #include + #include "butil/time.h" +#include "butil/strings/string_util.h" + #include "brpc/controller.h" // Controller #include "brpc/socket.h" // Socket #include "brpc/server.h" // Server @@ -424,7 +427,7 @@ void ProcessSofaRequest(InputMessageBase* msg_base) { int rejected_cc = 0; if (!method_status->OnRequested(&rejected_cc)) { cntl->SetFailed(ELIMIT, "Rejected by %s's ConcurrencyLimiter, concurrency=%d", - sp->method->full_name().c_str(), rejected_cc); + butil::EnsureString(sp->method->full_name()).c_str(), rejected_cc); break; } } @@ -437,7 +440,7 @@ void ProcessSofaRequest(InputMessageBase* msg_base) { } if (span) { - span->ResetServerSpanName(method->full_name()); + span->ResetServerSpanName(butil::EnsureString(method->full_name())); } req.reset(svc->GetRequestPrototype(method).New()); if 
(!ParseFromCompressedData(msg->payload, req.get(), req_cmp_type)) { diff --git a/src/brpc/policy/ubrpc2pb_protocol.cpp b/src/brpc/policy/ubrpc2pb_protocol.cpp index 312ec5d92a..fe2c4619cb 100644 --- a/src/brpc/policy/ubrpc2pb_protocol.cpp +++ b/src/brpc/policy/ubrpc2pb_protocol.cpp @@ -20,8 +20,10 @@ #include // Message #include -#include "butil/time.h" #include "butil/iobuf.h" // butil::IOBuf +#include "butil/strings/string_util.h" +#include "butil/time.h" + #include "brpc/controller.h" // Controller #include "brpc/socket.h" // Socket #include "brpc/server.h" // Server @@ -169,7 +171,7 @@ void UbrpcAdaptor::ParseNsheadMeta( void UbrpcAdaptor::ParseRequestFromIOBuf( const NsheadMeta&, const NsheadMessage& raw_req, Controller* cntl, google::protobuf::Message* pb_req) const { - const std::string& msg_name = pb_req->GetDescriptor()->full_name(); + const std::string msg_name = butil::EnsureString(pb_req->GetDescriptor()->full_name()); mcpack2pb::MessageHandler handler = mcpack2pb::find_message_handler(msg_name); if (handler.parse_body == NULL) { return cntl->SetFailed(EREQUEST, "Fail to find parser of %s", @@ -227,7 +229,7 @@ void UbrpcAdaptor::SerializeResponseToIOBuf( // return AppendError(meta, cntl, raw_res->body); // } - const std::string& msg_name = pb_res->GetDescriptor()->full_name(); + const std::string msg_name = butil::EnsureString(pb_res->GetDescriptor()->full_name()); mcpack2pb::MessageHandler handler = mcpack2pb::find_message_handler(msg_name); if (handler.serialize_body == NULL) { cntl->SetFailed(ERESPONSE, "Fail to find serializer of %s", @@ -279,7 +281,7 @@ static void ParseResponse(Controller* cntl, butil::IOBuf& buf, // silently ignore response. 
return; } - const std::string& msg_name = res->GetDescriptor()->full_name(); + const std::string msg_name = butil::EnsureString(res->GetDescriptor()->full_name()); mcpack2pb::MessageHandler handler = mcpack2pb::find_message_handler(msg_name); if (handler.parse_body == NULL) { return cntl->SetFailed(ERESPONSE, "Fail to find parser of %s", @@ -480,7 +482,7 @@ static void SerializeUbrpcRequest(butil::IOBuf* buf, Controller* cntl, if (cntl->method() == NULL) { return cntl->SetFailed(ENOMETHOD, "method is NULL"); } - const std::string& msg_name = request->GetDescriptor()->full_name(); + const std::string msg_name = butil::EnsureString(request->GetDescriptor()->full_name()); mcpack2pb::MessageHandler handler = mcpack2pb::find_message_handler(msg_name); if (handler.serialize_body == NULL) { return cntl->SetFailed(EREQUEST, "Fail to find serializer of %s", @@ -500,9 +502,9 @@ static void SerializeUbrpcRequest(butil::IOBuf* buf, Controller* cntl, sr.begin_mcpack_array("content", mcpack2pb::FIELD_OBJECT); sr.begin_object(); { - sr.add_string("service_name", cntl->method()->service()->name()); + sr.add_string("service_name", butil::EnsureString(cntl->method()->service()->name())); sr.add_int64("id", cntl->call_id().value); - sr.add_string("method", cntl->method()->name()); + sr.add_string("method", butil::EnsureString(cntl->method()->name())); sr.begin_object("params"); const char* const request_name = cntl->idl_names().request_name; if (request_name != NULL && *request_name) { diff --git a/src/brpc/server.cpp b/src/brpc/server.cpp index a82817be8b..8e2368bcb2 100644 --- a/src/brpc/server.cpp +++ b/src/brpc/server.cpp @@ -30,6 +30,7 @@ #include "butil/time.h" #include "butil/class_name.h" #include "butil/string_printf.h" +#include "butil/strings/string_util.h" #include "butil/debug/leak_annotations.h" #include "brpc/log.h" #include "brpc/compress.h" @@ -411,9 +412,9 @@ void* Server::UpdateDerivedVars(void* arg) { } } -const std::string& Server::ServiceProperty::service_name() 
const { +const std::string Server::ServiceProperty::service_name() const { if (service) { - return service->GetDescriptor()->full_name(); + return butil::EnsureString(service->GetDescriptor()->full_name()); } else if (restful_map) { return restful_map->service_name(); } @@ -1439,7 +1440,7 @@ int Server::AddServiceInternal(google::protobuf::Service* service, mp.service = service; mp.method = md; mp.status = new MethodStatus; - _method_map[md->full_name()] = mp; + _method_map[butil::EnsureString(md->full_name())] = mp; if (is_idl_support && sd->name() != sd->full_name()/*has ns*/) { MethodProperty mp2 = mp; mp2.own_method_status = false; @@ -1462,8 +1463,8 @@ int Server::AddServiceInternal(google::protobuf::Service* service, const ServiceProperty ss = { is_builtin_service, svc_opt.ownership, service, NULL }; - _fullname_service_map[sd->full_name()] = ss; - _service_map[sd->name()] = ss; + _fullname_service_map[butil::EnsureString(sd->full_name())] = ss; + _service_map[butil::EnsureString(sd->name())] = ss; if (is_builtin_service) { ++_builtin_service_count; } else { @@ -1505,7 +1506,7 @@ int Server::AddServiceInternal(google::protobuf::Service* service, // handling is not affected. for (size_t i = 0; i < mappings.size(); ++i) { const std::string full_method_name = - sd->full_name() + "." + mappings[i].method_name; + butil::EnsureString(sd->full_name()) + "." 
+ mappings[i].method_name; MethodProperty* mp = _method_map.seek(full_method_name); if (mp == NULL) { LOG(ERROR) << "Unknown method=`" << full_method_name << '\''; @@ -1730,9 +1731,9 @@ int Server::RemoveService(google::protobuf::Service* service) { } const google::protobuf::ServiceDescriptor* sd = service->GetDescriptor(); - ServiceProperty* ss = _fullname_service_map.seek(sd->full_name()); + ServiceProperty* ss = _fullname_service_map.seek(butil::EnsureString(sd->full_name())); if (ss == NULL) { - RPC_VLOG << "Fail to find service=" << sd->full_name().c_str(); + RPC_VLOG << "Fail to find service=" << sd->full_name(); return -1; } RemoveMethodsOf(service); diff --git a/src/brpc/server.h b/src/brpc/server.h index f833325cde..c262375c67 100644 --- a/src/brpc/server.h +++ b/src/brpc/server.h @@ -391,7 +391,7 @@ class Server { return !is_builtin_service && !restful_map; } - const std::string& service_name() const; + const std::string service_name() const; }; typedef butil::FlatMap ServiceMap; diff --git a/src/butil/strings/string_util.h b/src/butil/strings/string_util.h index 4d78e20f01..bd3328a770 100644 --- a/src/butil/strings/string_util.h +++ b/src/butil/strings/string_util.h @@ -11,6 +11,8 @@ #include // va_list #include +#include +#include #include #include "butil/base_export.h" @@ -258,6 +260,24 @@ BUTIL_EXPORT bool IsStringUTF8(const StringPiece& str); BUTIL_EXPORT bool IsStringASCII(const StringPiece& str); BUTIL_EXPORT bool IsStringASCII(const string16& str); +inline std::string EnsureString(const std::string& s) { + return s; +} + +inline std::string EnsureString(std::string&& s) { + return std::move(s); +} + +inline std::string EnsureString(const char* s) { + return s ? std::string(s) : std::string(); +} + +// Enabled only when std::string is constructible from T. 
+template ::value>::type> +inline std::string EnsureString(T&& v) { + return std::string(std::forward(v)); +} + } // namespace butil #if defined(OS_WIN) diff --git a/src/json2pb/json_to_pb.cpp b/src/json2pb/json_to_pb.cpp index e758bdb3ab..87fd080a15 100644 --- a/src/json2pb/json_to_pb.cpp +++ b/src/json2pb/json_to_pb.cpp @@ -28,6 +28,7 @@ #include #include #include "butil/strings/string_number_conversions.h" +#include "butil/strings/string_util.h" #include "butil/third_party/rapidjson/error/error.h" #include "butil/third_party/rapidjson/rapidjson.h" #include "json2pb/json_to_pb.h" @@ -53,8 +54,9 @@ perr->append(", ", 2); \ } \ butil::string_appendf(perr, fmt, ##__VA_ARGS__); \ - if ((pb) != nullptr) { \ - butil::string_appendf(perr, " [%s]", (pb)->GetDescriptor()->name().c_str()); \ + if ((pb) != nullptr) { \ + butil::string_appendf(perr, " [%s]", \ + butil::EnsureString((pb)->GetDescriptor()->name()).c_str()); \ } \ } else { } @@ -126,7 +128,7 @@ inline bool value_invalid(const google::protobuf::FieldDescriptor* field, const string_append_value(value, err); butil::string_appendf(err, "' for %sfield `%s' which SHOULD be %s", optional ? 
"optional " : "", - field->full_name().c_str(), type); + butil::EnsureString(field->full_name()).c_str(), type); } if (!optional) { return false; @@ -324,7 +326,7 @@ static bool JsonValueToProtoField(const BUTIL_RAPIDJSON_NAMESPACE::Value& value, int depth) { if (value.IsNull()) { if (field->is_required()) { - J2PERROR(err, "Missing required field: %s", field->full_name().c_str()); + J2PERROR(err, "Missing required field: %s", butil::EnsureString(field->full_name()).c_str()); return false; } return true; @@ -333,7 +335,7 @@ static bool JsonValueToProtoField(const BUTIL_RAPIDJSON_NAMESPACE::Value& value, if (field->is_repeated()) { if (!value.IsArray()) { J2PERROR(err, "Invalid value for repeated field: %s", - field->full_name().c_str()); + butil::EnsureString(field->full_name()).c_str()); return false; } } @@ -506,7 +508,7 @@ bool JsonMapToProtoMap(const BUTIL_RAPIDJSON_NAMESPACE::Value& value, int depth) { if (!value.IsObject()) { J2PERROR(err, "Non-object value for map field: %s", - map_desc->full_name().c_str()); + butil::EnsureString(map_desc->full_name()).c_str()); return false; } @@ -584,7 +586,7 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value, for (size_t i = 0; i < fields.size(); ++i) { const google::protobuf::FieldDescriptor* field = fields[i]; - const std::string& orig_name = field->name(); + const std::string orig_name = butil::EnsureString(field->name()); bool res = decode_name(orig_name, field_name_str_temp); const std::string& field_name_str = (res ? 
field_name_str_temp : orig_name); @@ -593,7 +595,7 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value, json_value.FindMember(field_name_str.data()); if (member == json_value.MemberEnd()) { if (field->is_required()) { - J2PERROR(err, "Missing required field: %s", field->full_name().c_str()); + J2PERROR(err, "Missing required field: %s", butil::EnsureString(field->full_name()).c_str()); return false; } continue; @@ -604,7 +606,7 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value, json_value.FindMember(field_name_str.data()); if (member == NULL) { if (field->is_required()) { - J2PERROR(err, "Missing required field: %s", field->full_name().c_str()); + J2PERROR(err, "Missing required field: %s", butil::EnsureString(field->full_name()).c_str()); return false; } continue; diff --git a/src/json2pb/pb_to_json.cpp b/src/json2pb/pb_to_json.cpp index f232226785..b9d01c2a94 100644 --- a/src/json2pb/pb_to_json.cpp +++ b/src/json2pb/pb_to_json.cpp @@ -30,8 +30,10 @@ #include "json2pb/rapidjson.h" #include "json2pb/pb_to_json.h" #include "json2pb/protobuf_type_resolver.h" -#include "butil/iobuf.h" + #include "butil/base64.h" +#include "butil/iobuf.h" +#include "butil/strings/string_util.h" namespace json2pb { @@ -172,7 +174,7 @@ bool PbToJsonConverter::Convert(const google::protobuf::Message& message, Handle if (!field->is_repeated() && !reflection->HasField(message, field)) { // Field that has not been set if (field->is_required()) { - _error = "Missing required field: " + field->full_name(); + _error = "Missing required field: " + butil::EnsureString(field->full_name()); return false; } // Whether dumps default fields @@ -186,7 +188,7 @@ bool PbToJsonConverter::Convert(const google::protobuf::Message& message, Handle continue; } - const std::string& orig_name = field->name(); + const std::string orig_name = butil::EnsureString(field->name()); bool decoded = decode_name(orig_name, field_name_str); const std::string& 
name = decoded ? field_name_str : orig_name; handler.Key(name.data(), name.size(), false); @@ -205,7 +207,7 @@ bool PbToJsonConverter::Convert(const google::protobuf::Message& message, Handle // Write a json object corresponding to hold protobuf map // such as {"key": value, ...} - const std::string& orig_name = map_desc->name(); + const std::string orig_name = butil::EnsureString(map_desc->name()); bool decoded = decode_name(orig_name, field_name_str); const std::string& name = decoded ? field_name_str : orig_name; handler.Key(name.data(), name.size(), false); @@ -306,8 +308,8 @@ bool PbToJsonConverter::_PbFieldToJson( handler.StartArray(); if (_option.enum_option == OUTPUT_ENUM_BY_NAME) { for (int index = 0; index < field_size; ++index) { - const std::string& enum_name = reflection->GetRepeatedEnum( - message, field, index)->name(); + const std::string enum_name = butil::EnsureString(reflection->GetRepeatedEnum( + message, field, index)->name()); handler.String(enum_name.data(), enum_name.size(), false); } } else { @@ -321,7 +323,7 @@ bool PbToJsonConverter::_PbFieldToJson( } else { if (_option.enum_option == OUTPUT_ENUM_BY_NAME) { const std::string& enum_name = - reflection->GetEnum(message, field)->name(); + butil::EnsureString(reflection->GetEnum(message, field)->name()); handler.String(enum_name.data(), enum_name.size(), false); } else { handler.AddInt(reflection->GetEnum(message, field)->number()); diff --git a/src/json2pb/pb_to_json.h b/src/json2pb/pb_to_json.h index 8de635170e..4dda3a76a1 100644 --- a/src/json2pb/pb_to_json.h +++ b/src/json2pb/pb_to_json.h @@ -93,7 +93,11 @@ bool ProtoMessageToJson(const google::protobuf::Message& message, std::string* error = NULL); // See for details. 
+#if GOOGLE_PROTOBUF_VERSION >= 6030000 +using Pb2ProtoJsonOptions = google::protobuf::util::JsonPrintOptions; +#else using Pb2ProtoJsonOptions = google::protobuf::util::JsonOptions; +#endif #if GOOGLE_PROTOBUF_VERSION >= 5026002 #define AlwaysPrintPrimitiveFields(options) options.always_print_fields_with_no_presence diff --git a/src/json2pb/protobuf_map.cpp b/src/json2pb/protobuf_map.cpp index f552bf6279..7553523482 100644 --- a/src/json2pb/protobuf_map.cpp +++ b/src/json2pb/protobuf_map.cpp @@ -38,12 +38,12 @@ bool IsProtobufMap(const FieldDescriptor* field) { if (NULL == key_desc || key_desc->is_repeated() || key_desc->cpp_type() != FieldDescriptor::CPPTYPE_STRING - || strcmp(KEY_NAME, key_desc->name().c_str()) != 0) { + || key_desc->name() != KEY_NAME) { return false; } const FieldDescriptor* value_desc = entry_desc->field(VALUE_INDEX); if (NULL == value_desc - || strcmp(VALUE_NAME, value_desc->name().c_str()) != 0) { + || value_desc->name() != VALUE_NAME) { return false; } return true; diff --git a/src/json2pb/protobuf_type_resolver.h b/src/json2pb/protobuf_type_resolver.h index a73a42315e..7eff6c61e8 100644 --- a/src/json2pb/protobuf_type_resolver.h +++ b/src/json2pb/protobuf_type_resolver.h @@ -23,8 +23,9 @@ #include #include #include -#include "butil/string_printf.h" #include "butil/memory/singleton_on_pthread_once.h" +#include "butil/string_printf.h" +#include "butil/strings/string_util.h" namespace json2pb { @@ -32,7 +33,7 @@ namespace json2pb { inline std::string GetTypeUrl(const google::protobuf::Message& message) { return butil::string_printf(PROTOBUF_TYPE_URL_PREFIX"/%s", - message.GetDescriptor()->full_name().c_str()); + butil::EnsureString(message.GetDescriptor()->full_name()).c_str()); } // unique_ptr deleter for TypeResolver only deletes the object diff --git a/src/mcpack2pb/generator.cpp b/src/mcpack2pb/generator.cpp index 0fb789360c..fe47fb33fa 100644 --- a/src/mcpack2pb/generator.cpp +++ b/src/mcpack2pb/generator.cpp @@ -24,16 +24,18 @@ 
#include #include #include -#include "butil/string_printf.h" + #include "butil/file_util.h" -#include "mcpack2pb/mcpack2pb.h" +#include "butil/string_printf.h" +#include "butil/strings/string_util.h" #include "idl_options.pb.h" +#include "mcpack2pb/mcpack2pb.h" namespace mcpack2pb { -const std::string& get_idl_name(const google::protobuf::FieldDescriptor* f) { - const std::string& real_name = f->options().GetExtension(idl_name); - return real_name.empty() ? f->name() : real_name; +const std::string get_idl_name(const google::protobuf::FieldDescriptor* f) { + const std::string real_name = butil::EnsureString(f->options().GetExtension(idl_name)); + return real_name.empty() ? butil::EnsureString(f->name()) : real_name; } bool is_integral_type(ConvertibleIdlType type) { @@ -52,7 +54,7 @@ bool is_integral_type(ConvertibleIdlType type) { } } -const char* field_to_string(const google::protobuf::FieldDescriptor* f) { +const std::string field_to_string(const google::protobuf::FieldDescriptor* f) { switch (f->type()) { case google::protobuf::FieldDescriptor::TYPE_DOUBLE: return "double"; case google::protobuf::FieldDescriptor::TYPE_FLOAT: return "float"; @@ -65,11 +67,11 @@ const char* field_to_string(const google::protobuf::FieldDescriptor* f) { case google::protobuf::FieldDescriptor::TYPE_STRING: return "string"; case google::protobuf::FieldDescriptor::TYPE_GROUP: case google::protobuf::FieldDescriptor::TYPE_MESSAGE: - return f->message_type()->name().c_str(); + return butil::EnsureString(f->message_type()->name()); case google::protobuf::FieldDescriptor::TYPE_BYTES: return "bytes"; case google::protobuf::FieldDescriptor::TYPE_UINT32: return "uint32"; case google::protobuf::FieldDescriptor::TYPE_ENUM: - return f->enum_type()->name().c_str(); + return butil::EnsureString(f->enum_type()->name()); case google::protobuf::FieldDescriptor::TYPE_SFIXED32: return "sfixed32"; case google::protobuf::FieldDescriptor::TYPE_SFIXED64: return "sfixed64"; case 
google::protobuf::FieldDescriptor::TYPE_SINT32: return "sint32"; @@ -269,8 +271,8 @@ static bool generate_parsing(const google::protobuf::Descriptor* d, std::set & ref_msgs, std::set & ref_maps, google::protobuf::io::Printer& impl) { - std::string var_name = mcpack2pb::to_var_name(d->full_name()); - std::string cpp_name = mcpack2pb::to_cpp_name(d->full_name()); + std::string var_name = mcpack2pb::to_var_name(butil::EnsureString(d->full_name())); + std::string cpp_name = mcpack2pb::to_cpp_name(butil::EnsureString(d->full_name())); ref_msgs.insert(var_name); impl.Print("\n// $msg$ from mcpack\n", "msg", d->full_name()); @@ -338,7 +340,7 @@ static bool generate_parsing(const google::protobuf::Descriptor* d, " return false;\n" "}\n" , "msg", cpp_name - , "enum", to_cpp_name(f->enum_type()->full_name()) + , "enum", to_cpp_name(butil::EnsureString(f->enum_type()->full_name())) , "lcfield", f->lowercase_name()); break; case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: @@ -380,8 +382,8 @@ static bool generate_parsing(const google::protobuf::Descriptor* d, , "lcfield", f->lowercase_name()); break; case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { - std::string var_name2 = mcpack2pb::to_var_name(f->message_type()->full_name()); - std::string cpp_name2 = mcpack2pb::to_cpp_name(f->message_type()->full_name()); + std::string var_name2 = mcpack2pb::to_var_name(butil::EnsureString(f->message_type()->full_name())); + std::string cpp_name2 = mcpack2pb::to_cpp_name(butil::EnsureString(f->message_type()->full_name())); if (is_map_entry(f->message_type())) { ref_maps.insert(var_name2); impl.Print( @@ -544,7 +546,7 @@ static bool generate_parsing(const google::protobuf::Descriptor* d, " return value.stream()->good();\n" "}\n" , "msg", cpp_name - , "enum", to_cpp_name(f->enum_type()->full_name()) + , "enum", to_cpp_name(butil::EnsureString(f->enum_type()->full_name())) , "lcfield", f->lowercase_name()); break; case google::protobuf::FieldDescriptor::CPPTYPE_STRING: @@ 
-566,7 +568,7 @@ static bool generate_parsing(const google::protobuf::Descriptor* d, , "lcfield", f->lowercase_name()); break; case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { - std::string var_name2 = mcpack2pb::to_var_name(f->message_type()->full_name()); + std::string var_name2 = mcpack2pb::to_var_name(butil::EnsureString(f->message_type()->full_name())); ref_msgs.insert(var_name2); impl.Print( "{\n" @@ -858,8 +860,8 @@ static bool generate_serializing(const google::protobuf::Descriptor* d, std::set & ref_msgs, std::set & ref_maps, google::protobuf::io::Printer & impl) { - std::string var_name = mcpack2pb::to_var_name(d->full_name()); - std::string cpp_name = mcpack2pb::to_cpp_name(d->full_name()); + std::string var_name = mcpack2pb::to_var_name(butil::EnsureString(d->full_name())); + std::string cpp_name = mcpack2pb::to_cpp_name(butil::EnsureString(d->full_name())); ref_msgs.insert(var_name); impl.Print( "void serialize_$vmsg$_body(\n" @@ -946,8 +948,8 @@ static bool generate_serializing(const google::protobuf::Descriptor* d, return false; } const google::protobuf::Descriptor* msg2 = f->message_type(); - std::string var_name2 = mcpack2pb::to_var_name(msg2->full_name()); - std::string cpp_name2 = mcpack2pb::to_cpp_name(msg2->full_name()); + std::string var_name2 = mcpack2pb::to_var_name(butil::EnsureString(msg2->full_name())); + std::string cpp_name2 = mcpack2pb::to_cpp_name(butil::EnsureString(msg2->full_name())); if (is_map_entry(msg2)) { ref_maps.insert(var_name2); impl.Print( @@ -988,7 +990,7 @@ static bool generate_serializing(const google::protobuf::Descriptor* d, break; case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { std::string var_name3 = mcpack2pb::to_var_name( - value_desc->message_type()->full_name()); + butil::EnsureString(value_desc->message_type()->full_name())); ref_msgs.insert(var_name3); impl.Print( " serializer.begin_object(pair.key());\n" @@ -1048,7 +1050,7 @@ static bool generate_serializing(const 
google::protobuf::Descriptor* d, << to_mcpack_typestr(cit2, f2) << " (idl)"; return false; } - std::string var_name3 = mcpack2pb::to_var_name(f2->message_type()->full_name()); + std::string var_name3 = mcpack2pb::to_var_name(butil::EnsureString(f2->message_type()->full_name())); ref_msgs.insert(var_name3); if (f2->is_repeated()) { impl.Print( @@ -1091,7 +1093,7 @@ static bool generate_serializing(const google::protobuf::Descriptor* d, } } else if (f2->is_repeated()) { const std::string msgstr = butil::string_printf( - "msg.%s(i)", f->lowercase_name().c_str()); + "msg.%s(i)", butil::EnsureString(f->lowercase_name()).c_str()); switch (f2->cpp_type()) { case google::protobuf::FieldDescriptor::CPPTYPE_INT32: case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: @@ -1141,7 +1143,7 @@ static bool generate_serializing(const google::protobuf::Descriptor* d, } } else { const std::string msgstr = butil::string_printf( - "msg.%s(i)", f->lowercase_name().c_str()); + "msg.%s(i)", butil::EnsureString(f->lowercase_name()).c_str()); switch (f2->cpp_type()) { case google::protobuf::FieldDescriptor::CPPTYPE_INT32: case google::protobuf::FieldDescriptor::CPPTYPE_INT64: @@ -1239,7 +1241,7 @@ static bool generate_serializing(const google::protobuf::Descriptor* d, << to_mcpack_typestr(cit, f) << " (idl)"; return false; } - std::string var_name2 = mcpack2pb::to_var_name(f->message_type()->full_name()); + std::string var_name2 = mcpack2pb::to_var_name(butil::EnsureString(f->message_type()->full_name())); ref_msgs.insert(var_name2); impl.Print("if (msg.has_$lcfield$()) {\n" " serializer.begin_object(\"$field$\");\n" @@ -1298,8 +1300,8 @@ static std::string protobuf_style_normalize_filename(const std::string & fname) static bool generate_registration( const google::protobuf::FileDescriptor* file, google::protobuf::io::Printer & impl) { - const std::string cpp_ns = to_cpp_name(file->package()); - std::string norm_fname = protobuf_style_normalize_filename(file->name()); + const std::string 
cpp_ns = to_cpp_name(butil::EnsureString(file->package())); + std::string norm_fname = protobuf_style_normalize_filename(butil::EnsureString(file->name())); impl.Print( "\n// register all message handlers\n" "struct RegisterMcpackFunctions_$norm_fname$ {\n" @@ -1309,7 +1311,7 @@ static bool generate_registration( impl.Indent(); for (int i = 0; i < file->message_type_count(); ++i) { const google::protobuf::Descriptor* d = file->message_type(i); - std::string var_name = mcpack2pb::to_var_name(d->full_name()); + std::string var_name = mcpack2pb::to_var_name(butil::EnsureString(d->full_name())); impl.Print( "\n" @@ -1359,8 +1361,8 @@ bool McpackToProtobuf::Generate(const google::protobuf::FileDescriptor* file, // skip the file. return true; } - - std::string cpp_name = file->name(); + + std::string cpp_name = butil::EnsureString(file->name()); const size_t pos = cpp_name.find_last_of('.'); if (pos == std::string::npos) { ::butil::string_printf(error, "Bad filename=%s", cpp_name.c_str()); @@ -1391,16 +1393,16 @@ bool McpackToProtobuf::Generate(const google::protobuf::FileDescriptor* file, if (!generate_parsing(d, ref_msgs, ref_maps, gimpl_printer)) { ::butil::string_printf( error, "Fail to generate parsing code for %s", - d->full_name().c_str()); + butil::EnsureString(d->full_name()).c_str()); return false; } if (!generate_serializing(d, ref_msgs, ref_maps, gimpl_printer)) { ::butil::string_printf( error, "Fail to generate serializing code for %s", - d->full_name().c_str()); + butil::EnsureString(d->full_name()).c_str()); return false; } - std::string var_name = mcpack2pb::to_var_name(d->full_name()); + std::string var_name = mcpack2pb::to_var_name(butil::EnsureString(d->full_name())); gdecl_printer.Print( "::mcpack2pb::FieldMap* g_$vmsg$_fields = NULL;\n" , "vmsg", var_name); From aec1531cbdbbbb3a14be5a943ccd7d0a6893c99e Mon Sep 17 00:00:00 2001 From: koarz <66543806+koarz@users.noreply.github.com> Date: Mon, 5 Jan 2026 14:04:37 +0800 Subject: [PATCH 14/84] feat: 
enable TLS key logging via SSLKEYLOGFILE env (#3173) * feat: enable TLS key logging via SSLKEYLOGFILE env * fix --- src/brpc/details/ssl_helper.cpp | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/brpc/details/ssl_helper.cpp b/src/brpc/details/ssl_helper.cpp index f38b16d6a5..322b9cc3ff 100644 --- a/src/brpc/details/ssl_helper.cpp +++ b/src/brpc/details/ssl_helper.cpp @@ -18,10 +18,14 @@ #include "brpc/ssl_options.h" +#include "butil/files/scoped_file.h" #include #ifndef USE_MESALINK #include // recv +#include // pthread_once +#include // fopen +#include // getenv #include #include #include @@ -185,6 +189,47 @@ static void SSLMessageCallback(int write_p, int version, int content_type, #endif // TLS1_RT_HEARTBEAT } +#if defined(OPENSSL_IS_BORINGSSL) || (OPENSSL_VERSION_NUMBER >= 0x10101000L) +static pthread_once_t g_ssl_keylog_once = PTHREAD_ONCE_INIT; +static FILE* g_ssl_keylog_file = NULL; + +static void InitSSLKeyLogFile() { + const char* path = getenv("SSLKEYLOGFILE"); + if (path == NULL || path[0] == '\0') { + return; + } + g_ssl_keylog_file = fopen(path, "ae"); + if (g_ssl_keylog_file == NULL) { + PLOG(WARNING) << "Fail to open SSLKEYLOGFILE=" << path; + } else { + setvbuf(g_ssl_keylog_file, NULL, _IOLBF, 0); + LOG(WARNING) << "SSLKEYLOGFILE is enabled (path: " << path << "). " + << "Sensitive TLS session keys will be written to this file. " + << "This feature is intended for debugging only and should NOT be used in production environments."; + } +} + +static void SSLKeyLogCallback(const SSL* ssl, const char* line) { + (void)ssl; + if (line == NULL || g_ssl_keylog_file == NULL) { + return; + } + // Write the full key log line with newline in one call to keep output atomic. 
+ fprintf(g_ssl_keylog_file, "%s\n", line); +} + +static void MaybeSetKeyLogCallback(SSL_CTX* ctx) { + pthread_once(&g_ssl_keylog_once, InitSSLKeyLogFile); + if (ctx != NULL && g_ssl_keylog_file != NULL) { + SSL_CTX_set_keylog_callback(ctx, SSLKeyLogCallback); + } +} +#else +static void MaybeSetKeyLogCallback(SSL_CTX* ctx) { + (void)ctx; +} +#endif + #ifndef OPENSSL_NO_DH static DH* SSLGetDHCallback(SSL* ssl, int exp, int keylen) { (void)exp; @@ -494,6 +539,7 @@ SSL_CTX* CreateClientSSLContext(const ChannelSSLOptions& options) { LOG(ERROR) << "Fail to new SSL_CTX: " << SSLError(ERR_get_error()); return NULL; } + MaybeSetKeyLogCallback(ssl_ctx.get()); if (!options.client_cert.certificate.empty() && LoadCertificate(ssl_ctx.get(), @@ -532,6 +578,7 @@ SSL_CTX* CreateServerSSLContext(const std::string& certificate, LOG(ERROR) << "Fail to new SSL_CTX: " << SSLError(ERR_get_error()); return NULL; } + MaybeSetKeyLogCallback(ssl_ctx.get()); if (LoadCertificate(ssl_ctx.get(), certificate, private_key, hostnames) != 0) { From 4fc5ba2fbc42c7e0570a32e78b65f9efd3c9cd60 Mon Sep 17 00:00:00 2001 From: Anant Shukla Date: Mon, 12 Jan 2026 09:50:13 +0530 Subject: [PATCH 15/84] Fix port parsing validation in str2endpoint (#3193) * Fix port parsing validation in str2endpoint Signed-off-by: Anant Shukla * Add unit tests for rejecting trailing characters after port parsing Signed-off-by: Anant Shukla --------- Signed-off-by: Anant Shukla --- src/butil/endpoint.cpp | 2 +- test/endpoint_unittest.cpp | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/butil/endpoint.cpp b/src/butil/endpoint.cpp index d243252d6d..a8d8936c63 100644 --- a/src/butil/endpoint.cpp +++ b/src/butil/endpoint.cpp @@ -288,7 +288,7 @@ int str2endpoint(const char* str, EndPoint* point) { if (end == str + i) { return -1; } else if (*end) { - for (++end; isspace(*end); ++end); + for (; isspace(*end); ++end); if (*end) { return -1; } diff --git a/test/endpoint_unittest.cpp 
b/test/endpoint_unittest.cpp index cf47131599..af3098f475 100644 --- a/test/endpoint_unittest.cpp +++ b/test/endpoint_unittest.cpp @@ -115,6 +115,19 @@ TEST(EndPointTest, endpoint) { ASSERT_EQ(289, p6.port); #endif } +TEST(EndPointTest, endpoint_reject_trailing_characters_after_port) { + butil::EndPoint ep; + + // invalid: non-whitespace after port + ASSERT_EQ(-1, butil::str2endpoint("127.0.0.1:8000a", &ep)); + ASSERT_EQ(-1, butil::str2endpoint("127.0.0.1:8000#", &ep)); + ASSERT_EQ(-1, butil::str2endpoint("127.0.0.1:8000abc", &ep)); + + // valid: only whitespace after port + ASSERT_EQ(0, butil::str2endpoint("127.0.0.1:8000 ", &ep)); + ASSERT_EQ(0, butil::str2endpoint("127.0.0.1:8000\t", &ep)); + ASSERT_EQ(0, butil::str2endpoint("127.0.0.1:8000\n", &ep)); +} TEST(EndPointTest, hash_table) { butil::hash_map m; From 76ac9a4e9f1c6d59ec0a6dfcb054d09acb34b081 Mon Sep 17 00:00:00 2001 From: yanyuan06 Date: Wed, 7 Jan 2026 16:19:22 +0800 Subject: [PATCH 16/84] support tag for selective channel --- src/brpc/controller.cpp | 2 +- src/brpc/selective_channel.cpp | 29 +++++++++++++++++------------ src/brpc/selective_channel.h | 8 ++++++-- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index b30a13476e..d7b511dbd4 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -862,7 +862,7 @@ void Controller::Call::OnComplete( } } - if (need_feedback) { + if (need_feedback && c->_lb) { const LoadBalancer::CallInfo info = { begin_time_us, peer_id, error_code, c }; c->_lb->Feedback(info); diff --git a/src/brpc/selective_channel.cpp b/src/brpc/selective_channel.cpp index 21b871af3f..dd155a3044 100644 --- a/src/brpc/selective_channel.cpp +++ b/src/brpc/selective_channel.cpp @@ -83,9 +83,9 @@ class ChannelBalancer : public SharedLoadBalancer { ChannelBalancer() {} ~ChannelBalancer(); int Init(const char* lb_name); - int AddChannel(ChannelBase* sub_channel, + int AddChannel(ChannelBase* sub_channel, const
std::string& tag, SelectiveChannel::ChannelHandle* handle); - void RemoveAndDestroyChannel(SelectiveChannel::ChannelHandle handle); + void RemoveAndDestroyChannel(const SelectiveChannel::ChannelHandle& handle); int SelectChannel(const LoadBalancer::SelectIn& in, SelectOut* out); int CheckHealth(); void Describe(std::ostream& os, const DescribeOptions&); @@ -168,7 +168,7 @@ int ChannelBalancer::Init(const char* lb_name) { return SharedLoadBalancer::Init(lb_name); } -int ChannelBalancer::AddChannel(ChannelBase* sub_channel, +int ChannelBalancer::AddChannel(ChannelBase* sub_channel, const std::string& tag, SelectiveChannel::ChannelHandle* handle) { if (NULL == sub_channel) { LOG(ERROR) << "Parameter[sub_channel] is NULL"; @@ -206,7 +206,7 @@ int ChannelBalancer::AddChannel(ChannelBase* sub_channel, << sock_id << " is disabled"; return -1; } - if (!AddServer(ServerId(sock_id))) { + if (!AddServer(ServerId(sock_id, tag))) { LOG(ERROR) << "Duplicated sub_channel=" << sub_channel; // sub_chan will be deleted when the socket is recycled. ptr->SetFailed(); @@ -217,17 +217,18 @@ int ChannelBalancer::AddChannel(ChannelBase* sub_channel, // The health-check-related reference has been held on created. 
_chan_map[sub_channel]= ptr.get(); if (handle) { - *handle = sock_id; + handle->id = sock_id; + handle->tag = tag; } return 0; } -void ChannelBalancer::RemoveAndDestroyChannel(SelectiveChannel::ChannelHandle handle) { - if (!RemoveServer(ServerId(handle))) { +void ChannelBalancer::RemoveAndDestroyChannel(const SelectiveChannel::ChannelHandle& handle) { + if (!RemoveServer(ServerId(handle.id, handle.tag))) { return; } SocketUniquePtr ptr; - const int rc = Socket::AddressFailedAsWell(handle, &ptr); + const int rc = Socket::AddressFailedAsWell(handle.id, &ptr); if (rc >= 0) { SubChannel* sub = static_cast(ptr->user()); { @@ -311,8 +312,6 @@ int Sender::IssueRPC(int64_t start_realtime_us) { _main_cntl->SetFailed(rc, "Fail to select channel, %s", berror(rc)); return -1; } - DLOG(INFO) << "Selected channel=" << sel_out.channel() << ", size=" - << (_main_cntl->_accessed ? _main_cntl->_accessed->size() : 0); _main_cntl->_current_call.need_feedback = sel_out.need_feedback; _main_cntl->_current_call.peer_id = sel_out.fake_sock->id(); @@ -534,16 +533,22 @@ bool SelectiveChannel::initialized() const { int SelectiveChannel::AddChannel(ChannelBase* sub_channel, ChannelHandle* handle) { + return AddChannel(sub_channel, "", handle); +} + +int SelectiveChannel::AddChannel(ChannelBase* sub_channel, + const std::string& tag, + ChannelHandle* handle) { schan::ChannelBalancer* lb = static_cast(_chan._lb.get()); if (lb == NULL) { LOG(ERROR) << "You must call Init() to initialize a SelectiveChannel"; return -1; } - return lb->AddChannel(sub_channel, handle); + return lb->AddChannel(sub_channel, tag, handle); } -void SelectiveChannel::RemoveAndDestroyChannel(ChannelHandle handle) { +void SelectiveChannel::RemoveAndDestroyChannel(const ChannelHandle& handle) { schan::ChannelBalancer* lb = static_cast(_chan._lb.get()); if (lb == NULL) { diff --git a/src/brpc/selective_channel.h b/src/brpc/selective_channel.h index 8b93e5bf69..6c0af1da9c 100644 --- a/src/brpc/selective_channel.h +++ 
b/src/brpc/selective_channel.h @@ -51,7 +51,10 @@ namespace brpc { // in `done'. class SelectiveChannel : public ChannelBase/*non-copyable*/ { public: - typedef SocketId ChannelHandle; + struct ChannelHandle { + SocketId id; + std::string tag; + }; SelectiveChannel(); ~SelectiveChannel(); @@ -67,9 +70,10 @@ class SelectiveChannel : public ChannelBase/*non-copyable*/ { // NOTE: Different from pchan, schan can add channels at any time. // Returns 0 on success, -1 otherwise. int AddChannel(ChannelBase* sub_channel, ChannelHandle* handle); + int AddChannel(ChannelBase* sub_channel, const std::string& tag, ChannelHandle* handle); // Remove and destroy the sub_channel associated with `handle'. - void RemoveAndDestroyChannel(ChannelHandle handle); + void RemoveAndDestroyChannel(const ChannelHandle& handle); // Send request by a sub channel. schan may retry another sub channel // according to retrying/backup-request settings. From 89d446614d2906f5af7d39326c734b67c8459e27 Mon Sep 17 00:00:00 2001 From: Jingyuan <52315061+MJY-HUST@users.noreply.github.com> Date: Mon, 19 Jan 2026 01:10:37 +0800 Subject: [PATCH 17/84] support wait with predicate in bthread's ConditionVariable (#3195) --- src/bthread/bthread.h | 1 + src/bthread/condition_variable.h | 14 +++++++++ test/bthread_cond_unittest.cpp | 51 ++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/src/bthread/bthread.h b/src/bthread/bthread.h index 7e42c96c9f..603cf04d0e 100644 --- a/src/bthread/bthread.h +++ b/src/bthread/bthread.h @@ -30,6 +30,7 @@ #if defined(__cplusplus) #include #include "bthread/mutex.h" // use bthread_mutex_t in the RAII way +#include "bthread/condition_variable.h" // use bthread_cond_t in the RAII way #endif // __cplusplus #include "bthread/id.h" diff --git a/src/bthread/condition_variable.h b/src/bthread/condition_variable.h index c684cf6cbd..fb6bb4bcb5 100644 --- a/src/bthread/condition_variable.h +++ b/src/bthread/condition_variable.h @@ -63,6 +63,20 @@ class 
ConditionVariable { bthread_cond_wait(&_cond, lock.mutex()); } + template + void wait(std::unique_lock& lock, Predicate p) { + while (!p()) { + bthread_cond_wait(&_cond, lock.mutex()->native_handler()); + } + } + + template + void wait(std::unique_lock& lock, Predicate p) { + while (!p()) { + bthread_cond_wait(&_cond, lock.mutex()); + } + } + // Unlike std::condition_variable, we return ETIMEDOUT when time expires // rather than std::timeout int wait_for(std::unique_lock& lock, diff --git a/test/bthread_cond_unittest.cpp b/test/bthread_cond_unittest.cpp index d01ef69c26..f2dcddfe8c 100644 --- a/test/bthread_cond_unittest.cpp +++ b/test/bthread_cond_unittest.cpp @@ -138,7 +138,10 @@ TEST(CondTest, sanity) { struct WrapperArg { bthread::Mutex mutex; bthread::ConditionVariable cond; + bool ready = false; + static std::atomic wake_time; }; +std::atomic WrapperArg::wake_time{0}; void* cv_signaler(void* void_arg) { WrapperArg* a = (WrapperArg*)void_arg; @@ -168,6 +171,23 @@ void* cv_mutex_waiter(void* void_arg) { return NULL; } + +void* cv_bmutex_waiter_with_pred(void* void_arg) { + WrapperArg* a = (WrapperArg*)void_arg; + std::unique_lock lck(*a->mutex.native_handler()); + a->cond.wait(lck, [&] { return a->ready; }); + WrapperArg::wake_time.fetch_add(1); + return NULL; +} + +void* cv_mutex_waiter_with_pred(void* void_arg) { + WrapperArg* a = (WrapperArg*)void_arg; + std::unique_lock lck(a->mutex); + a->cond.wait(lck, [&] { return a->ready; }); + WrapperArg::wake_time.fetch_add(1); + return NULL; +} + #define COND_IN_PTHREAD #ifndef COND_IN_PTHREAD @@ -202,6 +222,37 @@ TEST(CondTest, cpp_wrapper) { } } +TEST(CondTest, cpp_wrapper2) { + stop = false; + bthread::ConditionVariable cond; + pthread_t bmutex_waiter_threads[8]; + pthread_t mutex_waiter_threads[8]; + pthread_t signal_thread; + WrapperArg a; + for (size_t i = 0; i < ARRAY_SIZE(bmutex_waiter_threads); ++i) { + ASSERT_EQ(0, pthread_create(&bmutex_waiter_threads[i], NULL, + cv_bmutex_waiter_with_pred, &a)); + 
ASSERT_EQ(0, pthread_create(&mutex_waiter_threads[i], NULL, + cv_mutex_waiter_with_pred, &a)); + } + ASSERT_EQ(0, pthread_create(&signal_thread, NULL, cv_signaler, &a)); + bthread_usleep(100L * 1000); + ASSERT_EQ(WrapperArg::wake_time, 0); + { + BAIDU_SCOPED_LOCK(a.mutex); + stop = true; + a.ready = true; + + } + pthread_join(signal_thread, NULL); + a.cond.notify_all(); + for (size_t i = 0; i < ARRAY_SIZE(bmutex_waiter_threads); ++i) { + pthread_join(bmutex_waiter_threads[i], NULL); + pthread_join(mutex_waiter_threads[i], NULL); + } + ASSERT_EQ(WrapperArg::wake_time, 16); +} + #ifndef COND_IN_PTHREAD #undef pthread_join #undef pthread_create From b8ac2333d12b0e9ec48bec87318bd0e3571c476b Mon Sep 17 00:00:00 2001 From: yanyuan06 Date: Fri, 16 Jan 2026 15:27:02 +0800 Subject: [PATCH 18/84] support changing ownership for SelectiveChannel --- src/brpc/selective_channel.cpp | 27 ++++++++++++++------------- src/brpc/selective_channel.h | 17 +++++++++++++++-- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/brpc/selective_channel.cpp b/src/brpc/selective_channel.cpp index dd155a3044..ec93354121 100644 --- a/src/brpc/selective_channel.cpp +++ b/src/brpc/selective_channel.cpp @@ -41,10 +41,13 @@ typedef std::map ChannelToIdMap; class SubChannel : public SocketUser { public: ChannelBase* chan; + ChannelOwnership ownership; // internal channel is deleted after the fake Socket is SetFailed void BeforeRecycle(Socket*) { - delete chan; + if (ownership == OWNS_CHANNEL) { + delete chan; + } delete this; } @@ -83,7 +86,8 @@ class ChannelBalancer : public SharedLoadBalancer { ChannelBalancer() {} ~ChannelBalancer(); int Init(const char* lb_name); - int AddChannel(ChannelBase* sub_channel, const std::string& tag, + int AddChannel(ChannelBase* sub_channel, + const SelectiveChannel::SubChannelOptions& subopt, SelectiveChannel::ChannelHandle* handle); void RemoveAndDestroyChannel(const SelectiveChannel::ChannelHandle& handle); int SelectChannel(const
LoadBalancer::SelectIn& in, SelectOut* out); @@ -168,7 +172,8 @@ int ChannelBalancer::Init(const char* lb_name) { return SharedLoadBalancer::Init(lb_name); } -int ChannelBalancer::AddChannel(ChannelBase* sub_channel, const std::string& tag, +int ChannelBalancer::AddChannel(ChannelBase* sub_channel, + const SelectiveChannel::SubChannelOptions& subopt, SelectiveChannel::ChannelHandle* handle) { if (NULL == sub_channel) { LOG(ERROR) << "Parameter[sub_channel] is NULL"; @@ -185,6 +190,7 @@ int ChannelBalancer::AddChannel(ChannelBase* sub_channel, const std::string& tag return -1; } sub_chan->chan = sub_channel; + sub_chan->ownership = subopt.ownership; SocketId sock_id; SocketOptions options; options.user = sub_chan; @@ -206,7 +212,7 @@ int ChannelBalancer::AddChannel(ChannelBase* sub_channel, const std::string& tag << sock_id << " is disabled"; return -1; } - if (!AddServer(ServerId(sock_id, tag))) { + if (!AddServer(ServerId(sock_id, subopt.tag))) { LOG(ERROR) << "Duplicated sub_channel=" << sub_channel; // sub_chan will be deleted when the socket is recycled. ptr->SetFailed(); @@ -215,10 +221,10 @@ int ChannelBalancer::AddChannel(ChannelBase* sub_channel, const std::string& tag return -1; } // The health-check-related reference has been held on created. 
- _chan_map[sub_channel]= ptr.get(); + _chan_map[sub_channel] = ptr.get(); if (handle) { handle->id = sock_id; - handle->tag = tag; + handle->tag = subopt.tag; } return 0; } @@ -532,12 +538,7 @@ bool SelectiveChannel::initialized() const { } int SelectiveChannel::AddChannel(ChannelBase* sub_channel, - ChannelHandle* handle) { - return AddChannel(sub_channel, "", handle); -} - -int SelectiveChannel::AddChannel(ChannelBase* sub_channel, - const std::string& tag, + const SubChannelOptions& option, ChannelHandle* handle) { schan::ChannelBalancer* lb = static_cast(_chan._lb.get()); @@ -545,7 +546,7 @@ int SelectiveChannel::AddChannel(ChannelBase* sub_channel, LOG(ERROR) << "You must call Init() to initialize a SelectiveChannel"; return -1; } - return lb->AddChannel(sub_channel, tag, handle); + return lb->AddChannel(sub_channel, option, handle); } void SelectiveChannel::RemoveAndDestroyChannel(const ChannelHandle& handle) { diff --git a/src/brpc/selective_channel.h b/src/brpc/selective_channel.h index 6c0af1da9c..fd8fb9cf9d 100644 --- a/src/brpc/selective_channel.h +++ b/src/brpc/selective_channel.h @@ -56,6 +56,11 @@ class SelectiveChannel : public ChannelBase/*non-copyable*/ { std::string tag; }; + struct SubChannelOptions { + std::string tag; + ChannelOwnership ownership = OWNS_CHANNEL; + }; + SelectiveChannel(); ~SelectiveChannel(); @@ -69,8 +74,16 @@ class SelectiveChannel : public ChannelBase/*non-copyable*/ { // On success, handle is set with the key for removal. // NOTE: Different from pchan, schan can add channels at any time. // Returns 0 on success, -1 otherwise. 
- int AddChannel(ChannelBase* sub_channel, ChannelHandle* handle); - int AddChannel(ChannelBase* sub_channel, const std::string& tag, ChannelHandle* handle); + int AddChannel(ChannelBase* sub_channel, ChannelHandle* handle) { + return AddChannel(sub_channel, SubChannelOptions(), handle); + } + int AddChannel(ChannelBase* sub_channel, const std::string& tag, ChannelHandle* handle) { + SubChannelOptions option; + option.tag = tag; + return AddChannel(sub_channel, option, handle); + } + int AddChannel(ChannelBase* sub_channel, const SubChannelOptions& option, + ChannelHandle* handle); // Remove and destroy the sub_channel associated with `handle'. void RemoveAndDestroyChannel(const ChannelHandle& handle); From bf7292a1bbfdf0608b9a96dfa81f09e43bffa3e3 Mon Sep 17 00:00:00 2001 From: Xiaofeng Wang Date: Sun, 18 Jan 2026 16:05:05 +0800 Subject: [PATCH 19/84] Release 1.16.0 --- CMakeLists.txt | 2 +- MODULE.bazel | 2 +- NOTICE | 2 +- RELEASE_VERSION | 2 +- package/rpm/brpc.spec | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c6f985fc6..b10991f5f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,7 @@ if(POLICY CMP0042) cmake_policy(SET CMP0042 NEW) endif() -set(BRPC_VERSION 1.15.0) +set(BRPC_VERSION 1.16.0) SET(CPACK_GENERATOR "DEB") SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "brpc authors") diff --git a/MODULE.bazel b/MODULE.bazel index 19700bc411..95f4e6b763 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,6 +1,6 @@ module( name = 'brpc', - version = '1.15.0', + version = '1.16.0', compatibility_level = 1, ) diff --git a/NOTICE b/NOTICE index 55a2c50bd0..6185d93c1e 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Apache bRPC -Copyright 2018-2025 The Apache Software Foundation +Copyright 2018-2026 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
diff --git a/RELEASE_VERSION b/RELEASE_VERSION index 141f2e805b..15b989e398 100644 --- a/RELEASE_VERSION +++ b/RELEASE_VERSION @@ -1 +1 @@ -1.15.0 +1.16.0 diff --git a/package/rpm/brpc.spec b/package/rpm/brpc.spec index 51c02d7f6f..4099ca2cbd 100644 --- a/package/rpm/brpc.spec +++ b/package/rpm/brpc.spec @@ -18,7 +18,7 @@ # Name: brpc -Version: 1.15.0 +Version: 1.16.0 Release: 1%{?dist} Summary: Industrial-grade RPC framework using C++ Language. From 3f8c2ba74b04ecf9fde0cf0351e241f6fc951529 Mon Sep 17 00:00:00 2001 From: Mao <75189432+wenjiecn@users.noreply.github.com> Date: Mon, 26 Jan 2026 10:57:05 +0800 Subject: [PATCH 20/84] Support SO_BINDTODEVICE and bind client_host (#3179) * bind_client_ip * fix UT & review * add client_host UT * updated to support SO_BINDTODEVICE. * updated to support SO_BINDTODEVICE and bind client_host. * review --- src/brpc/channel.cpp | 41 ++++++++++++++++++-- src/brpc/channel.h | 10 +++++ src/brpc/details/naming_service_thread.cpp | 4 +- src/brpc/details/naming_service_thread.h | 9 ++--- src/brpc/socket.cpp | 25 +++++++++++- src/brpc/socket.h | 5 +++ src/brpc/socket_map.cpp | 16 ++------ src/brpc/socket_map.h | 22 ++++++++++- test/brpc_server_unittest.cpp | 45 ++++++++++++++++++++++ 9 files changed, 150 insertions(+), 27 deletions(-) diff --git a/src/brpc/channel.cpp b/src/brpc/channel.cpp index 0fd43d7c9c..a130f61356 100644 --- a/src/brpc/channel.cpp +++ b/src/brpc/channel.cpp @@ -77,6 +77,8 @@ ChannelSSLOptions* ChannelOptions::mutable_ssl_options() { static ChannelSignature ComputeChannelSignature(const ChannelOptions& opt) { if (opt.auth == NULL && !opt.has_ssl_options() && + opt.client_host.empty() && + opt.device_name.empty() && opt.connection_group.empty() && opt.hc_option.health_check_path.empty()) { // Returning zeroized result by default is more intuitive for users. 
@@ -94,6 +96,14 @@ static ChannelSignature ComputeChannelSignature(const ChannelOptions& opt) { buf.append("|conng="); buf.append(opt.connection_group); } + if (!opt.client_host.empty()) { + buf.append("|clih="); + buf.append(opt.client_host); + } + if (!opt.device_name.empty()) { + buf.append("|devn="); + buf.append(opt.device_name); + } if (opt.auth) { buf.append("|auth="); buf.append((char*)&opt.auth, sizeof(opt.auth)); @@ -362,14 +372,27 @@ int Channel::InitSingle(const butil::EndPoint& server_addr_and_port, LOG(ERROR) << "Invalid port=" << port; return -1; } + butil::EndPoint client_endpoint; + if (!_options.client_host.empty() && + butil::str2ip(_options.client_host.c_str(), &client_endpoint.ip) != 0 && + butil::hostname2ip(_options.client_host.c_str(), &client_endpoint.ip) != 0) { + LOG(ERROR) << "Invalid client host=`" << _options.client_host << '\''; + return -1; + } _server_address = server_addr_and_port; const ChannelSignature sig = ComputeChannelSignature(_options); std::shared_ptr ssl_ctx; if (CreateSocketSSLContext(_options, &ssl_ctx) != 0) { return -1; } + SocketOptions opt; + opt.local_side = client_endpoint; + opt.initial_ssl_ctx = ssl_ctx; + opt.use_rdma = _options.use_rdma; + opt.hc_option = _options.hc_option; + opt.device_name = _options.device_name; if (SocketMapInsert(SocketMapKey(server_addr_and_port, sig), - &_server_id, ssl_ctx, _options.use_rdma, _options.hc_option) != 0) { + &_server_id, opt) != 0) { LOG(ERROR) << "Fail to insert into SocketMap"; return -1; } @@ -397,6 +420,13 @@ int Channel::Init(const char* ns_url, _options.mutable_ssl_options()->sni_name = _service_name; } } + butil::EndPoint client_endpoint; + if (!_options.client_host.empty() && + butil::str2ip(_options.client_host.c_str(), &client_endpoint.ip) != 0 && + butil::hostname2ip(_options.client_host.c_str(), &client_endpoint.ip) != 0) { + LOG(ERROR) << "Invalid client host=`" << _options.client_host << '\''; + return -1; + } std::unique_ptr lb(new (std::nothrow) 
LoadBalancerWithNaming); if (NULL == lb) { @@ -406,10 +436,13 @@ int Channel::Init(const char* ns_url, GetNamingServiceThreadOptions ns_opt; ns_opt.succeed_without_server = _options.succeed_without_server; ns_opt.log_succeed_without_server = _options.log_succeed_without_server; - ns_opt.use_rdma = _options.use_rdma; + ns_opt.socket_option.use_rdma = _options.use_rdma; ns_opt.channel_signature = ComputeChannelSignature(_options); - ns_opt.hc_option = _options.hc_option; - if (CreateSocketSSLContext(_options, &ns_opt.ssl_ctx) != 0) { + ns_opt.socket_option.hc_option = _options.hc_option; + ns_opt.socket_option.local_side = client_endpoint; + ns_opt.socket_option.device_name = _options.device_name; + if (CreateSocketSSLContext(_options, + &ns_opt.socket_option.initial_ssl_ctx) != 0) { return -1; } if (lb->Init(ns_url, lb_name, _options.ns_filter, &ns_opt) != 0) { diff --git a/src/brpc/channel.h b/src/brpc/channel.h index c970209b3a..0f349ac6fe 100644 --- a/src/brpc/channel.h +++ b/src/brpc/channel.h @@ -148,6 +148,16 @@ struct ChannelOptions { // Its priority is higher than FLAGS_health_check_path and FLAGS_health_check_timeout_ms. // When it is not set, FLAGS_health_check_path and FLAGS_health_check_timeout_ms will take effect. HealthCheckOption hc_option; + + // IP address or host name of the client. + // if the client_host is "", the client IP address is determined by the OS. + // Default: "" + std::string client_host; + + // The device name of the client's network adapter. + // if the device_name is "", the flow control is determined by the OS. + // Default: "" + std::string device_name; private: // SSLOptions is large and not often used, allocate it on heap to // prevent ChannelOptions from being bloated in most cases. 
diff --git a/src/brpc/details/naming_service_thread.cpp b/src/brpc/details/naming_service_thread.cpp index 341ca35b09..f882b2255d 100644 --- a/src/brpc/details/naming_service_thread.cpp +++ b/src/brpc/details/naming_service_thread.cpp @@ -125,8 +125,8 @@ void NamingServiceThread::Actions::ResetServers( // Socket. SocketMapKey may be passed through AddWatcher. Make sure // to pick those Sockets with the right settings during OnAddedServers const SocketMapKey key(_added[i], _owner->_options.channel_signature); - CHECK_EQ(0, SocketMapInsert(key, &tagged_id.id, _owner->_options.ssl_ctx, - _owner->_options.use_rdma, _owner->_options.hc_option)); + CHECK_EQ(0, SocketMapInsert(key, &tagged_id.id, + _owner->_options.socket_option)); _added_sockets.push_back(tagged_id); } diff --git a/src/brpc/details/naming_service_thread.h b/src/brpc/details/naming_service_thread.h index 1745e5f267..9acb8f2931 100644 --- a/src/brpc/details/naming_service_thread.h +++ b/src/brpc/details/naming_service_thread.h @@ -44,15 +44,14 @@ class NamingServiceWatcher { struct GetNamingServiceThreadOptions { GetNamingServiceThreadOptions() : succeed_without_server(false) - , log_succeed_without_server(true) - , use_rdma(false) {} + , log_succeed_without_server(true) { + socket_option.use_rdma = false; +} bool succeed_without_server; bool log_succeed_without_server; - bool use_rdma; - HealthCheckOption hc_option; ChannelSignature channel_signature; - std::shared_ptr ssl_ctx; + SocketOptions socket_option; }; // A dedicated thread to map a name to ServerIds diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index 9490650b78..e431aceff9 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -728,7 +728,8 @@ int Socket::OnCreated(const SocketOptions& options) { _keytable_pool = options.keytable_pool; _tos = 0; _remote_side = options.remote_side; - _local_side = butil::EndPoint(); + _local_side = options.local_side; + _device_name = options.device_name; _on_edge_triggered_events = 
options.on_edge_triggered_events; _user = options.user; _conn = options.conn; @@ -1296,7 +1297,25 @@ int Socket::Connect(const timespec* abstime, CHECK_EQ(0, butil::make_close_on_exec(sockfd)); // We need to do async connect (to manage the timeout by ourselves). CHECK_EQ(0, butil::make_non_blocking(sockfd)); - + if (!_device_name.empty()) { + if (setsockopt(sockfd, SOL_SOCKET, SO_BINDTODEVICE, + _device_name.c_str(), _device_name.size()) < 0) { + PLOG(ERROR) << "Fail to set SO_BINDTODEVICE of fd=" << sockfd + << " to device_name=" << _device_name; + return -1; + } + } + if (local_side().ip != butil::IP_ANY) { + struct sockaddr_storage cli_addr; + if (butil::endpoint2sockaddr(local_side(), &cli_addr, &addr_size) != 0) { + PLOG(ERROR) << "Fail to get client sockaddr"; + return -1; + } + if (::bind(sockfd, (struct sockaddr*)&cli_addr, addr_size) != 0) { + PLOG(ERROR) << "Fail to bind client socket, errno=" << strerror(errno); + return -1; + } + } const int rc = ::connect( sockfd, (struct sockaddr*)&serv_addr, addr_size); if (rc != 0 && errno != EINPROGRESS) { @@ -2811,6 +2830,7 @@ int Socket::GetPooledSocket(SocketUniquePtr* pooled_socket) { if (socket_pool == NULL) { SocketOptions opt; opt.remote_side = remote_side(); + opt.local_side = butil::EndPoint(local_side().ip, 0); opt.user = user(); opt.on_edge_triggered_events = _on_edge_triggered_events; opt.initial_ssl_ctx = _ssl_ctx; @@ -2912,6 +2932,7 @@ int Socket::GetShortSocket(SocketUniquePtr* short_socket) { SocketId id; SocketOptions opt; opt.remote_side = remote_side(); + opt.local_side = butil::EndPoint(local_side().ip, 0); opt.user = user(); opt.on_edge_triggered_events = _on_edge_triggered_events; opt.initial_ssl_ctx = _ssl_ctx; diff --git a/src/brpc/socket.h b/src/brpc/socket.h index 03ad43f867..a3e2323056 100644 --- a/src/brpc/socket.h +++ b/src/brpc/socket.h @@ -250,6 +250,8 @@ struct SocketOptions { // user->BeforeRecycle() before recycling. 
int fd{-1}; butil::EndPoint remote_side; + butil::EndPoint local_side; + std::string device_name; // If `connect_on_create' is true and `fd' is less than 0, // a client connection will be established to remote_side() // regarding deadline `connect_abstime' when Socket is being created. @@ -830,6 +832,9 @@ friend void DereferenceSocket(Socket*); // Address of self. Initialized in ResetFileDescriptor(). butil::EndPoint _local_side; + // The device name of the client's network adapter. + std::string _device_name; + // Called when edge-triggered events happened on `_fd'. Read comments // of EventDispatcher::AddConsumer (event_dispatcher.h) // carefully before implementing the callback. diff --git a/src/brpc/socket_map.cpp b/src/brpc/socket_map.cpp index 14bea71db5..3984f6b866 100644 --- a/src/brpc/socket_map.cpp +++ b/src/brpc/socket_map.cpp @@ -90,11 +90,9 @@ SocketMap* get_or_new_client_side_socket_map() { } int SocketMapInsert(const SocketMapKey& key, SocketId* id, - const std::shared_ptr& ssl_ctx, - bool use_rdma, - const HealthCheckOption& hc_option) { - return get_or_new_client_side_socket_map()->Insert(key, id, ssl_ctx, use_rdma, hc_option); -} + SocketOptions& opt) { + return get_or_new_client_side_socket_map()->Insert(key, id, opt); +} int SocketMapFind(const SocketMapKey& key, SocketId* id) { SocketMap* m = get_client_side_socket_map(); @@ -227,9 +225,7 @@ void SocketMap::ShowSocketMapInBvarIfNeed() { } int SocketMap::Insert(const SocketMapKey& key, SocketId* id, - const std::shared_ptr& ssl_ctx, - bool use_rdma, - const HealthCheckOption& hc_option) { + SocketOptions& opt) { ShowSocketMapInBvarIfNeed(); std::unique_lock mu(_mutex); @@ -249,11 +245,7 @@ int SocketMap::Insert(const SocketMapKey& key, SocketId* id, sc = NULL; } SocketId tmp_id; - SocketOptions opt; opt.remote_side = key.peer.addr; - opt.initial_ssl_ctx = ssl_ctx; - opt.use_rdma = use_rdma; - opt.hc_option = hc_option; if (_options.socket_creator->CreateSocket(opt, &tmp_id) != 0) { PLOG(FATAL) 
<< "Fail to create socket to " << key.peer; return -1; diff --git a/src/brpc/socket_map.h b/src/brpc/socket_map.h index b0d542e78e..7cf0880498 100644 --- a/src/brpc/socket_map.h +++ b/src/brpc/socket_map.h @@ -80,9 +80,19 @@ struct SocketMapKeyHasher { // successfully, SocketMapRemove() MUST be called when the Socket is not needed. // Return 0 on success, -1 otherwise. int SocketMapInsert(const SocketMapKey& key, SocketId* id, + SocketOptions& opt); + +inline int SocketMapInsert(const SocketMapKey& key, SocketId* id, const std::shared_ptr& ssl_ctx, bool use_rdma, - const HealthCheckOption& hc_option); + const HealthCheckOption& hc_option) { + SocketOptions opt; + opt.remote_side = key.peer.addr; + opt.initial_ssl_ctx = ssl_ctx; + opt.use_rdma = use_rdma; + opt.hc_option = hc_option; + return SocketMapInsert(key, id, opt); +} inline int SocketMapInsert(const SocketMapKey& key, SocketId* id, const std::shared_ptr& ssl_ctx) { @@ -155,7 +165,14 @@ class SocketMap { int Insert(const SocketMapKey& key, SocketId* id, const std::shared_ptr& ssl_ctx, bool use_rdma, - const HealthCheckOption& hc_option); + const HealthCheckOption& hc_option) { + SocketOptions opt; + opt.remote_side = key.peer.addr; + opt.initial_ssl_ctx = ssl_ctx; + opt.use_rdma = use_rdma; + opt.hc_option = hc_option; + return Insert(key, id, opt); +} int Insert(const SocketMapKey& key, SocketId* id, const std::shared_ptr& ssl_ctx) { @@ -167,6 +184,7 @@ class SocketMap { HealthCheckOption hc_option; return Insert(key, id, empty_ptr, false, hc_option); } + int Insert(const SocketMapKey& key, SocketId* id, SocketOptions& opt); void Remove(const SocketMapKey& key, SocketId expected_id); int Find(const SocketMapKey& key, SocketId* id); diff --git a/test/brpc_server_unittest.cpp b/test/brpc_server_unittest.cpp index 4a774fab2a..8508a7986c 100644 --- a/test/brpc_server_unittest.cpp +++ b/test/brpc_server_unittest.cpp @@ -2070,4 +2070,49 @@ TEST_F(ServerTest, auth) { ASSERT_EQ(0, server.Join()); } +void 
TestClientHost(const butil::EndPoint& ep, + brpc::Controller& cntl, + int error_code, bool failed, + brpc::ChannelOptions& copt) { + brpc::Channel chan; + copt.max_retry = 0; + ASSERT_EQ(0, chan.Init(ep, &copt)); + + test::EchoRequest req; + test::EchoResponse res; + req.set_message(EXP_REQUEST); + test::EchoService_Stub stub(&chan); + stub.Echo(&cntl, &req, &res, NULL); + ASSERT_EQ(cntl.Failed(), failed) << cntl.ErrorText(); + ASSERT_EQ(cntl.ErrorCode(), error_code); +} + +TEST_F(ServerTest, bind_client_host_and_network_device) { + butil::EndPoint ep; + ASSERT_EQ(0, str2endpoint("127.0.0.1:8613", &ep)); + brpc::Server server; + EchoServiceImpl service; + ASSERT_EQ(0, server.AddService(&service, brpc::SERVER_DOESNT_OWN_SERVICE)); + brpc::ServerOptions opt; + ASSERT_EQ(0, server.Start(ep, &opt)); + + brpc::Controller cntl; + brpc::ChannelOptions copt; + copt.client_host = "localhost"; + copt.device_name = "lo"; + std::vector connection_types = { + brpc::CONNECTION_TYPE_SINGLE, + brpc::CONNECTION_TYPE_POOLED, + brpc::CONNECTION_TYPE_SHORT + }; + for (auto connect_type : connection_types) { + copt.connection_type = connect_type; + TestClientHost(ep, cntl, 0, false, copt); + cntl.Reset(); + } + + ASSERT_EQ(0, server.Stop(0)); + ASSERT_EQ(0, server.Join()); +} + } //namespace From b926a12ff7c2e91062a30e244bff089325fb0f8d Mon Sep 17 00:00:00 2001 From: Chuang Zhang Date: Tue, 27 Jan 2026 10:59:00 +0800 Subject: [PATCH 21/84] Add Transport to support more communication protocol extensions (#3199) * Add The transport layer to support communication protocols of different device vendors. 
* Refine the SocketMode name style and clean some unused code * Refine Transport Debug method param and RdmaTransport WaitEpollOut code * format the code, remove indentation for top class and variables in new file * review code --------- Co-authored-by: wenjiecn <3252896864@qq.com> --- example/rdma_performance/client.cpp | 2 +- example/rdma_performance/server.cpp | 2 +- src/brpc/acceptor.cpp | 16 +- src/brpc/acceptor.h | 5 +- src/brpc/channel.cpp | 38 +--- src/brpc/channel.h | 7 +- src/brpc/details/naming_service_thread.cpp | 2 +- src/brpc/details/naming_service_thread.h | 3 +- src/brpc/input_message_base.h | 1 + src/brpc/input_messenger.cpp | 69 +----- src/brpc/input_messenger.h | 37 ++-- src/brpc/rdma/rdma_endpoint.cpp | 51 +++-- src/brpc/rdma_transport.cpp | 238 +++++++++++++++++++++ src/brpc/rdma_transport.h | 65 ++++++ src/brpc/server.cpp | 42 +--- src/brpc/server.h | 7 +- src/brpc/socket.cpp | 160 +++----------- src/brpc/socket.h | 34 +-- src/brpc/socket_map.h | 16 +- src/brpc/socket_mode.h | 26 +++ src/brpc/tcp_transport.cpp | 94 ++++++++ src/brpc/tcp_transport.h | 41 ++++ src/brpc/transport.h | 66 ++++++ src/brpc/transport_factory.cpp | 51 +++++ src/brpc/transport_factory.h | 40 ++++ 25 files changed, 775 insertions(+), 338 deletions(-) create mode 100644 src/brpc/rdma_transport.cpp create mode 100644 src/brpc/rdma_transport.h create mode 100644 src/brpc/socket_mode.h create mode 100644 src/brpc/tcp_transport.cpp create mode 100644 src/brpc/tcp_transport.h create mode 100644 src/brpc/transport.h create mode 100644 src/brpc/transport_factory.cpp create mode 100644 src/brpc/transport_factory.h diff --git a/example/rdma_performance/client.cpp b/example/rdma_performance/client.cpp index 57d0c06c93..a7ed2c99c6 100644 --- a/example/rdma_performance/client.cpp +++ b/example/rdma_performance/client.cpp @@ -102,7 +102,7 @@ class PerformanceTest { int Init() { brpc::ChannelOptions options; - options.use_rdma = FLAGS_use_rdma; + options.socket_mode = FLAGS_use_rdma? 
brpc::SOCKET_MODE_RDMA : brpc::SOCKET_MODE_TCP; options.protocol = FLAGS_protocol; options.connection_type = FLAGS_connection_type; options.timeout_ms = FLAGS_rpc_timeout_ms; diff --git a/example/rdma_performance/server.cpp b/example/rdma_performance/server.cpp index d3d00057f4..2e93e1eec7 100644 --- a/example/rdma_performance/server.cpp +++ b/example/rdma_performance/server.cpp @@ -76,7 +76,7 @@ int main(int argc, char* argv[]) { g_last_time.store(0, butil::memory_order_relaxed); brpc::ServerOptions options; - options.use_rdma = FLAGS_use_rdma; + options.socket_mode = FLAGS_use_rdma? brpc::SOCKET_MODE_RDMA : brpc::SOCKET_MODE_TCP; if (server.Start(FLAGS_port, &options) != 0) { LOG(ERROR) << "Fail to start EchoServer"; return -1; diff --git a/src/brpc/acceptor.cpp b/src/brpc/acceptor.cpp index fd6564c987..f9c22a6848 100644 --- a/src/brpc/acceptor.cpp +++ b/src/brpc/acceptor.cpp @@ -21,8 +21,8 @@ #include "butil/fd_guard.h" // fd_guard #include "butil/fd_utility.h" // make_close_on_exec #include "butil/time.h" // gettimeofday_us -#include "brpc/rdma/rdma_endpoint.h" #include "brpc/acceptor.h" +#include "brpc/transport_factory.h" namespace brpc { @@ -40,7 +40,7 @@ Acceptor::Acceptor(bthread_keytable_pool_t* pool) , _empty_cond(&_map_mutex) , _force_ssl(false) , _ssl_ctx(NULL) - , _use_rdma(false) + , _socket_mode(SOCKET_MODE_TCP) , _bthread_tag(BTHREAD_TAG_DEFAULT) { } @@ -282,18 +282,10 @@ void Acceptor::OnNewConnectionsUntilEAGAIN(Socket* acception) { options.fd = in_fd; butil::sockaddr2endpoint(&in_addr, in_len, &options.remote_side); options.user = acception->user(); + options.need_on_edge_trigger = true; options.force_ssl = am->_force_ssl; options.initial_ssl_ctx = am->_ssl_ctx; -#if BRPC_WITH_RDMA - if (am->_use_rdma) { - options.on_edge_triggered_events = rdma::RdmaEndpoint::OnNewDataFromTcp; - } else { -#else - { -#endif - options.on_edge_triggered_events = InputMessenger::OnNewMessages; - } - options.use_rdma = am->_use_rdma; + options.socket_mode = 
am->_socket_mode; options.bthread_tag = am->_bthread_tag; if (Socket::Create(options, &socket_id) != 0) { LOG(ERROR) << "Fail to create Socket"; diff --git a/src/brpc/acceptor.h b/src/brpc/acceptor.h index 69f632aaca..77942beca2 100644 --- a/src/brpc/acceptor.h +++ b/src/brpc/acceptor.h @@ -22,6 +22,7 @@ #include "butil/synchronization/condition_variable.h" #include "butil/containers/flat_map.h" #include "brpc/input_messenger.h" +#include "brpc/socket_mode.h" namespace brpc { @@ -110,8 +111,8 @@ friend class Server; bool _force_ssl; std::shared_ptr _ssl_ctx; - // Whether to use rdma or not - bool _use_rdma; + // Choose to use a certain socket: 0 TCP, 1 RDMA + SocketMode _socket_mode; // Acceptor belongs to this tag bthread_tag_t _bthread_tag; diff --git a/src/brpc/channel.cpp b/src/brpc/channel.cpp index a130f61356..86124c2552 100644 --- a/src/brpc/channel.cpp +++ b/src/brpc/channel.cpp @@ -37,6 +37,7 @@ #include "brpc/details/usercode_backup_pool.h" // TooManyUserCode #include "brpc/rdma/rdma_helper.h" #include "brpc/policy/esp_authenticator.h" +#include "brpc/transport_factory.h" namespace brpc { @@ -60,7 +61,7 @@ ChannelOptions::ChannelOptions() , connection_type(CONNECTION_TYPE_UNKNOWN) , succeed_without_server(true) , log_succeed_without_server(true) - , use_rdma(false) + , socket_mode(SOCKET_MODE_TCP) , auth(NULL) , backup_request_policy(NULL) , retry_policy(NULL) @@ -130,7 +131,7 @@ static ChannelSignature ComputeChannelSignature(const ChannelOptions& opt) { } else { // All disabled ChannelSSLOptions are the same } - if (opt.use_rdma) { + if (opt.socket_mode == SOCKET_MODE_RDMA) { buf.append("|rdma"); } butil::MurmurHash3_x64_128_Update(&mm_ctx, buf.data(), buf.size()); @@ -173,20 +174,6 @@ Channel::~Channel() { } } -#if BRPC_WITH_RDMA -static bool OptionsAvailableForRdma(const ChannelOptions* opt) { - if (opt->has_ssl_options()) { - LOG(WARNING) << "Cannot use SSL and RDMA at the same time"; - return false; - } - if 
(!rdma::SupportedByRdma(opt->protocol.name())) { - LOG(WARNING) << "Cannot use " << opt->protocol.name() - << " over RDMA"; - return false; - } - return true; -} -#endif int Channel::InitChannelOptions(const ChannelOptions* options) { if (options) { // Override default options if user provided one. @@ -201,19 +188,10 @@ int Channel::InitChannelOptions(const ChannelOptions* options) { _options.hc_option.health_check_path = FLAGS_health_check_path; _options.hc_option.health_check_timeout_ms = FLAGS_health_check_timeout_ms; } - if (_options.use_rdma) { -#if BRPC_WITH_RDMA - if (!OptionsAvailableForRdma(&_options)) { - return -1; - } - rdma::GlobalRdmaInitializeOrDie(); - if (!rdma::InitPollingModeWithTag(bthread_self_tag())) { - return -1; - } -#else - LOG(WARNING) << "Cannot use rdma since brpc does not compile with rdma"; + auto ret = TransportFactory::ContextInitOrDie(_options.socket_mode, false, &_options); + if (ret != 0) { + LOG(ERROR) << "Fail to initialize transport context for channel, ret=" << ret; return -1; -#endif } _serialize_request = protocol->serialize_request; @@ -388,7 +366,7 @@ int Channel::InitSingle(const butil::EndPoint& server_addr_and_port, SocketOptions opt; opt.local_side = client_endpoint; opt.initial_ssl_ctx = ssl_ctx; - opt.use_rdma = _options.use_rdma; + opt.socket_mode = _options.socket_mode; opt.hc_option = _options.hc_option; opt.device_name = _options.device_name; if (SocketMapInsert(SocketMapKey(server_addr_and_port, sig), @@ -436,7 +414,7 @@ int Channel::Init(const char* ns_url, GetNamingServiceThreadOptions ns_opt; ns_opt.succeed_without_server = _options.succeed_without_server; ns_opt.log_succeed_without_server = _options.log_succeed_without_server; - ns_opt.socket_option.use_rdma = _options.use_rdma; + ns_opt.socket_option.socket_mode = _options.socket_mode; ns_opt.channel_signature = ComputeChannelSignature(_options); ns_opt.socket_option.hc_option = _options.hc_option; ns_opt.socket_option.local_side = client_endpoint; diff 
--git a/src/brpc/channel.h b/src/brpc/channel.h index 0f349ac6fe..7c257c05d3 100644 --- a/src/brpc/channel.h +++ b/src/brpc/channel.h @@ -37,6 +37,7 @@ #include "brpc/backup_request_policy.h" #include "brpc/naming_service_filter.h" #include "brpc/health_check_option.h" +#include "brpc/socket_mode.h" namespace brpc { @@ -105,9 +106,9 @@ struct ChannelOptions { const ChannelSSLOptions& ssl_options() const { return *_ssl_options; } ChannelSSLOptions* mutable_ssl_options(); - // Let this channel use rdma rather than tcp. - // Default: false - bool use_rdma; + // Let this channel Choose to use a certain socket: 0 SOCKET_MODE_TCP, 1 SOCKET_MODE_RDMA. + // Default: SOCKET_MODE_TCP + SocketMode socket_mode; // Turn on authentication for this channel if `auth' is not NULL. // Note `auth' will not be deleted by channel and must remain valid when diff --git a/src/brpc/details/naming_service_thread.cpp b/src/brpc/details/naming_service_thread.cpp index f882b2255d..7eb005e8f0 100644 --- a/src/brpc/details/naming_service_thread.cpp +++ b/src/brpc/details/naming_service_thread.cpp @@ -125,7 +125,7 @@ void NamingServiceThread::Actions::ResetServers( // Socket. SocketMapKey may be passed through AddWatcher. 
Make sure // to pick those Sockets with the right settings during OnAddedServers const SocketMapKey key(_added[i], _owner->_options.channel_signature); - CHECK_EQ(0, SocketMapInsert(key, &tagged_id.id, + CHECK_EQ(0, SocketMapInsert(key, &tagged_id.id, _owner->_options.socket_option)); _added_sockets.push_back(tagged_id); } diff --git a/src/brpc/details/naming_service_thread.h b/src/brpc/details/naming_service_thread.h index 9acb8f2931..f01fbea6a4 100644 --- a/src/brpc/details/naming_service_thread.h +++ b/src/brpc/details/naming_service_thread.h @@ -27,6 +27,7 @@ #include "brpc/naming_service.h" // NamingService #include "brpc/naming_service_filter.h" // NamingServiceFilter #include "brpc/socket_map.h" +#include "brpc/socket_mode.h" namespace brpc { @@ -45,7 +46,7 @@ struct GetNamingServiceThreadOptions { GetNamingServiceThreadOptions() : succeed_without_server(false) , log_succeed_without_server(true) { - socket_option.use_rdma = false; + socket_option.socket_mode = SOCKET_MODE_TCP; } bool succeed_without_server; diff --git a/src/brpc/input_message_base.h b/src/brpc/input_message_base.h index 86b25785cc..b117eb99c3 100644 --- a/src/brpc/input_message_base.h +++ b/src/brpc/input_message_base.h @@ -55,6 +55,7 @@ class InputMessageBase : public Destroyable { friend class InputMessenger; friend void* ProcessInputMessage(void*); friend class Stream; +friend class Transport; int64_t _received_us; int64_t _base_real_us; SocketUniquePtr _socket; diff --git a/src/brpc/input_messenger.cpp b/src/brpc/input_messenger.cpp index 1b8a86f2c6..925c8776f9 100644 --- a/src/brpc/input_messenger.cpp +++ b/src/brpc/input_messenger.cpp @@ -29,7 +29,7 @@ #include "brpc/protocol.h" // ListProtocols #include "brpc/rdma/rdma_endpoint.h" #include "brpc/input_messenger.h" - +#include "brpc/transport_factory.h" namespace brpc { @@ -112,8 +112,7 @@ ParseResult InputMessenger::CutInputMessage( // The length of `data' must be PROTO_DUMMY_LEN + 1 to store extra ending char '\0' char 
data[PROTO_DUMMY_LEN + 1]; m->_read_buf.copy_to_cstr(data, PROTO_DUMMY_LEN); - if (strncmp(data, "RDMA", PROTO_DUMMY_LEN) == 0 && - m->_rdma_state == Socket::RDMA_OFF) { + if (strncmp(data, "RDMA", PROTO_DUMMY_LEN) == 0) { // To avoid timeout when client uses RDMA but server uses TCP return MakeParseError(PARSE_ERROR_TRY_OTHERS); } @@ -191,46 +190,13 @@ struct RunLastMessage { } }; -static void QueueMessage(InputMessageBase* to_run_msg, - int* num_bthread_created, - bthread_keytable_pool_t* keytable_pool) { - if (!to_run_msg) { - return; - } - -#if BRPC_WITH_RDMA - if (rdma::FLAGS_rdma_disable_bthread) { - ProcessInputMessage(to_run_msg); - return; - } -#endif - // Create bthread for last_msg. The bthread is not scheduled - // until bthread_flush() is called (in the worse case). - - // TODO(gejun): Join threads. - bthread_t th; - bthread_attr_t tmp = (FLAGS_usercode_in_pthread ? - BTHREAD_ATTR_PTHREAD : - BTHREAD_ATTR_NORMAL) | BTHREAD_NOSIGNAL; - tmp.keytable_pool = keytable_pool; - tmp.tag = bthread_self_tag(); - bthread_attr_set_name(&tmp, "ProcessInputMessage"); - - if (!FLAGS_usercode_in_coroutine && bthread_start_background( - &th, &tmp, ProcessInputMessage, to_run_msg) == 0) { - ++*num_bthread_created; - } else { - ProcessInputMessage(to_run_msg); - } -} - -InputMessenger::InputMessageClosure::~InputMessageClosure() noexcept(false) { +InputMessageClosure::~InputMessageClosure() noexcept(false) { if (_msg) { ProcessInputMessage(_msg); } } -void InputMessenger::InputMessageClosure::reset(InputMessageBase* m) { +void InputMessageClosure::reset(InputMessageBase* m) { if (_msg) { ProcessInputMessage(_msg); } @@ -303,7 +269,7 @@ int InputMessenger::ProcessNewMessage( // This unique_ptr prevents msg to be lost before transfering // ownership to last_msg DestroyingPtr msg(pr.message()); - QueueMessage(last_msg.release(), &num_bthread_created, m->_keytable_pool); + m->_transport->QueueMessage(last_msg, &num_bthread_created, false); if (_handlers[index].process == 
NULL) { LOG(ERROR) << "process of index=" << index << " is NULL"; continue; @@ -336,22 +302,19 @@ int InputMessenger::ProcessNewMessage( // Transfer ownership to last_msg last_msg.reset(msg.release()); } else { - QueueMessage(msg.release(), &num_bthread_created, - m->_keytable_pool); + last_msg.reset(msg.release()); + m->_transport->QueueMessage(last_msg, &num_bthread_created, false); bthread_flush(); num_bthread_created = 0; } } -#if BRPC_WITH_RDMA // In RDMA polling mode, all messages must be executed in a new bthread and // not in the bthread where the polling bthread is located, because the // method for processing messages may call synchronization primitives, // causing the polling bthread to be scheduled out. - if (rdma::FLAGS_rdma_use_polling) { - QueueMessage(last_msg.release(), &num_bthread_created, - m->_keytable_pool); + if (m->_socket_mode == SOCKET_MODE_RDMA) { + m->_transport->QueueMessage(last_msg, &num_bthread_created, true); } -#endif if (num_bthread_created) { bthread_flush(); } @@ -414,8 +377,7 @@ void InputMessenger::OnNewMessages(Socket* m) { } } - if (m->_rdma_state == Socket::RDMA_OFF && messenger->ProcessNewMessage( - m, nr, read_eof, received_us, base_realtime, last_msg) < 0) { + if (messenger->ProcessNewMessage(m, nr, read_eof, received_us, base_realtime, last_msg) < 0) { return; } } @@ -533,16 +495,7 @@ int InputMessenger::Create(const butil::EndPoint& remote_side, int InputMessenger::Create(SocketOptions options, SocketId* id) { options.user = this; -#if BRPC_WITH_RDMA - if (options.use_rdma) { - options.on_edge_triggered_events = rdma::RdmaEndpoint::OnNewDataFromTcp; - options.app_connect = std::make_shared(); - } else { -#else - { -#endif - options.on_edge_triggered_events = OnNewMessages; - } + options.need_on_edge_trigger = true; // Enable keepalive by options or Gflag. // Priority: options > Gflag. 
if (options.keepalive_options || FLAGS_socket_keepalive) { diff --git a/src/brpc/input_messenger.h b/src/brpc/input_messenger.h index 1c191a87c2..8482c3f3fc 100644 --- a/src/brpc/input_messenger.h +++ b/src/brpc/input_messenger.h @@ -29,7 +29,7 @@ namespace brpc { namespace rdma { class RdmaEndpoint; } - +class TcpTransport; struct InputMessageHandler { // The callback to cut a message from `source'. // Returned message will be passed to process_request or process_response @@ -70,9 +70,28 @@ struct InputMessageHandler { const char* name; }; +class InputMessageClosure { +public: + InputMessageClosure() : _msg(NULL) { } + ~InputMessageClosure() noexcept(false); + + InputMessageBase* release() { + InputMessageBase* m = _msg; + _msg = NULL; + return m; + } + + void reset(InputMessageBase* m); + +private: + InputMessageBase* _msg; +}; + // Process messages from connections. // `Message' corresponds to a client's request or a server's response. class InputMessenger : public SocketUser { +friend class Socket; +friend class TcpTransport; friend class rdma::RdmaEndpoint; public: explicit InputMessenger(size_t capacity = 128); @@ -111,22 +130,6 @@ friend class rdma::RdmaEndpoint; static void OnNewMessages(Socket* m); private: - class InputMessageClosure { - public: - InputMessageClosure() : _msg(NULL) { } - ~InputMessageClosure() noexcept(false); - - InputMessageBase* release() { - InputMessageBase* m = _msg; - _msg = NULL; - return m; - } - - void reset(InputMessageBase* m); - - private: - InputMessageBase* _msg; - }; // Find a valid scissor from `handlers' to cut off `header' and `payload' // from m->read_buf, save index of the scissor into `index'. 
diff --git a/src/brpc/rdma/rdma_endpoint.cpp b/src/brpc/rdma/rdma_endpoint.cpp index 616ef33252..3cc2107f23 100644 --- a/src/brpc/rdma/rdma_endpoint.cpp +++ b/src/brpc/rdma/rdma_endpoint.cpp @@ -30,6 +30,7 @@ #include "brpc/rdma/block_pool.h" #include "brpc/rdma/rdma_helper.h" #include "brpc/rdma/rdma_endpoint.h" +#include "brpc/rdma_transport.h" DECLARE_int32(task_group_ntags); @@ -239,14 +240,15 @@ void RdmaEndpoint::Reset() { void RdmaConnect::StartConnect(const Socket* socket, void (*done)(int err, void* data), void* data) { - CHECK(socket->_rdma_ep != NULL); + auto* rdma_transport = static_cast(socket->_transport.get()); + CHECK(rdma_transport->_rdma_ep != NULL); SocketUniquePtr s; if (Socket::Address(socket->id(), &s) != 0) { return; } if (!IsRdmaAvailable()) { - socket->_rdma_ep->_state = RdmaEndpoint::FALLBACK_TCP; - s->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_ep->_state = RdmaEndpoint::FALLBACK_TCP; + rdma_transport->_rdma_state = RdmaTransport::RDMA_OFF; done(0, data); return; } @@ -256,7 +258,7 @@ void RdmaConnect::StartConnect(const Socket* socket, bthread_attr_t attr = BTHREAD_ATTR_NORMAL; bthread_attr_set_name(&attr, "RdmaProcessHandshakeAtClient"); if (bthread_start_background(&tid, &attr, - RdmaEndpoint::ProcessHandshakeAtClient, socket->_rdma_ep) < 0) { + RdmaEndpoint::ProcessHandshakeAtClient, rdma_transport->_rdma_ep) < 0) { LOG(FATAL) << "Fail to start handshake bthread"; Run(); } else { @@ -299,7 +301,8 @@ static void TryReadOnTcpDuringRdmaEst(Socket* s) { } void RdmaEndpoint::OnNewDataFromTcp(Socket* m) { - RdmaEndpoint* ep = m->_rdma_ep; + auto* rdma_transport = static_cast(m->_transport.get()); + RdmaEndpoint* ep = rdma_transport->GetRdmaEp(); CHECK(ep != NULL); int progress = Socket::PROGRESS_INIT; @@ -308,7 +311,7 @@ void RdmaEndpoint::OnNewDataFromTcp(Socket* m) { if (!m->CreatedByConnect()) { if (!IsRdmaAvailable()) { ep->_state = FALLBACK_TCP; - m->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_state = 
RdmaTransport::RDMA_OFF; continue; } bthread_t tid; @@ -433,9 +436,10 @@ void* RdmaEndpoint::ProcessHandshakeAtClient(void* arg) { // First initialize CQ and QP resources ep->_state = C_ALLOC_QPCQ; + auto* rdma_transport = static_cast(s->_transport.get()); if (ep->AllocateResources() < 0) { LOG(WARNING) << "Fallback to tcp:" << s->description(); - s->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_state = RdmaTransport::RDMA_OFF; ep->_state = FALLBACK_TCP; return NULL; } @@ -514,7 +518,7 @@ void* RdmaEndpoint::ProcessHandshakeAtClient(void* arg) { if (!HelloNegotiationValid(remote_msg)) { LOG(WARNING) << "Fail to negotiate with server, fallback to tcp:" << s->description(); - s->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_state = RdmaTransport::RDMA_OFF; } else { ep->_remote_recv_block_size = remote_msg.block_size; ep->_local_window_capacity = @@ -530,16 +534,16 @@ void* RdmaEndpoint::ProcessHandshakeAtClient(void* arg) { ep->_state = C_BRINGUP_QP; if (ep->BringUpQp(remote_msg.lid, remote_msg.gid, remote_msg.qp_num) < 0) { LOG(WARNING) << "Fail to bringup QP, fallback to tcp:" << s->description(); - s->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_state = RdmaTransport::RDMA_OFF; } else { - s->_rdma_state = Socket::RDMA_ON; + rdma_transport->_rdma_state = RdmaTransport::RDMA_ON; } } // Send ACK message to server ep->_state = C_ACK_SEND; uint32_t flags = 0; - if (s->_rdma_state != Socket::RDMA_OFF) { + if (rdma_transport->_rdma_state != RdmaTransport::RDMA_OFF) { flags |= ACK_MSG_RDMA_OK; } uint32_t* tmp = (uint32_t*)data; // avoid GCC warning on strict-aliasing @@ -553,7 +557,7 @@ void* RdmaEndpoint::ProcessHandshakeAtClient(void* arg) { return NULL; } - if (s->_rdma_state == Socket::RDMA_ON) { + if (rdma_transport->_rdma_state == RdmaTransport::RDMA_ON) { ep->_state = ESTABLISHED; LOG_IF(INFO, FLAGS_rdma_trace_verbose) << "Client handshake ends (use rdma) on " << s->description(); @@ -586,7 +590,7 @@ void* 
RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { ep->_state = FAILED; return NULL; } - + auto* rdma_transport = static_cast(s->_transport.get()); if (memcmp(data, MAGIC_STR, MAGIC_STR_LEN) != 0) { LOG_IF(INFO, FLAGS_rdma_trace_verbose) << "It seems that the " << "client does not use RDMA, fallback to TCP:" @@ -594,7 +598,7 @@ void* RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { // we need to copy data read back to _socket->_read_buf s->_read_buf.append(data, MAGIC_STR_LEN); ep->_state = FALLBACK_TCP; - s->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_state = RdmaTransport::RDMA_OFF; ep->TryReadOnTcp(); return NULL; } @@ -626,7 +630,7 @@ void* RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { if (!HelloNegotiationValid(remote_msg)) { LOG(WARNING) << "Fail to negotiate with client, fallback to tcp:" << s->description(); - s->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_state = RdmaTransport::RDMA_OFF; } else { ep->_remote_recv_block_size = remote_msg.block_size; ep->_local_window_capacity = @@ -643,13 +647,13 @@ void* RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { if (ep->AllocateResources() < 0) { LOG(WARNING) << "Fail to allocate rdma resources, fallback to tcp:" << s->description(); - s->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_state = RdmaTransport::RDMA_OFF; } else { ep->_state = S_BRINGUP_QP; if (ep->BringUpQp(remote_msg.lid, remote_msg.gid, remote_msg.qp_num) < 0) { LOG(WARNING) << "Fail to bringup QP, fallback to tcp:" << s->description(); - s->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_state = RdmaTransport::RDMA_OFF; } } } @@ -658,7 +662,7 @@ void* RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { ep->_state = S_HELLO_SEND; HelloMessage local_msg; local_msg.msg_len = g_rdma_hello_msg_len; - if (s->_rdma_state == Socket::RDMA_OFF) { + if (rdma_transport->_rdma_state == RdmaTransport::RDMA_OFF) { local_msg.impl_ver = 0; local_msg.hello_ver = 0; } else { @@ -702,7 +706,7 @@ void* 
RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { uint32_t* tmp = (uint32_t*)data; // avoid GCC warning on strict-aliasing uint32_t flags = butil::NetToHost32(*tmp); if (flags & ACK_MSG_RDMA_OK) { - if (s->_rdma_state == Socket::RDMA_OFF) { + if (rdma_transport->_rdma_state == RdmaTransport::RDMA_OFF) { LOG(WARNING) << "Fail to parse Hello Message length from client:" << s->description(); s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", @@ -710,13 +714,13 @@ void* RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { ep->_state = FAILED; return NULL; } else { - s->_rdma_state = Socket::RDMA_ON; + rdma_transport->_rdma_state = RdmaTransport::RDMA_ON; ep->_state = ESTABLISHED; LOG_IF(INFO, FLAGS_rdma_trace_verbose) << "Server handshake ends (use rdma) on " << s->description(); } } else { - s->_rdma_state = Socket::RDMA_OFF; + rdma_transport->_rdma_state = RdmaTransport::RDMA_OFF; ep->_state = FALLBACK_TCP; LOG_IF(INFO, FLAGS_rdma_trace_verbose) << "Server handshake ends (use tcp) on " << s->description(); @@ -1455,7 +1459,8 @@ void RdmaEndpoint::PollCq(Socket* m) { if (Socket::Address(ep->_socket->id(), &s) < 0) { return; } - CHECK(ep == s->_rdma_ep); + auto* rdma_transport = static_cast(s->_transport.get()); + CHECK(ep == rdma_transport->_rdma_ep); bool send = false; ibv_cq* cq = ep->_resource->recv_cq; @@ -1472,7 +1477,7 @@ void RdmaEndpoint::PollCq(Socket* m) { int progress = Socket::PROGRESS_INIT; bool notified = false; - InputMessenger::InputMessageClosure last_msg; + InputMessageClosure last_msg; ibv_wc wc[FLAGS_rdma_cqe_poll_once]; while (true) { int cnt = ibv_poll_cq(cq, FLAGS_rdma_cqe_poll_once, wc); diff --git a/src/brpc/rdma_transport.cpp b/src/brpc/rdma_transport.cpp new file mode 100644 index 0000000000..d980c5a0fc --- /dev/null +++ b/src/brpc/rdma_transport.cpp @@ -0,0 +1,238 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#if BRPC_WITH_RDMA + +#include "brpc/rdma_transport.h" +#include "brpc/tcp_transport.h" +#include "brpc/rdma/rdma_endpoint.h" +#include "brpc/rdma/rdma_helper.h" + +namespace brpc { +DECLARE_bool(usercode_in_coroutine); +DECLARE_bool(usercode_in_pthread); + +extern SocketVarsCollector *g_vars; + +void RdmaTransport::Init(Socket *socket, const SocketOptions &options) { + CHECK(_rdma_ep == NULL); + if (options.socket_mode == SOCKET_MODE_RDMA) { + _rdma_ep = new(std::nothrow)rdma::RdmaEndpoint(socket); + if (!_rdma_ep) { + const int saved_errno = errno; + PLOG(ERROR) << "Fail to create RdmaEndpoint"; + socket->SetFailed(saved_errno, "Fail to create RdmaEndpoint: %s", + berror(saved_errno)); + } + _rdma_state = RDMA_UNKNOWN; + } else { + _rdma_state = RDMA_OFF; + socket->_socket_mode = SOCKET_MODE_TCP; + } + _socket = socket; + _default_connect = options.app_connect; + _on_edge_trigger = options.on_edge_triggered_events; + if (options.need_on_edge_trigger && _on_edge_trigger == NULL) { + _on_edge_trigger = rdma::RdmaEndpoint::OnNewDataFromTcp; + } + _tcp_transport = std::make_shared(); + _tcp_transport->Init(socket, options); +} + +void RdmaTransport::Release() { + if (_rdma_ep) { + delete _rdma_ep; + _rdma_ep = NULL; + _rdma_state = RDMA_UNKNOWN; + } +} + +int 
RdmaTransport::Reset(int32_t expected_nref) { + if (_rdma_ep) { + _rdma_ep->Reset(); + _rdma_state = RDMA_UNKNOWN; + } + return 0; +} + +std::shared_ptr RdmaTransport::Connect() { + if (_default_connect == nullptr) { + return std::make_shared(); + } + return _default_connect; +} + +int RdmaTransport::CutFromIOBuf(butil::IOBuf *buf) { + if (_rdma_ep && _rdma_state != RDMA_OFF) { + butil::IOBuf *data_arr[1] = {buf}; + return _rdma_ep->CutFromIOBufList(data_arr, 1); + } else { + return _tcp_transport->CutFromIOBuf(buf); + } +} + +ssize_t RdmaTransport::CutFromIOBufList(butil::IOBuf **buf, size_t ndata) { + if (_rdma_ep && _rdma_state != RDMA_OFF) { + return _rdma_ep->CutFromIOBufList(buf, ndata); + } + return _tcp_transport->CutFromIOBufList(buf, ndata); +} + +int RdmaTransport::WaitEpollOut(butil::atomic *_epollout_butex, + bool pollin, const timespec duetime) { + if (_rdma_state == RDMA_ON) { + const int expected_val = _epollout_butex + ->load(butil::memory_order_acquire); + CHECK(_rdma_ep != NULL); + if (!_rdma_ep->IsWritable()) { + g_vars->nwaitepollout << 1; + if (bthread::butex_wait(_epollout_butex, expected_val, &duetime) < 0) { + if (errno != EAGAIN && errno != ETIMEDOUT) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to wait rdma window of " << _socket; + _socket->SetFailed(saved_errno, + "Fail to wait rdma window of %s: %s", + _socket->description().c_str(), + berror(saved_errno)); + } + if (_socket->Failed()) { + // NOTE: + // Different from TCP, we cannot find the RDMA channel + // failed by writing to it. Thus we must check if it + // is already failed here. 
+ return 1; + } + } + } + } else { + return _tcp_transport->WaitEpollOut(_epollout_butex, pollin, duetime); + } + return 0; +} + +void RdmaTransport::ProcessEvent(bthread_attr_t attr) { + bthread_t tid; + if (FLAGS_usercode_in_coroutine) { + OnEdge(_socket); + } else if (rdma::FLAGS_rdma_edisp_unsched == false) { + auto rc = bthread_start_background(&tid, &attr, OnEdge, _socket); + if (rc != 0) { + LOG(FATAL) << "Fail to start ProcessEvent"; + OnEdge(_socket); + } + } else if (bthread_start_urgent(&tid, &attr, OnEdge, _socket) != 0) { + LOG(FATAL) << "Fail to start ProcessEvent"; + OnEdge(_socket); + } +} + +void RdmaTransport::QueueMessage(InputMessageClosure& input_msg, int* num_bthread_created, bool last_msg) { + if (last_msg && !rdma::FLAGS_rdma_use_polling) { + return; + } + InputMessageBase* to_run_msg = input_msg.release(); + if (!to_run_msg) { + return; + } + + if (rdma::FLAGS_rdma_disable_bthread) { + ProcessInputMessage(to_run_msg); + return; + } + // Create bthread for last_msg. The bthread is not scheduled + // until bthread_flush() is called (in the worse case). + + // TODO(gejun): Join threads. + bthread_t th; + bthread_attr_t tmp = (FLAGS_usercode_in_pthread ? 
+ BTHREAD_ATTR_PTHREAD : + BTHREAD_ATTR_NORMAL) | BTHREAD_NOSIGNAL; + tmp.keytable_pool = _socket->keytable_pool(); + tmp.tag = bthread_self_tag(); + bthread_attr_set_name(&tmp, "ProcessInputMessage"); + + if (!FLAGS_usercode_in_coroutine && bthread_start_background( + &th, &tmp, ProcessInputMessage, to_run_msg) == 0) { + ++*num_bthread_created; + } else { + ProcessInputMessage(to_run_msg); + } +} + +void RdmaTransport::Debug(std::ostream &os) { + if (_rdma_state == RDMA_ON && _rdma_ep) { + _rdma_ep->DebugInfo(os); + } +} + +int RdmaTransport::ContextInitOrDie(bool serverOrNot, const void* _options) { + if (serverOrNot) { + if (!OptionsAvailableOverRdma(static_cast(_options))) { + return -1; + } + rdma::GlobalRdmaInitializeOrDie(); + if (!rdma::InitPollingModeWithTag(static_cast(_options)->bthread_tag)) { + return -1; + } + } else { + if (!OptionsAvailableForRdma(static_cast(_options))) { + return -1; + } + rdma::GlobalRdmaInitializeOrDie(); + if (!rdma::InitPollingModeWithTag(bthread_self_tag())) { + return -1; + } + return 0; + } + + return 0; +} + +bool RdmaTransport::OptionsAvailableForRdma(const ChannelOptions* opt) { + if (opt->has_ssl_options()) { + LOG(WARNING) << "Cannot use SSL and RDMA at the same time"; + return false; + } + if (!rdma::SupportedByRdma(opt->protocol.name())) { + LOG(WARNING) << "Cannot use " << opt->protocol.name() + << " over RDMA"; + return false; + } + return true; +} + +bool RdmaTransport::OptionsAvailableOverRdma(const ServerOptions* opt) { + if (opt->rtmp_service) { + LOG(WARNING) << "RTMP is not supported by RDMA"; + return false; + } + if (opt->has_ssl_options()) { + LOG(WARNING) << "SSL is not supported by RDMA"; + return false; + } + if (opt->nshead_service) { + LOG(WARNING) << "NSHEAD is not supported by RDMA"; + return false; + } + if (opt->mongo_service_adaptor) { + LOG(WARNING) << "MONGO is not supported by RDMA"; + return false; + } + return true; +} +} +#endif \ No newline at end of file diff --git 
a/src/brpc/rdma_transport.h b/src/brpc/rdma_transport.h new file mode 100644 index 0000000000..7e62edff2f --- /dev/null +++ b/src/brpc/rdma_transport.h @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_RDMA_TRANSPORT_H +#define BRPC_RDMA_TRANSPORT_H + +#if BRPC_WITH_RDMA +#include "brpc/socket.h" +#include "brpc/channel.h" +#include "brpc/transport.h" + +namespace brpc { +class RdmaTransport : public Transport { + friend class TransportFactory; + friend class rdma::RdmaEndpoint; + friend class rdma::RdmaConnect; +public: + void Init(Socket* socket, const SocketOptions& options) override; + void Release() override; + int Reset(int32_t expected_nref) override; + std::shared_ptr Connect() override; + int CutFromIOBuf(butil::IOBuf* buf) override; + ssize_t CutFromIOBufList(butil::IOBuf** buf, size_t ndata) override; + int WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, const timespec duetime) override; + void ProcessEvent(bthread_attr_t attr) override; + void QueueMessage(InputMessageClosure& inputMsg, int* num_bthread_created, bool last_msg) override; + void Debug(std::ostream &os) override; + rdma::RdmaEndpoint* GetRdmaEp() { + CHECK(_rdma_ep != NULL); + return _rdma_ep; + } + 
static int ContextInitOrDie(bool serverOrNot, const void* _options); +private: + static bool OptionsAvailableForRdma(const ChannelOptions* opt); + static bool OptionsAvailableOverRdma(const ServerOptions* opt); +private: + // The on/off state of RDMA + enum RdmaState { + RDMA_ON, + RDMA_OFF, + RDMA_UNKNOWN + }; + // The RdmaEndpoint + rdma::RdmaEndpoint* _rdma_ep = NULL; + // Should use RDMA or not + RdmaState _rdma_state; + std::shared_ptr _tcp_transport; +}; +} +#endif // BRPC_WITH_RDMA +#endif //BRPC_RDMA_TRANSPORT_H \ No newline at end of file diff --git a/src/brpc/server.cpp b/src/brpc/server.cpp index 8e2368bcb2..9470220d09 100644 --- a/src/brpc/server.cpp +++ b/src/brpc/server.cpp @@ -81,6 +81,7 @@ #include "brpc/details/tcmalloc_extension.h" #include "brpc/rdma/rdma_helper.h" #include "brpc/baidu_master_service.h" +#include "brpc/transport_factory.h" inline std::ostream& operator<<(std::ostream& os, const timeval& tm) { const char old_fill = os.fill(); @@ -146,7 +147,7 @@ ServerOptions::ServerOptions() , internal_port(-1) , has_builtin_services(true) , force_ssl(false) - , use_rdma(false) + , socket_mode(SOCKET_MODE_TCP) , baidu_master_service(NULL) , http_master_service(NULL) , health_reporter(NULL) @@ -772,27 +773,6 @@ bool Server::CreateConcurrencyLimiter(const AdaptiveMaxConcurrency& amc, return true; } -#if BRPC_WITH_RDMA -static bool OptionsAvailableOverRdma(const ServerOptions* opt) { - if (opt->rtmp_service) { - LOG(WARNING) << "RTMP is not supported by RDMA"; - return false; - } - if (opt->has_ssl_options()) { - LOG(WARNING) << "SSL is not supported by RDMA"; - return false; - } - if (opt->nshead_service) { - LOG(WARNING) << "NSHEAD is not supported by RDMA"; - return false; - } - if (opt->mongo_service_adaptor) { - LOG(WARNING) << "MONGO is not supported by RDMA"; - return false; - } - return true; -} -#endif static AdaptiveMaxConcurrency g_default_max_concurrency_of_method(0); static bool g_default_ignore_eovercrowded(false); @@ -889,20 +869,10 
@@ int Server::StartInternal(const butil::EndPoint& endpoint, << FLAGS_task_group_ntags << ")"; return -1; } - - if (_options.use_rdma) { -#if BRPC_WITH_RDMA - if (!OptionsAvailableOverRdma(&_options)) { - return -1; - } - rdma::GlobalRdmaInitializeOrDie(); - if (!rdma::InitPollingModeWithTag(_options.bthread_tag)) { - return -1; - } -#else - LOG(WARNING) << "Cannot use rdma since brpc does not compile with rdma"; + int ret = TransportFactory::ContextInitOrDie(_options.socket_mode, true, &_options); + if (ret != 0) { + LOG(ERROR) << "Fail to initialize transport context for server, ret=" << ret; return -1; -#endif } if (_options.http_master_service) { @@ -1170,7 +1140,7 @@ int Server::StartInternal(const butil::EndPoint& endpoint, LOG(ERROR) << "Fail to build acceptor"; return -1; } - _am->_use_rdma = _options.use_rdma; + _am->_socket_mode = _options.socket_mode; _am->_bthread_tag = _options.bthread_tag; } // Set `_status' to RUNNING before accepting connections diff --git a/src/brpc/server.h b/src/brpc/server.h index c262375c67..9f69a83458 100644 --- a/src/brpc/server.h +++ b/src/brpc/server.h @@ -45,6 +45,7 @@ #include "brpc/concurrency_limiter.h" #include "brpc/baidu_master_service.h" #include "brpc/rpc_pb_message_factory.h" +#include "brpc/socket_mode.h" namespace brpc { @@ -223,9 +224,9 @@ struct ServerOptions { // Force ssl for all connections of the port to Start(). bool force_ssl; - // Whether the server uses rdma or not - // Default: false - bool use_rdma; + // the server socket mode uses tcp or rdma or other + // Default: SOCKET_MODE_TCP + SocketMode socket_mode; // [CAUTION] This option is for implementing specialized baidu-std proxies, // most users don't need it. 
Don't change this option unless you fully diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index e431aceff9..9b14d43051 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -50,8 +50,7 @@ #include "brpc/policy/rtmp_protocol.h" // FIXME #include "brpc/periodic_task.h" #include "brpc/details/health_check.h" -#include "brpc/rdma/rdma_endpoint.h" -#include "brpc/rdma/rdma_helper.h" +#include "brpc/transport_factory.h" #if defined(OS_MACOSX) #include #endif @@ -456,6 +455,7 @@ Socket::Socket(Forbidden f) , _tos(0) , _reset_fd_real_us(-1) , _on_edge_triggered_events(NULL) + , _need_on_edge_trigger(false) , _user(NULL) , _conn(NULL) , _preferred_index(-1) @@ -473,8 +473,8 @@ Socket::Socket(Forbidden f) , _auth_context(NULL) , _ssl_state(SSL_UNKNOWN) , _ssl_session(NULL) - , _rdma_ep(NULL) - , _rdma_state(RDMA_OFF) + , _socket_mode(SOCKET_MODE_TCP) + , _transport(nullptr) , _connection_type_for_progressive_read(CONNECTION_TYPE_UNKNOWN) , _controller_released_socket(false) , _overcrowded(false) @@ -601,7 +601,7 @@ int Socket::ResetFileDescriptor(int fd) { SetSocketOptions(fd); - if (_on_edge_triggered_events) { + if (_transport->HasOnEdgeTrigger()) { if (_io_event.AddConsumer(fd) != 0) { PLOG(ERROR) << "Fail to add SocketId=" << id() << " into EventDispatcher"; @@ -721,6 +721,11 @@ int Socket::OnCreated(const SocketOptions& options) { auto guard = butil::MakeScopeGuard([this] { _io_event.Reset(); }); + // start build the transport + _socket_mode = options.socket_mode; + _transport = TransportFactory::CreateTransport(options.socket_mode); + CHECK(NULL != _transport); + _transport->Init(this, options); g_vars->nsocket << 1; CHECK(NULL == _shared_part.load(butil::memory_order_relaxed)); @@ -731,9 +736,10 @@ int Socket::OnCreated(const SocketOptions& options) { _local_side = options.local_side; _device_name = options.device_name; _on_edge_triggered_events = options.on_edge_triggered_events; + _need_on_edge_trigger = options.need_on_edge_trigger; _user = 
options.user; _conn = options.conn; - _app_connect = options.app_connect; + _app_connect = _transport->Connect(); _preferred_index = -1; _hc_count = 0; CHECK(_read_buf.empty()); @@ -757,22 +763,6 @@ int Socket::OnCreated(const SocketOptions& options) { _ssl_state = (options.initial_ssl_ctx == NULL ? SSL_OFF : SSL_UNKNOWN); _ssl_session = NULL; _ssl_ctx = options.initial_ssl_ctx; -#if BRPC_WITH_RDMA - CHECK(_rdma_ep == NULL); - if (options.use_rdma) { - _rdma_ep = new (std::nothrow)rdma::RdmaEndpoint(this); - if (!_rdma_ep) { - const int saved_errno = errno; - PLOG(ERROR) << "Fail to create RdmaEndpoint"; - SetFailed(saved_errno, "Fail to create RdmaEndpoint: %s", - berror(saved_errno)); - return -1; - } - _rdma_state = RDMA_UNKNOWN; - } else { - _rdma_state = RDMA_OFF; - } -#endif _connection_type_for_progressive_read = CONNECTION_TYPE_UNKNOWN; _controller_released_socket.store(false, butil::memory_order_relaxed); _overcrowded = false; @@ -852,7 +842,7 @@ void Socket::BeforeRecycled() { }; const int prev_fd = _fd.exchange(-1, butil::memory_order_relaxed); if (ValidFileDescriptor(prev_fd)) { - if (_on_edge_triggered_events != NULL) { + if (_transport->HasOnEdgeTrigger()) { _io_event.RemoveConsumer(prev_fd); } close(prev_fd); @@ -860,15 +850,7 @@ void Socket::BeforeRecycled() { g_vars->channel_conn << -1; } } - -#if BRPC_WITH_RDMA - if (_rdma_ep) { - delete _rdma_ep; - _rdma_ep = NULL; - _rdma_state = RDMA_UNKNOWN; - } -#endif - + _transport->Release(); reset_parsing_context(NULL); _read_buf.clear(); @@ -1013,7 +995,7 @@ int Socket::WaitAndReset(int32_t expected_nref) { // It's safe to close previous fd (provided expected_nref is correct). 
const int prev_fd = _fd.exchange(-1, butil::memory_order_relaxed); if (ValidFileDescriptor(prev_fd)) { - if (_on_edge_triggered_events != NULL) { + if (_transport->HasOnEdgeTrigger()) { _io_event.RemoveConsumer(prev_fd); } close(prev_fd); @@ -1021,13 +1003,7 @@ int Socket::WaitAndReset(int32_t expected_nref) { g_vars->channel_conn << -1; } } - -#if BRPC_WITH_RDMA - if (_rdma_ep) { - _rdma_ep->Reset(); - _rdma_state = RDMA_UNKNOWN; - } -#endif + _transport->Reset(expected_nref); _local_side = butil::EndPoint(); if (_ssl_session) { @@ -1181,13 +1157,6 @@ int Socket::Status(SocketId id, int32_t* nref) { return -1; } -void* Socket::ProcessEvent(void* arg) { - // the enclosed Socket is valid and free to access inside this function. - SocketUniquePtr s(static_cast(arg)); - s->_on_edge_triggered_events(s.get()); - return NULL; -} - // Check if there're new requests appended. // If yes, point old_head to reversed new requests and return false; // If no: @@ -1771,16 +1740,7 @@ int Socket::StartWrite(WriteRequest* req, const WriteOptions& opt) { butil::IOBuf* data_arr[1] = { &req->data }; nw = _conn->CutMessageIntoFileDescriptor(fd(), data_arr, 1); } else { -#if BRPC_WITH_RDMA - if (_rdma_ep && _rdma_state != RDMA_OFF) { - butil::IOBuf* data_arr[1] = { &req->data }; - nw = _rdma_ep->CutFromIOBufList(data_arr, 1); - } else { -#else - { -#endif - nw = req->data.cut_into_file_descriptor(fd()); - } + nw = _transport->CutFromIOBuf(&req->data); } if (nw < 0) { // RTMP may return EOVERCROWDED @@ -1882,45 +1842,11 @@ void* Socket::KeepWrite(void* void_arg) { // which may turn on _overcrowded to stop pending requests from // growing infinitely. 
const timespec duetime = - butil::milliseconds_from_now(WAIT_EPOLLOUT_TIMEOUT_MS); -#if BRPC_WITH_RDMA - if (s->_rdma_state == RDMA_ON) { - const int expected_val = s->_epollout_butex - ->load(butil::memory_order_acquire); - CHECK(s->_rdma_ep != NULL); - if (!s->_rdma_ep->IsWritable()) { - g_vars->nwaitepollout << 1; - if (bthread::butex_wait(s->_epollout_butex, - expected_val, &duetime) < 0) { - if (errno != EAGAIN && errno != ETIMEDOUT) { - const int saved_errno = errno; - PLOG(WARNING) << "Fail to wait rdma window of " << *s; - s->SetFailed(saved_errno, "Fail to wait rdma window of %s: %s", - s->description().c_str(), berror(saved_errno)); - } - if (s->Failed()) { - // NOTE: - // Different from TCP, we cannot find the RDMA channel - // failed by writing to it. Thus we must check if it - // is already failed here. - break; - } - } - } - } else { -#else - { -#endif - g_vars->nwaitepollout << 1; - bool pollin = (s->_on_edge_triggered_events != NULL); - const int rc = s->WaitEpollOut(s->fd(), pollin, &duetime); - if (rc < 0 && errno != ETIMEDOUT) { - const int saved_errno = errno; - PLOG(WARNING) << "Fail to wait epollout of " << *s; - s->SetFailed(saved_errno, "Fail to wait epollout of %s: %s", - s->description().c_str(), berror(saved_errno)); - break; - } + butil::milliseconds_from_now(WAIT_EPOLLOUT_TIMEOUT_MS); + bool pollin = s->_transport->HasOnEdgeTrigger(); + int ret = s->_transport->WaitEpollOut(s->_epollout_butex, pollin, duetime); + if (ret == 1) { + break; } } if (NULL == cur_tail) { @@ -1960,13 +1886,7 @@ ssize_t Socket::DoWrite(WriteRequest* req) { if (_conn) { return _conn->CutMessageIntoFileDescriptor(fd(), data_list, ndata); } else { -#if BRPC_WITH_RDMA - if (_rdma_ep && _rdma_state != RDMA_OFF) { - return _rdma_ep->CutFromIOBufList(data_list, ndata); - } -#endif - return butil::IOBuf::cut_multiple_into_file_descriptor( - fd(), data_list, ndata); + return _transport->CutFromIOBufList(data_list, ndata); } } @@ -2155,7 +2075,6 @@ ssize_t 
Socket::DoRead(size_t size_hint) { errno = ESSL; return -1; } - CHECK(_rdma_state == RDMA_OFF); return _read_buf.append_from_file_descriptor(fd(), size_hint); } @@ -2257,7 +2176,7 @@ int Socket::OnInputEvent(void* user_data, uint32_t events, if (Address(id, &s) < 0) { return -1; } - if (NULL == s->_on_edge_triggered_events) { + if (!s->_transport->HasOnEdgeTrigger()) { // Callback can be NULL when receiving error epoll events // (Added into epoll by `WaitConnected') return 0; @@ -2283,28 +2202,15 @@ int Socket::OnInputEvent(void* user_data, uint32_t events, // is just 1500~1700/s g_vars->neventthread << 1; - bthread_t tid; // transfer ownership as well, don't use s anymore! Socket* const p = s.release(); bthread_attr_t attr = thread_attr; attr.keytable_pool = p->_keytable_pool; attr.tag = bthread_self_tag(); - bthread_attr_set_name(&attr, "ProcessEvent"); - if (FLAGS_usercode_in_coroutine) { - ProcessEvent(p); -#if BRPC_WITH_RDMA - } else if (rdma::FLAGS_rdma_edisp_unsched) { - auto rc = bthread_start_background(&tid, &attr, ProcessEvent, p); - if (rc != 0) { - LOG(FATAL) << "Fail to start ProcessEvent"; - ProcessEvent(p); - } -#endif - } else if (bthread_start_urgent(&tid, &attr, ProcessEvent, p) != 0) { - LOG(FATAL) << "Fail to start ProcessEvent"; - ProcessEvent(p); - } + // Only event dispatcher thread has flag BTHREAD_GLOBAL_PRIORITY + attr.flags = attr.flags & (~BTHREAD_GLOBAL_PRIORITY); + p->_transport->ProcessEvent(attr); } return 0; } @@ -2606,11 +2512,7 @@ void Socket::DebugSocket(std::ostream& os, SocketId id) { << "\n}"; } #endif -#if BRPC_WITH_RDMA - if (ptr->_rdma_state == RDMA_ON && ptr->_rdma_ep) { - ptr->_rdma_ep->DebugInfo(os); - } -#endif + ptr->_transport->Debug(os); { os << "\nbthread_tag=" << ptr->_io_event.bthread_tag(); } } @@ -2833,10 +2735,11 @@ int Socket::GetPooledSocket(SocketUniquePtr* pooled_socket) { opt.local_side = butil::EndPoint(local_side().ip, 0); opt.user = user(); opt.on_edge_triggered_events = _on_edge_triggered_events; + 
opt.need_on_edge_trigger = _need_on_edge_trigger; opt.initial_ssl_ctx = _ssl_ctx; opt.keytable_pool = _keytable_pool; opt.app_connect = _app_connect; - opt.use_rdma = (_rdma_ep) ? true : false; + opt.socket_mode = _socket_mode; socket_pool = new SocketPool(opt); SocketPool* expected = NULL; if (!main_sp->socket_pool.compare_exchange_strong( @@ -2935,10 +2838,11 @@ int Socket::GetShortSocket(SocketUniquePtr* short_socket) { opt.local_side = butil::EndPoint(local_side().ip, 0); opt.user = user(); opt.on_edge_triggered_events = _on_edge_triggered_events; + opt.need_on_edge_trigger = _need_on_edge_trigger; opt.initial_ssl_ctx = _ssl_ctx; opt.keytable_pool = _keytable_pool; opt.app_connect = _app_connect; - opt.use_rdma = (_rdma_ep) ? true : false; + opt.socket_mode = _socket_mode; if (get_client_side_messenger()->Create(opt, &id) != 0 || Address(id, short_socket) != 0) { return -1; diff --git a/src/brpc/socket.h b/src/brpc/socket.h index a3e2323056..c2f751e367 100644 --- a/src/brpc/socket.h +++ b/src/brpc/socket.h @@ -42,6 +42,7 @@ #include "brpc/event_dispatcher.h" #include "brpc/versioned_ref_with_id.h" #include "brpc/health_check_option.h" +#include "brpc/socket_mode.h" namespace brpc { namespace policy { @@ -61,6 +62,7 @@ class Socket; class AuthContext; class EventDispatcher; class Stream; +class Transport; // A special closure for processing the about-to-recycle socket. Socket does // not delete SocketUser, if you want, `delete this' at the end of @@ -268,11 +270,20 @@ struct SocketOptions { // until new data arrives. The callback will not be called from more than // one thread at any time. void (*on_edge_triggered_events)(Socket*){NULL}; + // Indicates that this socket requires an edge-triggered event handler even + // if `on_edge_triggered_events` is left as NULL by the caller. When this + // flag is true and `on_edge_triggered_events` is NULL, the underlying + // transport-specific implementation (e.g. 
a transport subclass) is allowed + // to install a suitable default `on_edge_triggered_events` callback on + // behalf of the user. Typical usage is by transports/protocols that rely + // on edge-triggered I/O semantics but want the framework to provide the + // actual event handler. + bool need_on_edge_trigger{false}; int health_check_interval_s{-1}; // Only accept ssl connection. bool force_ssl{false}; std::shared_ptr initial_ssl_ctx; - bool use_rdma{false}; + SocketMode socket_mode{SOCKET_MODE_TCP}; bthread_keytable_pool_t* keytable_pool{NULL}; SocketConnection* conn{NULL}; std::shared_ptr app_connect; @@ -313,6 +324,10 @@ friend class policy::H2GlobalStreamCreator; friend class VersionedRefWithId; friend class IOEvent; friend void DereferenceSocket(Socket*); +friend class Transport; +friend class TcpTransport; +friend class RdmaTransport; +friend class TransportFactory; class SharedPart; struct WriteRequest; @@ -650,13 +665,6 @@ friend void DereferenceSocket(Socket*); private: DISALLOW_COPY_AND_ASSIGN(Socket); - // The on/off state of RDMA - enum RdmaState { - RDMA_ON, - RDMA_OFF, - RDMA_UNKNOWN - }; - int ConductError(bthread_id_t); int StartWrite(WriteRequest*, const WriteOptions&); @@ -732,7 +740,6 @@ friend void DereferenceSocket(Socket*); // Wait until nref hits `expected_nref' and reset some internal resources. int WaitAndReset(int32_t expected_nref); - static void* ProcessEvent(void*); static void* KeepWrite(void*); @@ -839,7 +846,7 @@ friend void DereferenceSocket(Socket*); // of EventDispatcher::AddConsumer (event_dispatcher.h) // carefully before implementing the callback. void (*_on_edge_triggered_events)(Socket*); - + bool _need_on_edge_trigger; // A set of callbacks to monitor important events of this socket. 
// Initialized by SocketOptions.user SocketUser* _user; @@ -918,10 +925,9 @@ friend void DereferenceSocket(Socket*); SSL* _ssl_session; // owner std::shared_ptr _ssl_ctx; - // The RdmaEndpoint - rdma::RdmaEndpoint* _rdma_ep; - // Should use RDMA or not - RdmaState _rdma_state; + // Should use SOCKET_MODE_RDMA or SOCKET_MODE_TCP or Other, default is SOCKET_MODE_TCP Transport + SocketMode _socket_mode{SOCKET_MODE_TCP}; + std::shared_ptr _transport; // Pass from controller, for progressive reading. ConnectionType _connection_type_for_progressive_read; diff --git a/src/brpc/socket_map.h b/src/brpc/socket_map.h index 7cf0880498..b1922bf86e 100644 --- a/src/brpc/socket_map.h +++ b/src/brpc/socket_map.h @@ -84,12 +84,12 @@ int SocketMapInsert(const SocketMapKey& key, SocketId* id, inline int SocketMapInsert(const SocketMapKey& key, SocketId* id, const std::shared_ptr& ssl_ctx, - bool use_rdma, + SocketMode socket_mode, const HealthCheckOption& hc_option) { SocketOptions opt; opt.remote_side = key.peer.addr; opt.initial_ssl_ctx = ssl_ctx; - opt.use_rdma = use_rdma; + opt.socket_mode = socket_mode; opt.hc_option = hc_option; return SocketMapInsert(key, id, opt); } @@ -97,13 +97,13 @@ inline int SocketMapInsert(const SocketMapKey& key, SocketId* id, inline int SocketMapInsert(const SocketMapKey& key, SocketId* id, const std::shared_ptr& ssl_ctx) { HealthCheckOption hc_option; - return SocketMapInsert(key, id, ssl_ctx, false, hc_option); + return SocketMapInsert(key, id, ssl_ctx, SOCKET_MODE_TCP, hc_option); } inline int SocketMapInsert(const SocketMapKey& key, SocketId* id) { std::shared_ptr empty_ptr; HealthCheckOption hc_option; - return SocketMapInsert(key, id, empty_ptr, false, hc_option); + return SocketMapInsert(key, id, empty_ptr, SOCKET_MODE_TCP, hc_option); } // Find the SocketId associated with `key'. 
@@ -164,12 +164,12 @@ class SocketMap { int Init(const SocketMapOptions&); int Insert(const SocketMapKey& key, SocketId* id, const std::shared_ptr& ssl_ctx, - bool use_rdma, + SocketMode socket_mode, const HealthCheckOption& hc_option) { SocketOptions opt; opt.remote_side = key.peer.addr; opt.initial_ssl_ctx = ssl_ctx; - opt.use_rdma = use_rdma; + opt.socket_mode = socket_mode; opt.hc_option = hc_option; return Insert(key, id, opt); } @@ -177,12 +177,12 @@ class SocketMap { int Insert(const SocketMapKey& key, SocketId* id, const std::shared_ptr& ssl_ctx) { HealthCheckOption hc_option; - return Insert(key, id, ssl_ctx, false, hc_option); + return Insert(key, id, ssl_ctx, SOCKET_MODE_TCP, hc_option); } int Insert(const SocketMapKey& key, SocketId* id) { std::shared_ptr empty_ptr; HealthCheckOption hc_option; - return Insert(key, id, empty_ptr, false, hc_option); + return Insert(key, id, empty_ptr, SOCKET_MODE_TCP, hc_option); } int Insert(const SocketMapKey& key, SocketId* id, SocketOptions& opt); diff --git a/src/brpc/socket_mode.h b/src/brpc/socket_mode.h new file mode 100644 index 0000000000..8bce01890b --- /dev/null +++ b/src/brpc/socket_mode.h @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef BRPC_COMMON_H +#define BRPC_COMMON_H +namespace brpc { +enum SocketMode { + SOCKET_MODE_TCP = 0, + SOCKET_MODE_RDMA = 1 +}; +} +#endif //BRPC_COMMON_H \ No newline at end of file diff --git a/src/brpc/tcp_transport.cpp b/src/brpc/tcp_transport.cpp new file mode 100644 index 0000000000..49c6f68d64 --- /dev/null +++ b/src/brpc/tcp_transport.cpp @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "tcp_transport.h" +namespace brpc { +DECLARE_bool(usercode_in_coroutine); +DECLARE_bool(usercode_in_pthread); + +extern SocketVarsCollector* g_vars; + +void TcpTransport::Init(Socket* socket, const SocketOptions& options) { + _socket = socket; + _default_connect = options.app_connect; + _on_edge_trigger = options.on_edge_triggered_events; + if (options.need_on_edge_trigger && _on_edge_trigger == NULL) { + _on_edge_trigger = InputMessenger::OnNewMessages; + } +} + +void TcpTransport::Release(){} + +int TcpTransport::Reset(int32_t expected_nref) { + return 0; +} + +int TcpTransport::CutFromIOBuf(butil::IOBuf* buf) { + return buf->cut_into_file_descriptor(_socket->fd()); +} + +std::shared_ptr TcpTransport::Connect() { + return _default_connect; +} + +ssize_t TcpTransport::CutFromIOBufList(butil::IOBuf** buf, size_t ndata) { + return butil::IOBuf::cut_multiple_into_file_descriptor(_socket->fd(), buf, ndata); +} + +int TcpTransport::WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, const timespec duetime) { + g_vars->nwaitepollout << 1; + const int rc = _socket->WaitEpollOut(_socket->fd(), pollin, &duetime); + if (rc < 0 && errno != ETIMEDOUT) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to wait epollout of " << _socket; + _socket->SetFailed(saved_errno, "Fail to wait epollout of %s: %s", + _socket->description().c_str(), berror(saved_errno)); + return 1; + } + return 0; +} + +void TcpTransport::ProcessEvent(bthread_attr_t attr) { + bthread_t tid; + if (FLAGS_usercode_in_coroutine) { + OnEdge(_socket); + } else if (bthread_start_urgent(&tid, &attr, OnEdge, _socket) != 0) { + LOG(FATAL) << "Fail to start ProcessEvent"; + OnEdge(_socket); + } +} +void TcpTransport::QueueMessage(InputMessageClosure& input_msg, int* num_bthread_created, bool last_msg) { + InputMessageBase* to_run_msg = input_msg.release(); + if (!to_run_msg) { + return; + } + // Create bthread for last_msg. 
The bthread is not scheduled + // until bthread_flush() is called (in the worse case). + bthread_t th; + bthread_attr_t tmp = (FLAGS_usercode_in_pthread ? BTHREAD_ATTR_PTHREAD : BTHREAD_ATTR_NORMAL) | BTHREAD_NOSIGNAL; + tmp.keytable_pool = _socket->keytable_pool(); + tmp.tag = bthread_self_tag(); + bthread_attr_set_name(&tmp, "ProcessInputMessage"); + if (!FLAGS_usercode_in_coroutine && bthread_start_background( + &th, &tmp, ProcessInputMessage, to_run_msg) == 0) { + ++*num_bthread_created; + } else { + ProcessInputMessage(to_run_msg); + } +} +void TcpTransport::Debug(std::ostream &os) {} +} \ No newline at end of file diff --git a/src/brpc/tcp_transport.h b/src/brpc/tcp_transport.h new file mode 100644 index 0000000000..b8c6b5e6c7 --- /dev/null +++ b/src/brpc/tcp_transport.h @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef BRPC_TCP_TRANSPORT_H +#define BRPC_TCP_TRANSPORT_H + +#include "brpc/transport.h" +#include "brpc/socket.h" + +namespace brpc { +class TcpTransport : public Transport { + friend class TransportFactory; +public: + void Init(Socket* socket, const SocketOptions& options) override; + void Release() override; + int Reset(int32_t expected_nref) override; + std::shared_ptr Connect() override; + int CutFromIOBuf(butil::IOBuf* buf) override; + ssize_t CutFromIOBufList(butil::IOBuf** buf, size_t ndata) override; + int WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, const timespec duetime) override; + void ProcessEvent(bthread_attr_t attr) override; + void QueueMessage(InputMessageClosure& input_msg, int* num_bthread_created, bool last_msg) override; + void Debug(std::ostream &os) override; +}; +} + +#endif //BRPC_TCP_TRANSPORT_H \ No newline at end of file diff --git a/src/brpc/transport.h b/src/brpc/transport.h new file mode 100644 index 0000000000..ca8985087f --- /dev/null +++ b/src/brpc/transport.h @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef BRPC_TRANSPORT_H +#define BRPC_TRANSPORT_H +#include "brpc/input_messenger.h" +#include "brpc/socket.h" +#include "server.h" + +namespace brpc { +using OnEdgeTrigger = std::function; +class Transport { + friend class TransportFactory; +public: + static void* OnEdge(void* arg) { + // the enclosed Socket is valid and free to access inside this function. + SocketUniquePtr s(static_cast(arg)); + const OnEdgeTrigger on_edge_trigger = s->_transport->GetOnEdgeTrigger(); + on_edge_trigger(s.get()); + return NULL; + } + + static void* ProcessInputMessage(void* void_arg) { + InputMessageBase* msg = static_cast(void_arg); + msg->_process(msg); + return NULL; + } + virtual ~Transport() = default; + virtual void Init(Socket* socket, const SocketOptions& options) = 0; + virtual void Release() = 0; + virtual int Reset(int32_t expected_nref) = 0; + virtual std::shared_ptr Connect() = 0; + virtual int CutFromIOBuf(butil::IOBuf* buf) = 0; + virtual ssize_t CutFromIOBufList(butil::IOBuf** buf, size_t ndata) = 0; + virtual int WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, const timespec duetime) = 0; + virtual void ProcessEvent(bthread_attr_t attr) = 0; + virtual void QueueMessage(InputMessageClosure& input_msg, int* num_bthread_created, bool last_msg) = 0; + virtual void Debug(std::ostream &os) = 0; + + bool HasOnEdgeTrigger() { + return _on_edge_trigger != NULL; + } + OnEdgeTrigger GetOnEdgeTrigger() { + return _on_edge_trigger; + } +protected: + Socket* _socket; + std::shared_ptr _default_connect; + OnEdgeTrigger _on_edge_trigger; +}; +} +#endif //BRPC_TRANSPORT_H \ No newline at end of file diff --git a/src/brpc/transport_factory.cpp b/src/brpc/transport_factory.cpp new file mode 100644 index 0000000000..b29a5e6dc8 --- /dev/null +++ b/src/brpc/transport_factory.cpp @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "transport_factory.h" +#include "brpc/tcp_transport.h" +#include "brpc/rdma_transport.h" +namespace brpc { +int TransportFactory::ContextInitOrDie(SocketMode mode, bool serverOrNot, const void* _options) { + if (mode == SOCKET_MODE_TCP) { + return 0; + } +#if BRPC_WITH_RDMA + else if (mode == SOCKET_MODE_RDMA) { + return RdmaTransport::ContextInitOrDie(serverOrNot, _options); + } +#endif + else { + LOG(ERROR) << "unknown transport type " << mode; + return 1; + } +} + +std::shared_ptr TransportFactory::CreateTransport(SocketMode mode) { + if (mode == SOCKET_MODE_TCP) { + return std::unique_ptr(new TcpTransport()); + } +#if BRPC_WITH_RDMA + else if (mode == SOCKET_MODE_RDMA) { + return std::unique_ptr(new RdmaTransport()); + } +#endif + else { + LOG(ERROR) << "socket_mode set error"; + return nullptr; + } +} +} // namespace brpc \ No newline at end of file diff --git a/src/brpc/transport_factory.h b/src/brpc/transport_factory.h new file mode 100644 index 0000000000..bdbf4c2be4 --- /dev/null +++ b/src/brpc/transport_factory.h @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_TRANSPORT_FACTORY_H +#define BRPC_TRANSPORT_FACTORY_H + +#include "brpc/errno.pb.h" +#include "brpc/socket_mode.h" +#include "brpc/transport.h" + +#if BRPC_WITH_RDMA +BAIDU_REGISTER_ERRNO(brpc::ERDMA, "RDMA verbs error"); +BAIDU_REGISTER_ERRNO(brpc::ERDMAMEM, "Memory not registered for RDMA"); +#endif + +namespace brpc { +// TransportFactory to create transport instance with socket_mode {TCP, RDMA} +class TransportFactory { +public: + static int ContextInitOrDie(SocketMode mode, bool serverOrNot, const void* _options); + // create transport instance with socket mode + static std::shared_ptr CreateTransport(SocketMode mode); +}; +} + +#endif //BRPC_TRANSPORT_FACTORY_H \ No newline at end of file From 76f8c3212f9dbebacac89572a59fd216ee9c923e Mon Sep 17 00:00:00 2001 From: Searion <33591311+Searion@users.noreply.github.com> Date: Tue, 27 Jan 2026 19:26:11 +0800 Subject: [PATCH 22/84] Fix build error caused by CMAKE_CXX_FLAGS (#3204) Co-authored-by: Haigang Xi --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b10991f5f5..77703a4661 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,7 +146,7 @@ if(WITH_MESALINK) endif() set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -DBTHREAD_USE_FAST_PTHREAD_MUTEX -D__const__=__unused__ -D_GNU_SOURCE -DUSE_SYMBOLIZE -DNO_TCMALLOC -D__STDC_FORMAT_MACROS 
-D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -DBRPC_REVISION=\\\"${BRPC_REVISION}\\\" -D__STRICT_ANSI__") set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEBUG_SYMBOL} ${THRIFT_CPP_FLAG}") -set(CMAKE_CXX_FLAGS "${CMAKE_CPP_FLAGS} -O2 -pipe -Wall -W -fPIC -fstrict-aliasing -Wno-invalid-offsetof -Wno-unused-parameter -fno-omit-frame-pointer") +set(CMAKE_CXX_FLAGS "${CMAKE_CPP_FLAGS} ${CMAKE_CXX_FLAGS} -O2 -pipe -Wall -W -fPIC -fstrict-aliasing -Wno-invalid-offsetof -Wno-unused-parameter -fno-omit-frame-pointer") set(CMAKE_C_FLAGS "${CMAKE_CPP_FLAGS} -O2 -pipe -Wall -W -fPIC -fstrict-aliasing -Wno-unused-parameter -fno-omit-frame-pointer") macro(use_cxx11) From 94aadc1fa3ace8c189cd466b43b69a8b98a296fe Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Thu, 29 Jan 2026 10:13:08 +0800 Subject: [PATCH 23/84] Fix some issues with transport (#3206) 1. The return value of CreateTransport should be std::unique_ptr. 2. Delete BAIDU_REGISTER_ERRNO in transport_factory.h. 3. Optimize some code formatting. 
--- src/brpc/input_messenger.cpp | 3 ++- src/brpc/rdma_transport.cpp | 18 +++++++++--------- src/brpc/rdma_transport.h | 2 +- src/brpc/socket.cpp | 1 - src/brpc/socket.h | 4 ++-- src/brpc/socket_mode.h | 8 ++++---- src/brpc/tcp_transport.cpp | 19 ++++++++++++------- src/brpc/tcp_transport.h | 6 +++--- src/brpc/transport.h | 2 +- src/brpc/transport_factory.cpp | 5 +++-- src/brpc/transport_factory.h | 12 +++--------- 11 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/brpc/input_messenger.cpp b/src/brpc/input_messenger.cpp index 925c8776f9..c249cca22c 100644 --- a/src/brpc/input_messenger.cpp +++ b/src/brpc/input_messenger.cpp @@ -377,7 +377,8 @@ void InputMessenger::OnNewMessages(Socket* m) { } } - if (messenger->ProcessNewMessage(m, nr, read_eof, received_us, base_realtime, last_msg) < 0) { + if (messenger->ProcessNewMessage(m, nr, read_eof, received_us, + base_realtime, last_msg) < 0) { return; } } diff --git a/src/brpc/rdma_transport.cpp b/src/brpc/rdma_transport.cpp index d980c5a0fc..8fe88c6b4b 100644 --- a/src/brpc/rdma_transport.cpp +++ b/src/brpc/rdma_transport.cpp @@ -35,8 +35,8 @@ void RdmaTransport::Init(Socket *socket, const SocketOptions &options) { if (!_rdma_ep) { const int saved_errno = errno; PLOG(ERROR) << "Fail to create RdmaEndpoint"; - socket->SetFailed(saved_errno, "Fail to create RdmaEndpoint: %s", - berror(saved_errno)); + socket->SetFailed( + saved_errno, "Fail to create RdmaEndpoint: %s", berror(saved_errno)); } _rdma_state = RDMA_UNKNOWN; } else { @@ -95,8 +95,7 @@ ssize_t RdmaTransport::CutFromIOBufList(butil::IOBuf **buf, size_t ndata) { int RdmaTransport::WaitEpollOut(butil::atomic *_epollout_butex, bool pollin, const timespec duetime) { if (_rdma_state == RDMA_ON) { - const int expected_val = _epollout_butex - ->load(butil::memory_order_acquire); + const int expected_val = _epollout_butex->load(butil::memory_order_acquire); CHECK(_rdma_ep != NULL); if (!_rdma_ep->IsWritable()) { g_vars->nwaitepollout << 1; @@ -105,9 
+104,9 @@ int RdmaTransport::WaitEpollOut(butil::atomic *_epollout_butex, const int saved_errno = errno; PLOG(WARNING) << "Fail to wait rdma window of " << _socket; _socket->SetFailed(saved_errno, - "Fail to wait rdma window of %s: %s", - _socket->description().c_str(), - berror(saved_errno)); + "Fail to wait rdma window of %s: %s", + _socket->description().c_str(), + berror(saved_errno)); } if (_socket->Failed()) { // NOTE: @@ -140,7 +139,8 @@ void RdmaTransport::ProcessEvent(bthread_attr_t attr) { } } -void RdmaTransport::QueueMessage(InputMessageClosure& input_msg, int* num_bthread_created, bool last_msg) { +void RdmaTransport::QueueMessage(InputMessageClosure& input_msg, + int* num_bthread_created, bool last_msg) { if (last_msg && !rdma::FLAGS_rdma_use_polling) { return; } @@ -234,5 +234,5 @@ bool RdmaTransport::OptionsAvailableOverRdma(const ServerOptions* opt) { } return true; } -} +} // namespace brpc #endif \ No newline at end of file diff --git a/src/brpc/rdma_transport.h b/src/brpc/rdma_transport.h index 7e62edff2f..65ae88f7a6 100644 --- a/src/brpc/rdma_transport.h +++ b/src/brpc/rdma_transport.h @@ -60,6 +60,6 @@ class RdmaTransport : public Transport { RdmaState _rdma_state; std::shared_ptr _tcp_transport; }; -} +} // namespace brpc #endif // BRPC_WITH_RDMA #endif //BRPC_RDMA_TRANSPORT_H \ No newline at end of file diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index 9b14d43051..b132f2acea 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -474,7 +474,6 @@ Socket::Socket(Forbidden f) , _ssl_state(SSL_UNKNOWN) , _ssl_session(NULL) , _socket_mode(SOCKET_MODE_TCP) - , _transport(nullptr) , _connection_type_for_progressive_read(CONNECTION_TYPE_UNKNOWN) , _controller_released_socket(false) , _overcrowded(false) diff --git a/src/brpc/socket.h b/src/brpc/socket.h index c2f751e367..816fccdf27 100644 --- a/src/brpc/socket.h +++ b/src/brpc/socket.h @@ -926,8 +926,8 @@ friend class TransportFactory; std::shared_ptr _ssl_ctx; // Should use 
SOCKET_MODE_RDMA or SOCKET_MODE_TCP or Other, default is SOCKET_MODE_TCP Transport - SocketMode _socket_mode{SOCKET_MODE_TCP}; - std::shared_ptr<Transport> _transport; + SocketMode _socket_mode; + std::unique_ptr<Transport> _transport; // Pass from controller, for progressive reading. ConnectionType _connection_type_for_progressive_read; diff --git a/src/brpc/socket_mode.h b/src/brpc/socket_mode.h index 8bce01890b..b5d42be4aa 100644 --- a/src/brpc/socket_mode.h +++ b/src/brpc/socket_mode.h @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. -#ifndef BRPC_COMMON_H -#define BRPC_COMMON_H +#ifndef BRPC_SOCKET_MODE_H +#define BRPC_SOCKET_MODE_H namespace brpc { enum SocketMode { SOCKET_MODE_TCP = 0, SOCKET_MODE_RDMA = 1 }; -} -#endif //BRPC_COMMON_H \ No newline at end of file +} // namespace brpc +#endif //BRPC_SOCKET_MODE_H \ No newline at end of file diff --git a/src/brpc/tcp_transport.cpp b/src/brpc/tcp_transport.cpp index 49c6f68d64..37db7a8966 100644 --- a/src/brpc/tcp_transport.cpp +++ b/src/brpc/tcp_transport.cpp @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#include "tcp_transport.h" +#include "brpc/tcp_transport.h" + namespace brpc { DECLARE_bool(usercode_in_coroutine); DECLARE_bool(usercode_in_pthread); @@ -49,14 +50,15 @@ ssize_t TcpTransport::CutFromIOBufList(butil::IOBuf** buf, size_t ndata) { return butil::IOBuf::cut_multiple_into_file_descriptor(_socket->fd(), buf, ndata); } -int TcpTransport::WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, const timespec duetime) { +int TcpTransport::WaitEpollOut(butil::atomic* _epollout_butex, + bool pollin, timespec duetime) { g_vars->nwaitepollout << 1; const int rc = _socket->WaitEpollOut(_socket->fd(), pollin, &duetime); if (rc < 0 && errno != ETIMEDOUT) { const int saved_errno = errno; PLOG(WARNING) << "Fail to wait epollout of " << _socket; _socket->SetFailed(saved_errno, "Fail to wait epollout of %s: %s", - _socket->description().c_str(), berror(saved_errno)); + _socket->description().c_str(), berror(saved_errno)); return 1; } return 0; @@ -71,7 +73,8 @@ void TcpTransport::ProcessEvent(bthread_attr_t attr) { OnEdge(_socket); } } -void TcpTransport::QueueMessage(InputMessageClosure& input_msg, int* num_bthread_created, bool last_msg) { +void TcpTransport::QueueMessage(InputMessageClosure& input_msg, + int* num_bthread_created, bool) { InputMessageBase* to_run_msg = input_msg.release(); if (!to_run_msg) { return; @@ -79,7 +82,9 @@ void TcpTransport::QueueMessage(InputMessageClosure& input_msg, int* num_bthread // Create bthread for last_msg. The bthread is not scheduled // until bthread_flush() is called (in the worse case). bthread_t th; - bthread_attr_t tmp = (FLAGS_usercode_in_pthread ? BTHREAD_ATTR_PTHREAD : BTHREAD_ATTR_NORMAL) | BTHREAD_NOSIGNAL; + bthread_attr_t tmp = + (FLAGS_usercode_in_pthread ? 
BTHREAD_ATTR_PTHREAD : BTHREAD_ATTR_NORMAL) | + BTHREAD_NOSIGNAL; tmp.keytable_pool = _socket->keytable_pool(); tmp.tag = bthread_self_tag(); bthread_attr_set_name(&tmp, "ProcessInputMessage"); @@ -90,5 +95,5 @@ void TcpTransport::QueueMessage(InputMessageClosure& input_msg, int* num_bthread ProcessInputMessage(to_run_msg); } } -void TcpTransport::Debug(std::ostream &os) {} -} \ No newline at end of file + +} // namespace brpc \ No newline at end of file diff --git a/src/brpc/tcp_transport.h b/src/brpc/tcp_transport.h index b8c6b5e6c7..8a06a85d37 100644 --- a/src/brpc/tcp_transport.h +++ b/src/brpc/tcp_transport.h @@ -31,11 +31,11 @@ class TcpTransport : public Transport { std::shared_ptr Connect() override; int CutFromIOBuf(butil::IOBuf* buf) override; ssize_t CutFromIOBufList(butil::IOBuf** buf, size_t ndata) override; - int WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, const timespec duetime) override; + int WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, timespec duetime) override; void ProcessEvent(bthread_attr_t attr) override; void QueueMessage(InputMessageClosure& input_msg, int* num_bthread_created, bool last_msg) override; - void Debug(std::ostream &os) override; + void Debug(std::ostream &os) override {} }; -} +} // namespace brpc #endif //BRPC_TCP_TRANSPORT_H \ No newline at end of file diff --git a/src/brpc/transport.h b/src/brpc/transport.h index ca8985087f..a2cb868b89 100644 --- a/src/brpc/transport.h +++ b/src/brpc/transport.h @@ -46,7 +46,7 @@ class Transport { virtual std::shared_ptr Connect() = 0; virtual int CutFromIOBuf(butil::IOBuf* buf) = 0; virtual ssize_t CutFromIOBufList(butil::IOBuf** buf, size_t ndata) = 0; - virtual int WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, const timespec duetime) = 0; + virtual int WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, timespec duetime) = 0; virtual void ProcessEvent(bthread_attr_t attr) = 0; virtual void QueueMessage(InputMessageClosure& input_msg, int* 
num_bthread_created, bool last_msg) = 0; virtual void Debug(std::ostream &os) = 0; diff --git a/src/brpc/transport_factory.cpp b/src/brpc/transport_factory.cpp index b29a5e6dc8..b689e2edd2 100644 --- a/src/brpc/transport_factory.cpp +++ b/src/brpc/transport_factory.cpp @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include "transport_factory.h" +#include "brpc/transport_factory.h" #include "brpc/tcp_transport.h" #include "brpc/rdma_transport.h" + namespace brpc { int TransportFactory::ContextInitOrDie(SocketMode mode, bool serverOrNot, const void* _options) { if (mode == SOCKET_MODE_TCP) { @@ -34,7 +35,7 @@ int TransportFactory::ContextInitOrDie(SocketMode mode, bool serverOrNot, const } } -std::shared_ptr TransportFactory::CreateTransport(SocketMode mode) { +std::unique_ptr TransportFactory::CreateTransport(SocketMode mode) { if (mode == SOCKET_MODE_TCP) { return std::unique_ptr(new TcpTransport()); } diff --git a/src/brpc/transport_factory.h b/src/brpc/transport_factory.h index bdbf4c2be4..d933a130e1 100644 --- a/src/brpc/transport_factory.h +++ b/src/brpc/transport_factory.h @@ -18,23 +18,17 @@ #ifndef BRPC_TRANSPORT_FACTORY_H #define BRPC_TRANSPORT_FACTORY_H -#include "brpc/errno.pb.h" #include "brpc/socket_mode.h" #include "brpc/transport.h" -#if BRPC_WITH_RDMA -BAIDU_REGISTER_ERRNO(brpc::ERDMA, "RDMA verbs error"); -BAIDU_REGISTER_ERRNO(brpc::ERDMAMEM, "Memory not registered for RDMA"); -#endif - namespace brpc { // TransportFactory to create transport instance with socket_mode {TCP, RDMA} class TransportFactory { public: static int ContextInitOrDie(SocketMode mode, bool serverOrNot, const void* _options); - // create transport instance with socket mode - static std::shared_ptr CreateTransport(SocketMode mode); + // Create transport instance with socket mode. 
+ static std::unique_ptr<Transport> CreateTransport(SocketMode mode); }; -} +} // namespace brpc #endif //BRPC_TRANSPORT_FACTORY_H \ No newline at end of file From 2df7b1b3217ee1b803b4435bc246a9a35aba64ee Mon Sep 17 00:00:00 2001 From: "Yang,Liming" Date: Sat, 31 Jan 2026 14:19:11 +0800 Subject: [PATCH 24/84] change protobuf version check for descriptor->extension_range (#3210) --- src/json2pb/json_to_pb.cpp | 2 +- src/json2pb/pb_to_json.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/json2pb/json_to_pb.cpp b/src/json2pb/json_to_pb.cpp index 87fd080a15..491ab92116 100644 --- a/src/json2pb/json_to_pb.cpp +++ b/src/json2pb/json_to_pb.cpp @@ -555,7 +555,7 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value, for (int i = 0; i < descriptor->extension_range_count(); ++i) { const google::protobuf::Descriptor::ExtensionRange* ext_range = descriptor->extension_range(i); -#if GOOGLE_PROTOBUF_VERSION < 4025000 +#if GOOGLE_PROTOBUF_VERSION < 4024000 for (int tag_number = ext_range->start; tag_number < ext_range->end; ++tag_number) #else for (int tag_number = ext_range->start_number(); tag_number < ext_range->end_number(); ++tag_number) diff --git a/src/json2pb/pb_to_json.cpp b/src/json2pb/pb_to_json.cpp index b9d01c2a94..8e7e4f32e3 100644 --- a/src/json2pb/pb_to_json.cpp +++ b/src/json2pb/pb_to_json.cpp @@ -136,7 +136,7 @@ bool PbToJsonConverter::Convert(const google::protobuf::Message& message, Handle for (int i = 0; i < ext_range_count; ++i) { const google::protobuf::Descriptor::ExtensionRange* ext_range = descriptor->extension_range(i); -#if GOOGLE_PROTOBUF_VERSION < 4025000 +#if GOOGLE_PROTOBUF_VERSION < 4024000 for (int tag_number = ext_range->start; tag_number < ext_range->end; ++tag_number) #else for (int tag_number = ext_range->start_number(); tag_number < ext_range->end_number(); ++tag_number) From 23004abf35be7aa0e24c461bb2b0837c50ae798a Mon Sep 17 00:00:00 2001 From: yanfeng Date: Thu, 12 Feb 2026 14:02:26 +0800 
Subject: [PATCH 25/84] feat(auto_cl): add error rate threshold for punishment attenuation (#3219) * feat(auto_cl): add error rate threshold for punishment attenuation Add new GFlag `auto_cl_error_rate_punish_threshold` to enable error-rate-based punishment attenuation in AutoConcurrencyLimiter. Problem: Low error rates (e.g., 1.3% sporadic timeouts) cause disproportionate avg_latency inflation (+31%), leading the limiter to mistakenly shrink max_concurrency and trigger ELIMIT rejections. Solution: Inspired by Alibaba Sentinel's threshold-based approach: - threshold=0 (default): Original behavior preserved (backward compat) - threshold>0 (e.g., 0.1): Error rates below threshold produce zero punishment; above it, punishment scales linearly from 0 to full Example: With threshold=0.1, a 5% error rate produces no punishment, while a 50% error rate produces 44% of the original punishment. --------- Co-authored-by: Claude Opus 4.5 --- docs/cn/auto_concurrency_limiter.md | 18 ++ src/brpc/policy/auto_concurrency_limiter.cpp | 31 +++- test/BUILD.bazel | 13 ++ ...brpc_auto_concurrency_limiter_unittest.cpp | 168 ++++++++++++++++++ 4 files changed, 229 insertions(+), 1 deletion(-) create mode 100644 test/brpc_auto_concurrency_limiter_unittest.cpp diff --git a/docs/cn/auto_concurrency_limiter.md b/docs/cn/auto_concurrency_limiter.md index 17ef5d7ec3..342e9ba641 100644 --- a/docs/cn/auto_concurrency_limiter.md +++ b/docs/cn/auto_concurrency_limiter.md @@ -154,3 +154,21 @@ netflix中的gradient算法公式为:max_concurrency = min_latency / latency * * gradient算法中的max_concurrency / latency从概念上和qps有关联(根据little's law),但可能严重脱节。比如在重测 min_latency前,若所有latency都小于min_latency,那么max_concurrency会不断下降甚至到0;但按照本算法,max_qps和min_latency仍然是稳定的,它们计算出的max_concurrency也不会剧烈变动。究其本质,gradient算法在迭代max_concurrency时,latency并不能代表实际并发为max_concurrency时的延时,两者是脱节的,所以max_concurrency / latency的实际物理含义不明,与qps可能差异甚大,最后导致了很大的偏差。 * 
gradient算法的queue_size推荐为sqrt(max_concurrency),这是不合理的。netflix对queue_size的理解大概是代表各种不可控环节的缓存,比如socket里的,和max_concurrency存在一定的正向关系情有可原。但在我们的理解中,这部分queue_size作用微乎其微,没有或用常量即可。我们关注的queue_size是给concurrency上升留出的探索空间: max_concurrency的更新是有延迟的,在并发从低到高的增长过程中,queue_size的作用就是在max_concurrency更新前不限制qps上升。而当concurrency高时,服务可能已经过载了,queue_size就应该小一点,防止进一步恶化延时。这里的queue_size和并发是反向关系。 + +## 错误率惩罚阈值 + +`auto_cl_error_rate_punish_threshold`用于设置错误率"死区",低于该阈值的错误率不会产生惩罚,避免少量错误请求对max_concurrency的过度影响。 + +| GFlag | 默认值 | 有效范围 | 说明 | +|-------|--------|----------|------| +| auto_cl_error_rate_punish_threshold | 0 | [0, 1) | 错误率惩罚阈值,0表示禁用 | + +- **默认值为0**:禁用该功能,保持原有行为 +- **设置为有效值(如0.1)**:错误率 ≤ 阈值时惩罚为0;错误率 > 阈值时惩罚线性增长 +- **无效值处理**:≥1 的值会被忽略,等同于0 + +**示例**: +``` +# 错误率低于10%时不惩罚,高于10%时线性增加惩罚 +--auto_cl_error_rate_punish_threshold=0.1 +``` diff --git a/src/brpc/policy/auto_concurrency_limiter.cpp b/src/brpc/policy/auto_concurrency_limiter.cpp index dd5a02ec99..51ea56d765 100644 --- a/src/brpc/policy/auto_concurrency_limiter.cpp +++ b/src/brpc/policy/auto_concurrency_limiter.cpp @@ -77,6 +77,13 @@ DEFINE_int32(auto_cl_latency_fluctuation_correction_factor, 1, "the value, the higher the tolerance for the fluctuation of the " "latency. If the value is too large, the latency will be higher " "when the server is overloaded."); +DEFINE_double(auto_cl_error_rate_punish_threshold, 0, + "Threshold for error-rate-based punishment attenuation. " + "Valid range: [0, 1). 0 (default) disables the feature. " + "Values >= 1 are ignored and treated as 0. " + "e.g. 0.1: error rates below 10%% produce zero punishment; " + "above it the punishment scales linearly from 0 to full strength. 
" + "Only effective when auto_cl_enable_error_punish is true."); AutoConcurrencyLimiter::AutoConcurrencyLimiter() : _max_concurrency(FLAGS_auto_cl_initial_max_concurrency) @@ -236,7 +243,29 @@ void AutoConcurrencyLimiter::AdjustMaxConcurrency(int next_max_concurrency) { void AutoConcurrencyLimiter::UpdateMaxConcurrency(int64_t sampling_time_us) { int32_t total_succ_req = _total_succ_req.load(butil::memory_order_relaxed); double failed_punish = _sw.total_failed_us * FLAGS_auto_cl_fail_punish_ratio; - int64_t avg_latency = + + // Threshold-based attenuation: when 0 < threshold < 1, attenuate punishment + // based on error rate. Inspired by Sentinel's threshold-based circuit breaker: + // low error rates should not inflate avg_latency. Above threshold, punishment + // scales linearly from 0 to full strength. + // Invalid values (<=0 or >=1) skip this block entirely, preserving original behavior. + if (FLAGS_auto_cl_error_rate_punish_threshold > 0 && + FLAGS_auto_cl_error_rate_punish_threshold < 1.0 && + _sw.failed_count > 0) { + double threshold = FLAGS_auto_cl_error_rate_punish_threshold; + double error_rate = static_cast(_sw.failed_count) / + (_sw.succ_count + _sw.failed_count); + if (error_rate <= threshold) { + // Error rate within dead zone, cancel punishment. + failed_punish = 0; + } else { + // Linear ramp: 0 at threshold, 1.0 at 100% error rate. 
+ double punish_factor = (error_rate - threshold) / (1.0 - threshold); + failed_punish *= punish_factor; + } + } + + int64_t avg_latency = std::ceil((failed_punish + _sw.total_succ_us) / _sw.succ_count); double qps = 1000000.0 * total_succ_req / (sampling_time_us - _sw.start_time_us); UpdateMinLatency(avg_latency); diff --git a/test/BUILD.bazel b/test/BUILD.bazel index 05420ae310..66aef4259e 100644 --- a/test/BUILD.bazel +++ b/test/BUILD.bazel @@ -269,6 +269,19 @@ cc_test( ], ) +cc_test( + name = "brpc_auto_concurrency_limiter_test", + srcs = [ + "brpc_auto_concurrency_limiter_unittest.cpp", + ], + copts = COPTS, + deps = [ + "//:brpc", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], +) + refresh_compile_commands( name = "brpc_test_compdb", # Specify the targets of interest. diff --git a/test/brpc_auto_concurrency_limiter_unittest.cpp b/test/brpc_auto_concurrency_limiter_unittest.cpp new file mode 100644 index 0000000000..77163e2fb8 --- /dev/null +++ b/test/brpc_auto_concurrency_limiter_unittest.cpp @@ -0,0 +1,168 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "brpc/policy/auto_concurrency_limiter.h" +#include "butil/time.h" +#include <gtest/gtest.h> + +namespace brpc { +namespace policy { + +DECLARE_int32(auto_cl_sample_window_size_ms); +DECLARE_int32(auto_cl_min_sample_count); +DECLARE_int32(auto_cl_max_sample_count); +DECLARE_bool(auto_cl_enable_error_punish); +DECLARE_double(auto_cl_fail_punish_ratio); +DECLARE_double(auto_cl_error_rate_punish_threshold); + +} // namespace policy +} // namespace brpc + +class AutoConcurrencyLimiterTest : public ::testing::Test { +protected: + void SetUp() override { + // Save original values + orig_sample_window_size_ms_ = brpc::policy::FLAGS_auto_cl_sample_window_size_ms; + orig_min_sample_count_ = brpc::policy::FLAGS_auto_cl_min_sample_count; + orig_max_sample_count_ = brpc::policy::FLAGS_auto_cl_max_sample_count; + orig_enable_error_punish_ = brpc::policy::FLAGS_auto_cl_enable_error_punish; + orig_fail_punish_ratio_ = brpc::policy::FLAGS_auto_cl_fail_punish_ratio; + orig_error_rate_threshold_ = brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold; + + // Set test-friendly values + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 1000; + brpc::policy::FLAGS_auto_cl_min_sample_count = 5; + brpc::policy::FLAGS_auto_cl_max_sample_count = 200; + brpc::policy::FLAGS_auto_cl_enable_error_punish = true; + brpc::policy::FLAGS_auto_cl_fail_punish_ratio = 1.0; + } + + void TearDown() override { + // Restore original values + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = orig_sample_window_size_ms_; + brpc::policy::FLAGS_auto_cl_min_sample_count = orig_min_sample_count_; + brpc::policy::FLAGS_auto_cl_max_sample_count = orig_max_sample_count_; + brpc::policy::FLAGS_auto_cl_enable_error_punish = orig_enable_error_punish_; + brpc::policy::FLAGS_auto_cl_fail_punish_ratio = orig_fail_punish_ratio_; + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = orig_error_rate_threshold_; + } + +private: + int32_t orig_sample_window_size_ms_; + int32_t orig_min_sample_count_; + int32_t 
orig_max_sample_count_; + bool orig_enable_error_punish_; + double orig_fail_punish_ratio_; + double orig_error_rate_threshold_; +}; + +// Helper function to add samples and trigger window completion +// Uses synthetic timestamps instead of sleeping for faster, deterministic tests. +// The final successful sample is used as the trigger, so actual counts match +// succ_count/fail_count exactly (preserving intended error rates). +void AddSamplesAndTriggerWindow(brpc::policy::AutoConcurrencyLimiter& limiter, + int succ_count, int64_t succ_latency, + int fail_count, int64_t fail_latency) { + ASSERT_GT(succ_count, 0) << "Need at least 1 success to trigger window"; + int64_t now = butil::gettimeofday_us(); + + // Add successful samples (reserve one for the trigger) + for (int i = 0; i < succ_count - 1; ++i) { + limiter.AddSample(0, succ_latency, now); + } + // Add failed samples + for (int i = 0; i < fail_count; ++i) { + limiter.AddSample(1, fail_latency, now); + } + + // Advance timestamp past window expiry instead of sleeping + int64_t after_window = now + brpc::policy::FLAGS_auto_cl_sample_window_size_ms * 1000 + 1000; + + // Use the final success sample to trigger window submission + limiter.AddSample(0, succ_latency, after_window); +} + +// Test 1: Backward compatibility - threshold=0 preserves original punishment behavior +TEST_F(AutoConcurrencyLimiterTest, ThresholdZeroPreservesOriginalBehavior) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0; + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + brpc::policy::AutoConcurrencyLimiter limiter; + AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); + + // 10% error rate, threshold=0 means full punishment applied + // avg_latency = ceil((10*1000 + 90*100) / 90) = ceil(211.1) = 212us + ASSERT_GT(limiter._min_latency_us, 180); + ASSERT_LT(limiter._min_latency_us, 250); +} + +// Test 2: Dead zone - error rate below threshold produces zero punishment +TEST_F(AutoConcurrencyLimiterTest, 
BelowThresholdZeroPunishment) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.2; // 20% threshold + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + brpc::policy::AutoConcurrencyLimiter limiter; + AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); + + // 10% error rate < 20% threshold, punishment should be zero + // avg_latency = 90*100 / 90 = 100us (no inflation) + ASSERT_GT(limiter._min_latency_us, 80); + ASSERT_LT(limiter._min_latency_us, 130); +} + +// Test 3: Boundary - error rate exactly at threshold produces zero punishment +TEST_F(AutoConcurrencyLimiterTest, ExactlyAtThresholdZeroPunishment) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; // 10% threshold + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + brpc::policy::AutoConcurrencyLimiter limiter; + AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); + + // 10% error rate == 10% threshold, punishment should be zero + // avg_latency = 90*100 / 90 = 100us + ASSERT_GT(limiter._min_latency_us, 80); + ASSERT_LT(limiter._min_latency_us, 130); +} + +// Test 4: Linear scaling - above threshold, punishment scales proportionally +TEST_F(AutoConcurrencyLimiterTest, AboveThresholdLinearScaling) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; // 10% threshold + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + // Case A: 50% error rate + // punish_factor = (0.5 - 0.1) / (1.0 - 0.1) = 4/9 ≈ 0.444 + // failed_punish = 50 * 1000 * (4/9) = 22222.2us + // avg_latency = ceil((22222.2 + 50*100) / 50) = ceil(544.4) = 545us + { + brpc::policy::AutoConcurrencyLimiter limiter; + AddSamplesAndTriggerWindow(limiter, 50, 100, 50, 1000); + ASSERT_GT(limiter._min_latency_us, 450); + ASSERT_LT(limiter._min_latency_us, 650); + } + + // Case B: 90% error rate (near full punishment) + // punish_factor = (0.9 - 0.1) / (1.0 - 0.1) = 8/9 ≈ 0.889 + // failed_punish = 90 * 1000 * (8/9) = 80000us + // avg_latency = ceil((80000 + 10*100) / 10) = 
ceil(8100) = 8100us + { + brpc::policy::AutoConcurrencyLimiter limiter; + AddSamplesAndTriggerWindow(limiter, 10, 100, 90, 1000); + ASSERT_GT(limiter._min_latency_us, 7000); + ASSERT_LT(limiter._min_latency_us, 9000); + } +} + From 5fa3503d46f9c94127ba72b54777b7e3d637089b Mon Sep 17 00:00:00 2001 From: Jenrry You Date: Sun, 15 Feb 2026 20:57:38 +0800 Subject: [PATCH 26/84] Fix stream closed before response during batch stream creation (#3214) --- src/brpc/controller.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index d7b511dbd4..d3821eca80 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -1448,6 +1448,7 @@ void Controller::HandleStreamConnection(Socket *host_socket) { auto extra_stream_ids = std::move(*_remote_stream_settings->mutable_extra_stream_ids()); _remote_stream_settings->clear_extra_stream_ids(); for (size_t i = 1; i < stream_num; ++i) { + if(!ptrs[i]) continue; Stream* extra_stream = (Stream *) ptrs[i]->conn(); _remote_stream_settings->set_stream_id(extra_stream_ids[i - 1]); s->SetHostSocket(host_socket); From c23365baf4db81b259265536f5a5b4fe6e753836 Mon Sep 17 00:00:00 2001 From: Daniel Morante Date: Mon, 23 Feb 2026 01:52:56 -0600 Subject: [PATCH 27/84] fix: remove references to non-existent tracked_objects.h in platform_thread_freebsd.cc (#3223) The tracked_objects.h header and ThreadData::InitializeThreadContext() were part of Chromium's base library profiling subsystem, which was never ported to brpc. The Linux (platform_thread_linux.cc) and macOS (platform_thread_mac.mm) equivalents already had these references removed. 
This causes a compile error on FreeBSD: fatal error: 'butil/tracked_objects.h' file not found --- src/butil/threading/platform_thread_freebsd.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/butil/threading/platform_thread_freebsd.cc b/src/butil/threading/platform_thread_freebsd.cc index a18264be99..ad5fa94941 100644 --- a/src/butil/threading/platform_thread_freebsd.cc +++ b/src/butil/threading/platform_thread_freebsd.cc @@ -13,7 +13,6 @@ #include "butil/safe_strerror_posix.h" #include "butil/threading/thread_id_name_manager.h" #include "butil/threading/thread_restrictions.h" -#include "butil/tracked_objects.h" #if !defined(OS_NACL) #include @@ -46,7 +45,6 @@ int ThreadNiceValue(ThreadPriority priority) { // static void PlatformThread::SetName(const char* name) { ThreadIdNameManager::GetInstance()->SetName(CurrentId(), name); - tracked_objects::ThreadData::InitializeThreadContext(name); SetNameSimple(name); } From 3639e7a2d54ac8abc2d7954048da4a0deab71b6d Mon Sep 17 00:00:00 2001 From: Xiaofeng Wang Date: Thu, 26 Feb 2026 10:40:25 +0800 Subject: [PATCH 28/84] explicitly link utf8_validity for Protobuf v22+ (#3230) --- config_brpc.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/config_brpc.sh b/config_brpc.sh index 75826452b4..7d03e224ad 100755 --- a/config_brpc.sh +++ b/config_brpc.sh @@ -267,6 +267,11 @@ GFLAGS_HDR=$(find_dir_of_header_or_die gflags/gflags.h) PROTOBUF_HDR=$(find_dir_of_header_or_die google/protobuf/message.h) PROTOBUF_VERSION=$(grep '#define GOOGLE_PROTOBUF_VERSION [0-9]\+' $PROTOBUF_HDR/google/protobuf/stubs/common.h | awk '{print $3}') if [ "$PROTOBUF_VERSION" -ge 4022000 ]; then + # from v22, utf8_validity should be explicitly linked + # https://github.com/protocolbuffers/protobuf/blob/a847a8dc4ba1d99e7ba917146c84438b4de7d085/cmake/libprotobuf.cmake#L47 + UTF8_VALIDITY_LIB=$(find_dir_of_lib utf8_validity) + append_linking "$UTF8_VALIDITY_LIB" utf8_validity + ABSL_HDR=$(find_dir_of_header_or_die absl/base/config.h) 
ABSL_LIB=$(find_dir_of_lib_or_die absl_strings) ABSL_TARGET_NAMES=" From 718bd50f83f220cd2dcff20440d3dbe8334d14c3 Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Sun, 1 Mar 2026 12:26:47 +0800 Subject: [PATCH 29/84] Fix CI installation errors (#3233) --- .github/actions/install-all-dependencies/action.yml | 2 +- .github/actions/install-essential-dependencies/action.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/install-all-dependencies/action.yml b/.github/actions/install-all-dependencies/action.yml index 179f86cd4f..86d2884b97 100644 --- a/.github/actions/install-all-dependencies/action.yml +++ b/.github/actions/install-all-dependencies/action.yml @@ -2,7 +2,7 @@ runs: using: "composite" steps: - uses: ./.github/actions/install-essential-dependencies - - run: sudo apt-get install -y libunwind-dev libgoogle-glog-dev automake bison flex libboost-all-dev libevent-dev libtool pkg-config libibverbs1 libibverbs-dev + - run: sudo apt-get update && sudo apt-get install -y libunwind-dev libgoogle-glog-dev automake bison flex libboost-all-dev libevent-dev libtool pkg-config libibverbs1 libibverbs-dev shell: bash - run: | wget https://archive.apache.org/dist/thrift/0.11.0/thrift-0.11.0.tar.gz && tar -xf thrift-0.11.0.tar.gz && cd thrift-0.11.0/ diff --git a/.github/actions/install-essential-dependencies/action.yml b/.github/actions/install-essential-dependencies/action.yml index 3411b7f7c1..d6c5da96c1 100644 --- a/.github/actions/install-essential-dependencies/action.yml +++ b/.github/actions/install-essential-dependencies/action.yml @@ -3,5 +3,5 @@ runs: steps: - run: ulimit -c unlimited -S && sudo bash -c "echo 'core.%e.%p' > /proc/sys/kernel/core_pattern" shell: bash - - run: sudo apt-get install -y git g++ make libssl-dev libgflags-dev libprotobuf-dev libprotoc-dev protobuf-compiler libleveldb-dev + - run: sudo apt-get update && sudo apt-get install -y git g++ make libssl-dev libgflags-dev libprotobuf-dev libprotoc-dev 
protobuf-compiler libleveldb-dev shell: bash From 7c41ef798d8d4e15b02db34e5c661373404be60c Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Sun, 1 Mar 2026 16:04:06 +0800 Subject: [PATCH 30/84] Support custom modification of sub controllers (#3213) * Copy http headers from main controller to sub controller * Support custom modification of sub controllers --- docs/cn/combo_channel.md | 16 +++++++ docs/en/combo_channel.md | 14 ++++++ src/brpc/parallel_channel.cpp | 23 +++++++-- src/brpc/parallel_channel.h | 16 ++++++- src/brpc/policy/redis_protocol.cpp | 2 +- src/brpc/selective_channel.cpp | 12 ++--- src/brpc/socket.cpp | 3 +- test/brpc_channel_unittest.cpp | 75 +++++++++++++++++++++++++++--- test/echo.proto | 1 + 9 files changed, 142 insertions(+), 20 deletions(-) diff --git a/docs/cn/combo_channel.md b/docs/cn/combo_channel.md index e11c79b46b..fba4f6be69 100644 --- a/docs/cn/combo_channel.md +++ b/docs/cn/combo_channel.md @@ -60,8 +60,12 @@ public: const google::protobuf::MethodDescriptor* method, const google::protobuf::Message* request, google::protobuf::Message* response) = 0; + + virtual void MapController(int channel_index/*starting from 0*/, int channel_count, + const Controller* main_cntl, Controller* sub_cntl); }; ``` +### Map channel_index:该sub channel在ParallelChannel中的位置,从0开始计数。 @@ -124,6 +128,18 @@ method/request/response:ParallelChannel.CallMethod()的参数。 }; ``` +### MapController + +channel_index:该sub channel在ParallelChannel中的位置,从0开始计数。 + +channel_count:ParallelChannel中sub channel的数量。 + +main_cntl:ParallelChannel.CallMethod()的参数。 + +sub_cntl:sub channel的请求对应的controller。默认实现:将main_cntl的request_attachment追加到sub_cntl中。 + +注意:修改ClientSettings相关配置(如超时、重试等)是无效的,因为所有sub_cntl都是使用main_cntl的ClientSettings配置。 + ## ResponseMerger response_merger把sub
channel的response合并入总的response,其为NULL时,则使用response->MergeFrom(*sub_response),MergeFrom的行为可概括为“除了合并repeated字段,其余都是覆盖”。如果你需要更复杂的行为,则需实现ResponseMerger。response_merger是一个个执行的,所以你并不需要考虑多个Merge同时运行的情况。response_merger在ParallelChannel析构时被删除。response_merger内含引用计数,一个response_merger可与多个sub channel关联。 diff --git a/docs/en/combo_channel.md b/docs/en/combo_channel.md index 686fad59c1..ab68188fe2 100644 --- a/docs/en/combo_channel.md +++ b/docs/en/combo_channel.md @@ -63,6 +63,8 @@ public: }; ``` +### Map + `channel_index`: The position of the sub channel inside `ParallelChannel`, starting from zero. `channel_count`: The sub channel count inside `ParallelChannel`. @@ -131,6 +133,18 @@ Common implementations of `Map()` are listed below: }; ``` +### MapController + +`channel_index`: The position of the sub channel inside `ParallelChannel`, starting from zero. + +`channel_count`: The sub channel count inside `ParallelChannel`. + +`main_cntl`: The controller passed to `ParallelChannel::CallMethod()`. + +`sub_cntl`: The controller used for the sub channel's request. Default implementation: append the request_attachment of `main_cntl` to `sub_cntl`. + +Note: Modifying `ClientSettings` configurations (such as timeout and retries) is ineffective because all sub controllers use the `ClientSettings` configuration of `main_cntl`. + ## ResponseMerger `response_merger` merges responses from all sub channels into one for the `ParallelChannel`. When it's NULL, `response->MergeFrom(*sub_response)` is used instead, whose behavior can be summarized as "merge repeated fields and overwrite the rest". If you need more complex behavior, implement `ResponseMerger`. Multiple `response_merger` are called one by one to merge sub responses so that you do not need to consider the race conditions between merging multiple responses simultaneously. The object is deleted when `ParallelChannel ` destructs.
Due to the reference counting inside, `response_merger ` can be associated with multiple sub channels. diff --git a/src/brpc/parallel_channel.cpp b/src/brpc/parallel_channel.cpp index 130712bfb9..de2b86f1c4 100644 --- a/src/brpc/parallel_channel.cpp +++ b/src/brpc/parallel_channel.cpp @@ -612,6 +612,7 @@ void ParallelChannel::CallMethod( int ndone = nchan; int fail_limit = 1; int success_limit = 1; + Controller::ClientSettings settings{}; DEFINE_SMALL_ARRAY(SubCall, aps, nchan, 64); if (cntl->FailedInline()) { @@ -718,12 +719,28 @@ void ParallelChannel::CallMethod( d->SaveThreadInfoOfCallsite(); CHECK_EQ(0, bthread_id_unlock(cid)); // Don't touch `cntl' and `d' again (for async RPC) - + + // Apply client settings of _cntl to controllers of sub calls, except + // timeout. If we let sub channel do their timeout separately, when + // timeout happens, we get ETOOMANYFAILS rather than ERPCTIMEDOUT. + cntl->SaveClientSettings(&settings); + settings.timeout_ms = -1; + for (int i = 0, j = 0; i < nchan; ++i) { + if (!aps[i].is_skip()) { + ParallelChannelDone::SubDone* sd = d->sub_done(j++); + if (NULL != _chans[i].call_mapper) { + _chans[i].call_mapper->MapController(i, nchan, cntl, &sd->cntl); + } else { + // Forward the attachment to each sub call. 
+ sd->cntl.request_attachment().append(cntl->request_attachment()); + sd->cntl.ApplyClientSettings(settings); + sd->cntl.allow_done_to_run_in_place(); + } + } for (int i = 0, j = 0; i < nchan; ++i) { if (!aps[i].is_skip()) { ParallelChannelDone::SubDone* sd = d->sub_done(j++); - // Forward the attachment to each sub call - sd->cntl.request_attachment().append(cntl->request_attachment()); _chans[i].chan->CallMethod(sd->ap.method, &sd->cntl, sd->ap.request, sd->ap.response, sd); } diff --git a/src/brpc/parallel_channel.h b/src/brpc/parallel_channel.h index 84e5f342cb..292213c1ad 100644 --- a/src/brpc/parallel_channel.h +++ b/src/brpc/parallel_channel.h @@ -91,6 +91,14 @@ struct SubCall { // } // return SubCall(sub_method, request->sub_request(channel_index), // response->add_sub_response(), 0); +// MapController customizes the controller of each sub call issued by +// ParallelChannel, so sub channels can have different controllers. +// Note: +// Modifying ClientSettings configurations (such as timeout, retries, etc.) +// is ineffective because all sub-controllers use the main controller's +// ClientSettings configuration. +// Examples: +// sub_cntl->http_request().SetHeader(...); class CallMapper : public SharedObject { public: virtual SubCall Map(int channel_index/*starting from 0*/, @@ -98,7 +106,13 @@ class CallMapper : public SharedObject { const google::protobuf::MethodDescriptor* method, const google::protobuf::Message* request, google::protobuf::Message* response) { - return Map(channel_index, method, request, response); + return Map(channel_index, method, request, response); + } + + virtual void MapController(int channel_index/*starting from 0*/, int channel_count, + const Controller* main_cntl, Controller* sub_cntl) { + // Forward the attachment to each sub call by default.
+ sub_cntl->request_attachment().append(main_cntl->request_attachment()); } protected: diff --git a/src/brpc/policy/redis_protocol.cpp b/src/brpc/policy/redis_protocol.cpp index f8acf49d6a..9e8e148ebf 100644 --- a/src/brpc/policy/redis_protocol.cpp +++ b/src/brpc/policy/redis_protocol.cpp @@ -283,7 +283,7 @@ void SerializeRedisRequest(butil::IOBuf* buf, const RedisRequest* rr = (const RedisRequest*)request; // If redis byte size is zero, brpc call will fail with E22. Continuous E22 may cause E112 in the end. // So set failed and return useful error message - if (rr->ByteSize() == 0) { + if (GetProtobufByteSize(*rr) == 0) { return cntl->SetFailed(EREQUEST, "request byte size is empty"); } // We work around SerializeTo of pb which is just a placeholder. diff --git a/src/brpc/selective_channel.cpp b/src/brpc/selective_channel.cpp index ec93354121..567ffa51b8 100644 --- a/src/brpc/selective_channel.cpp +++ b/src/brpc/selective_channel.cpp @@ -344,13 +344,13 @@ int Sender::IssueRPC(int64_t start_realtime_us) { sub_cntl->set_request_code(_main_cntl->request_code()); // Forward request attachment to the subcall sub_cntl->request_attachment().append(_main_cntl->request_attachment()); - sub_cntl->http_request() = _main_cntl->http_request(); + ProtocolType protocol = _main_cntl->request_protocol(); + if (PROTOCOL_HTTP == protocol || PROTOCOL_H2 == protocol) { + sub_cntl->http_request() = _main_cntl->http_request(); + } - sel_out.channel()->CallMethod(_main_cntl->_method, - &r.sub_done->_cntl, - _request, - r.response, - r.sub_done); + sel_out.channel()->CallMethod(_main_cntl->_method, &r.sub_done->_cntl, + _request, r.response, r.sub_done); return 0; } diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index b132f2acea..c123fb6b6e 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -896,8 +896,7 @@ void Socket::OnFailed(int error_code, const std::string& error_text) { // comes online. 
if (HCEnabled()) { GetOrNewSharedPart()->circuit_breaker.MarkAsBroken(); - StartHealthCheck(id(), - GetOrNewSharedPart()->circuit_breaker.isolation_duration_ms()); + StartHealthCheck(id(), GetOrNewSharedPart()->circuit_breaker.isolation_duration_ms()); } // Wake up all threads waiting on EPOLLOUT when closing fd _epollout_butex->fetch_add(1, butil::memory_order_relaxed); diff --git a/test/brpc_channel_unittest.cpp b/test/brpc_channel_unittest.cpp index 66d1fbad9b..86bee89105 100644 --- a/test/brpc_channel_unittest.cpp +++ b/test/brpc_channel_unittest.cpp @@ -176,6 +176,16 @@ class MyEchoService : public ::test::EchoService { res->add_code_list(req->code()); } res->set_receiving_socket_id(cntl->_current_call.sending_sock->id()); + + brpc::ProtocolType protocol = cntl->request_protocol(); + if ((brpc::PROTOCOL_HTTP == protocol || brpc::PROTOCOL_H2 == protocol) && + !req->http_header().empty()) { + ASSERT_FALSE(req->http_header().empty()); + const std::string* val = cntl->http_request().GetHeader(req->http_header()); + ASSERT_TRUE(val); + ASSERT_FALSE(val->empty()); + cntl->http_response().SetHeader(req->http_header(), *val); + } } static void CallAfterRpc(std::shared_ptr str, brpc::Controller* cntl, @@ -310,8 +320,10 @@ class ChannelTest : public ::testing::Test{ bool short_connection, const brpc::Authenticator* auth = NULL, std::string connection_group = std::string(), - bool use_backup_request_policy = false) { + bool use_backup_request_policy = false, + brpc::ProtocolType protocol = brpc::PROTOCOL_BAIDU_STD) { brpc::ChannelOptions opt; + opt.protocol = protocol; if (short_connection) { opt.connection_type = brpc::CONNECTION_TYPE_SHORT; } @@ -526,7 +538,7 @@ class ChannelTest : public ::testing::Test{ int channel_index, const google::protobuf::MethodDescriptor* method, const google::protobuf::Message* req_base, - google::protobuf::Message* response) { + google::protobuf::Message* response) override { test::EchoRequest* req = brpc::Clone(req_base); 
req->set_code(channel_index + 1/*non-zero*/); return brpc::SubCall(method, req, response->New(), @@ -540,7 +552,7 @@ class ChannelTest : public ::testing::Test{ int channel_index, const google::protobuf::MethodDescriptor* method, const google::protobuf::Message* req_base, - google::protobuf::Message* response) { + google::protobuf::Message* response) override { if (channel_index % 2) { return brpc::SubCall::Skip(); } @@ -554,7 +566,7 @@ class ChannelTest : public ::testing::Test{ int channel_index, const google::protobuf::MethodDescriptor* method, const google::protobuf::Message* req_base, - google::protobuf::Message* res_base) { + google::protobuf::Message* res_base) override { const test::ComboRequest* req = dynamic_cast(req_base); test::ComboResponse* res = dynamic_cast(res_base); @@ -1334,7 +1346,7 @@ class ChannelTest : public ::testing::Test{ int /*channel_index*/, const google::protobuf::MethodDescriptor* method, const google::protobuf::Message* req_base, - google::protobuf::Message* response) { + google::protobuf::Message* response) override { test::EchoRequest* req = brpc::Clone(req_base); req->set_sleep_us(70000); // 70ms return brpc::SubCall(method, req, response->New(), @@ -2357,7 +2369,7 @@ class BadCall : public brpc::CallMapper { brpc::SubCall Map(int, const google::protobuf::MethodDescriptor*, const google::protobuf::Message*, - google::protobuf::Message*) { + google::protobuf::Message*) override { return brpc::SubCall::Bad(); } }; @@ -2384,7 +2396,7 @@ class SkipCall : public brpc::CallMapper { brpc::SubCall Map(int, const google::protobuf::MethodDescriptor*, const google::protobuf::Message*, - google::protobuf::Message*) { + google::protobuf::Message*) override { return brpc::SubCall::Skip(); } }; @@ -2412,6 +2424,55 @@ TEST_F(ChannelTest, skip_all_channels) { } } +static const std::string ECHO_HTTP_HEADER = "echo-http-header"; + +class EchoHttpHeader : public brpc::CallMapper { +public: + brpc::SubCall Map(int channel_index, int channel_count, + 
const google::protobuf::MethodDescriptor* method, + const google::protobuf::Message* request, + google::protobuf::Message* response) override { + return brpc::SubCall(method, request, response->New(), brpc::DELETE_RESPONSE); + } + + void MapController(int channel_index, int, + const brpc::Controller* main_cntl, + brpc::Controller* sub_cntl) override { + sub_cntl->http_request().SetHeader(ECHO_HTTP_HEADER, std::to_string(channel_index)); + } +}; + +TEST_F(ChannelTest, http_header_parallel_channels) { + brpc::Server server; + MyEchoService service; + ASSERT_EQ(0, server.AddService(&service, brpc::SERVER_DOESNT_OWN_SERVICE)); + brpc::ServerOptions opt; + ASSERT_EQ(0, server.Start(_ep, &opt)); + + const size_t NCHANS = 5; + brpc::ParallelChannel channel; + for (size_t i = 0; i < NCHANS; ++i) { + brpc::Channel* sub_chan = new brpc::Channel(); + SetUpChannel(sub_chan, true, false, NULL, "", false, brpc::PROTOCOL_HTTP); + ASSERT_EQ(0, channel.AddChannel(sub_chan, brpc::OWNS_CHANNEL, new EchoHttpHeader, NULL)); + } + + brpc::Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + *req.mutable_http_header() = ECHO_HTTP_HEADER; + CallMethod(&channel, &cntl, &req, &res, false); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ((int)NCHANS, cntl.sub_count()); + for (int i = 0; i < cntl.sub_count(); ++i) { + const brpc::Controller* sub_cntl = cntl.sub(i); + ASSERT_TRUE(NULL != sub_cntl) << "i=" << i; + ASSERT_EQ(std::to_string(i), *sub_cntl->http_response().GetHeader(ECHO_HTTP_HEADER)); + } +} + TEST_F(ChannelTest, connection_failed_parallel) { for (int i = 0; i <= 1; ++i) { // Flag SingleServer for (int j = 0; j <= 1; ++j) { // Flag Asynchronous diff --git a/test/echo.proto b/test/echo.proto index 970ef1dbb1..c9fa8acee4 100644 --- a/test/echo.proto +++ b/test/echo.proto @@ -27,6 +27,7 @@ message EchoRequest { optional bool close_fd = 3; optional int32 sleep_us = 4; optional int32 server_fail = 5; + optional string 
http_header = 6; }; message EchoResponse { From b044d6d590addfc1665da62a5141a80a054f107d Mon Sep 17 00:00:00 2001 From: yanfeng Date: Mon, 2 Mar 2026 21:17:57 +0800 Subject: [PATCH 31/84] feat(backup_request): add rate-limited backup request policy (#3228) (#3229) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(backup_request): add rate-limited backup request policy (#3228) * docs(backup_request): restructure rate-limiting section, add lifecycle guidance - Promote built-in factory function to its own subsection (before custom interface) - Add unique_ptr usage example for policy lifetime management - Add RateLimitedBackupPolicyOptions parameter table with defaults/constraints - Document NULL return on invalid params - Keep cn/en docs in sync * fix(backup_request): address review issues — sentinel fallback, comments, tests - controller.cpp: When policy returns -1 (inherit sentinel), fall back to _backup_request_ms set from ChannelOptions, so backup timer is actually armed when using a policy with backup_request_ms=-1. - backup_request_policy.cpp: Clarify OnRPCEnd comment to say 'RPC legs' (both original and backup completions counted as denominator). - backup_request_policy.cpp: Warn when update_interval_seconds exceeds window_size_seconds (window would rarely refresh within its period). - backup_request_policy.h: Fix comment typo ('Called when an RPC ends'). - brpc_channel_unittest.cpp: Replace nullptr with NULL to match codebase convention; use ASSERT_TRUE(p != NULL) for unique_ptr null checks. - brpc_channel_unittest.cpp: Add ValidMaxRatioAtBoundary behavioral assert and AfterColdStartBackupSuppressedUntilRpcCompletes test. * fix(backup_request): correct docs table defaults and add suppression test - docs: fix backup_request_ms default (0→-1) and constraint (>=0→>=-1); add note that -1 inherit only works via ChannelOptions injection path, not Controller::set_backup_request_policy(). 
- test: replace no-op AfterColdStart test with a real behavioral assertion: after cold-start backup fires, wait 1.2s for ratio refresh, verify DoBackup() returns false (conservative ratio=1.0 path triggers). * fix(backup_request): clarify comments — negative defer semantics and burst caveat * fix(backup_request): address Copilot review — sentinel contract, OnRPCEnd comment, re-allow test, docs - controller.cpp: treat -1 specifically (not all negatives) as the inherit sentinel; other negatives still disable backup, preserving old behavior for custom policies that return negative values to disable backup - backup_request_policy.h: document the -1 sentinel contract on GetBackupRequestMs() so custom implementors know the new interface - backup_request_policy.cpp: fix OnRPCEnd comment — called once per user-level RPC, not once per leg (total_count tracks user RPCs) - test: add OnRPCEndDrivesRatioDownAndReAllows — fires 20 backups to suppress, then completes 50 RPCs via OnRPCEnd, verifies DoBackup re-allows once ratio refreshes below max_backup_ratio - docs (EN+CN): rephrase backup_request_ms=-1 note to clarify the channel-level fallback only applies when set via ChannelOptions * fix(backup_request): explain why std::nothrow is intentionally omitted Plain new follows brpc's project-wide OOM convention (abort rather than return NULL). The factory's NULL return already exclusively signals invalid parameters, not allocation failure — adding std::nothrow would conflate the two. Comment added to suppress future linter/AI suggestions. * docs(backup_request): clarify policy lifetime — channel must be destroyed before policy The unique_ptr comment was ambiguous: 'released when goes out of scope, as long as it outlives the channel' can be read as contradictory. Reword to make the ordering explicit: destroy channel first, then policy. 
* test(backup_request): fix inaccurate cold-start comment in ValidMaxRatioAtBoundary ratio=1.0 conservative path only applies when backup>0 && total==0. True cold start (both zero) sets ratio=0.0 and allows freely. --- docs/cn/backup_request.md | 76 ++++++++++++- docs/en/backup_request.md | 74 ++++++++++++ src/brpc/backup_request_policy.cpp | 177 +++++++++++++++++++++++++++++ src/brpc/backup_request_policy.h | 39 ++++++- src/brpc/channel.cpp | 1 + src/brpc/channel.h | 5 +- src/brpc/controller.cpp | 12 +- test/brpc_channel_unittest.cpp | 137 ++++++++++++++++++++++ 8 files changed, 515 insertions(+), 6 deletions(-) create mode 100644 src/brpc/backup_request_policy.cpp diff --git a/docs/cn/backup_request.md b/docs/cn/backup_request.md index 6674fbf429..b2e0bb61e0 100644 --- a/docs/cn/backup_request.md +++ b/docs/cn/backup_request.md @@ -6,7 +6,7 @@ Channel开启backup request。这个Channel会先向其中一个server发送请 示例代码见[example/backup_request_c++](https://github.com/apache/brpc/blob/master/example/backup_request_c++)。这个例子中,client设定了在2ms后发送backup request,server在碰到偶数位的请求后会故意睡眠20ms以触发backup request。 -运行后,client端和server端的日志分别如下,“index”是请求的编号。可以看到server端在收到第一个请求后会故意sleep 20ms,client端之后发送另一个同样index的请求,最终的延时并没有受到故意sleep的影响。 +运行后,client端和server端的日志分别如下,"index"是请求的编号。可以看到server端在收到第一个请求后会故意sleep 20ms,client端之后发送另一个同样index的请求,最终的延时并没有受到故意sleep的影响。 ![img](../images/backup_request_1.png) @@ -39,6 +39,80 @@ my_func_latency << tm.u_elapsed(); // u代表微秒,还有s_elapsed(), m_elap // 好了,在/vars中会显示my_func_qps, my_func_latency, my_func_latency_cdf等很多计数器。 ``` +## Backup Request 限流 + +如需限制 backup request 的发送比例,可使用内置工厂函数创建限流策略,也可自行实现 `BackupRequestPolicy` 接口。 + +优先级顺序:`backup_request_policy` > `backup_request_ms`。 + +### 使用内置限流策略 + +调用 `CreateRateLimitedBackupPolicy` 创建限流策略,并将其设置到 `ChannelOptions.backup_request_policy`: + +```c++ +#include "brpc/backup_request_policy.h" +#include + +brpc::RateLimitedBackupPolicyOptions opts; +opts.backup_request_ms = 10; // 超过10ms未返回时发送backup请求 +opts.max_backup_ratio = 0.3; // 
backup请求比例上限30% +opts.window_size_seconds = 10; // 滑动窗口宽度(秒) +opts.update_interval_seconds = 5; // 缓存比例的刷新间隔(秒) + +// CreateRateLimitedBackupPolicy返回的指针由调用方负责释放。 +// policy的生命周期必须长于channel——先销毁channel,再销毁policy。 +std::unique_ptr policy( + brpc::CreateRateLimitedBackupPolicy(opts)); + +brpc::ChannelOptions options; +options.backup_request_policy = policy.get(); // Channel不拥有该对象 +channel.Init(..., &options); +// channel必须在policy析构之前销毁。 +``` + +参数说明(`RateLimitedBackupPolicyOptions`): + +| 字段 | 默认值 | 说明 | +|------|--------|------| +| `backup_request_ms` | -1 | 超时阈值(毫秒)。-1 表示继承 `ChannelOptions.backup_request_ms`(仅在通过 `ChannelOptions.backup_request_policy` 设置策略时有效;通过 Controller 注入时没有 channel 级的回退值,应显式指定 >= 0 的值)。必须 >= -1。 | +| `max_backup_ratio` | 0.1 | backup比例上限,取值范围 (0, 1] | +| `window_size_seconds` | 10 | 滑动窗口宽度(秒),取值范围 [1, 3600] | +| `update_interval_seconds` | 5 | 缓存刷新间隔(秒),必须 >= 1 | + +参数不合法时 `CreateRateLimitedBackupPolicy` 返回 `NULL`。 + +### 使用自定义 BackupRequestPolicy + +如需完全控制,可实现 `BackupRequestPolicy` 接口并设置到 `ChannelOptions.backup_request_policy`: + +```c++ +#include "brpc/backup_request_policy.h" + +class MyBackupPolicy : public brpc::BackupRequestPolicy { +public: + int32_t GetBackupRequestMs(const brpc::Controller*) const override { + return 10; // 10ms后发送backup + } + bool DoBackup(const brpc::Controller*) const override { + return should_allow_backup(); // 自定义逻辑 + } + void OnRPCEnd(const brpc::Controller*) override { + // 每次RPC结束时调用,可在此更新统计 + } +}; + +MyBackupPolicy my_policy; +brpc::ChannelOptions options; +options.backup_request_policy = &my_policy; // Channel不拥有该对象,需保证其生命周期长于Channel +channel.Init(..., &options); +``` + +### 实现说明 + +- 比例通过bvar计数器在滑动时间窗口内统计。缓存值通过无锁CAS选举最多每 `update_interval_seconds` 刷新一次,因此每次RPC的开销极低(公共路径仅有两次原子读)。 +- Backup决策在做出时立即计数(RPC完成前),以便在延迟抖动期间更快地反馈。总RPC数在完成时统计。这意味着比例在抖动期间可能短暂滞后,这是设计有意为之——限流器的目标是近似的尽力而为的节流,而非精确执行。 +- 每个使用限流的Channel会维护两个 `bvar::Window` 采样任务,在Channel数量极多的部署中请留意此开销。 + # 当后端server不能挂在一个命名服务内时 【推荐】建立一个开启backup 
request的SelectiveChannel,其中包含两个sub channel。访问这个SelectiveChannel和上面的情况类似,会先访问一个sub channel,如果在ChannelOptions.backup_request_ms后没返回,再访问另一个sub channel。如果一个sub channel对应一个集群,这个方法就是在两个集群间做互备。SelectiveChannel的例子见[example/selective_echo_c++](https://github.com/apache/brpc/tree/master/example/selective_echo_c++),具体做法请参考上面的过程。 diff --git a/docs/en/backup_request.md b/docs/en/backup_request.md index 8e1a337c41..e61f361182 100644 --- a/docs/en/backup_request.md +++ b/docs/en/backup_request.md @@ -39,6 +39,80 @@ my_func_latency << tm.u_elapsed(); // u represents for microsecond, and s_elaps // All work is done here. My_func_qps, my_func_latency, my_func_latency_cdf and many other counters would be shown in /vars. ``` +## Rate-limited backup requests + +To limit the ratio of backup requests sent, use the built-in factory function or implement the `BackupRequestPolicy` interface yourself. + +Priority order: `backup_request_policy` > `backup_request_ms`. + +### Using the built-in rate-limiting policy + +Call `CreateRateLimitedBackupPolicy` and set the result on `ChannelOptions.backup_request_policy`: + +```c++ +#include "brpc/backup_request_policy.h" +#include + +brpc::RateLimitedBackupPolicyOptions opts; +opts.backup_request_ms = 10; // send backup if RPC does not complete within 10ms +opts.max_backup_ratio = 0.3; // cap backup requests at 30% of total +opts.window_size_seconds = 10; // sliding window width in seconds +opts.update_interval_seconds = 5; // how often the cached ratio is refreshed + +// The caller owns the returned pointer. +// The policy must outlive the channel — destroy the channel before the policy. +std::unique_ptr policy( + brpc::CreateRateLimitedBackupPolicy(opts)); + +brpc::ChannelOptions options; +options.backup_request_policy = policy.get(); // NOT owned by channel +channel.Init(..., &options); +// channel must be destroyed before policy goes out of scope. 
+``` + +`RateLimitedBackupPolicyOptions` fields: + +| Field | Default | Description | +|-------|---------|-------------| +| `backup_request_ms` | -1 | Timeout threshold in ms. -1 means inherit from `ChannelOptions.backup_request_ms` (only works when the policy is set via `ChannelOptions.backup_request_policy`; at controller level there is no channel-level fallback, so set an explicit >= 0 value instead). Must be >= -1. | +| `max_backup_ratio` | 0.1 | Max backup ratio; range (0, 1] | +| `window_size_seconds` | 10 | Sliding window width in seconds; range [1, 3600] | +| `update_interval_seconds` | 5 | Cached-ratio refresh interval in seconds; must be >= 1 | + +`CreateRateLimitedBackupPolicy` returns `NULL` if any parameter is invalid. + +### Using a custom BackupRequestPolicy + +For full control, implement the `BackupRequestPolicy` interface and set it on `ChannelOptions.backup_request_policy`: + +```c++ +#include "brpc/backup_request_policy.h" + +class MyBackupPolicy : public brpc::BackupRequestPolicy { +public: + int32_t GetBackupRequestMs(const brpc::Controller*) const override { + return 10; // send backup after 10ms + } + bool DoBackup(const brpc::Controller*) const override { + return should_allow_backup(); // your logic here + } + void OnRPCEnd(const brpc::Controller*) override { + // called on every RPC completion; update stats if needed + } +}; + +MyBackupPolicy my_policy; +brpc::ChannelOptions options; +options.backup_request_policy = &my_policy; // NOT owned by channel; must outlive channel +channel.Init(..., &options); +``` + +### Implementation notes + +- The ratio is computed over a sliding time window using bvar counters. The cached value is refreshed at most once per `update_interval_seconds` using a lock-free CAS election, so the overhead per RPC is very low (two atomic loads in the common path). +- Backup decisions are counted immediately at decision time (before the RPC completes) to provide faster feedback during latency spikes. 
Total RPCs are counted on completion. This means the ratio may transiently lag during a spike, but this is intentional — the limiter is designed for approximate, best-effort throttling, not exact enforcement. +- Each channel using rate limiting maintains two `bvar::Window` sampler tasks. Keep this in mind in deployments with a very large number of channels. + # When backend servers cannot be hung in a naming service [Recommended] Define a SelectiveChannel that sets backup request, in which contains two sub channel. The visiting process of this SelectiveChannel is similar to the above situation. It will visit one sub channel first. If the response is not returned after channelOptions.backup_request_ms ms, then another sub channel is visited. If a sub channel corresponds to a cluster, this method does backups between two clusters. An example of SelectiveChannel can be found in [example/selective_echo_c++](https://github.com/apache/brpc/tree/master/example/selective_echo_c++). More details please refer to the above program. diff --git a/src/brpc/backup_request_policy.cpp b/src/brpc/backup_request_policy.cpp new file mode 100644 index 0000000000..851537b3df --- /dev/null +++ b/src/brpc/backup_request_policy.cpp @@ -0,0 +1,177 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "brpc/backup_request_policy.h" + +#include "butil/logging.h" +#include "bvar/reducer.h" +#include "bvar/window.h" +#include "butil/atomicops.h" +#include "butil/time.h" + +namespace brpc { + +// Standalone statistics module for tracking backup/total request ratio +// within a sliding time window. Each instance schedules two bvar::Window +// sampler tasks; keep this in mind for high channel-count deployments. +class BackupRateLimiter { +public: + BackupRateLimiter(double max_backup_ratio, + int window_size_seconds, + int update_interval_seconds) + : _max_backup_ratio(max_backup_ratio) + , _update_interval_us(update_interval_seconds * 1000000LL) + , _total_count() + , _backup_count() + , _total_window(&_total_count, window_size_seconds) + , _backup_window(&_backup_count, window_size_seconds) + , _cached_ratio(0.0) + , _last_update_us(0) { + } + + // All atomic operations use relaxed ordering intentionally. + // This is best-effort rate limiting: a slightly stale ratio is + // acceptable for approximate throttling. Within a single update interval, + // the cached ratio is not updated, so bursts up to update_interval_seconds + // in duration can exceed the configured max_backup_ratio transiently. + bool ShouldAllow() const { + const int64_t now_us = butil::cpuwide_time_us(); + int64_t last_us = _last_update_us.load(butil::memory_order_relaxed); + double ratio = _cached_ratio.load(butil::memory_order_relaxed); + + if (now_us - last_us >= _update_interval_us) { + if (_last_update_us.compare_exchange_strong( + last_us, now_us, butil::memory_order_relaxed)) { + int64_t total = _total_window.get_value(); + int64_t backup = _backup_window.get_value(); + // Fall back to cumulative counts when the window has no + // sampled data yet (cold-start within the first few seconds). 
+ if (total <= 0) { + total = _total_count.get_value(); + backup = _backup_count.get_value(); + } + if (total > 0) { + ratio = static_cast<double>(backup) / total; + } else if (backup > 0) { + // Backups issued but no completions in window yet (latency spike). + // Be conservative to prevent backup storms. + ratio = 1.0; + } else { + // True cold-start: no traffic yet. Allow freely. + ratio = 0.0; + } + _cached_ratio.store(ratio, butil::memory_order_relaxed); + } + } + + bool allow = ratio < _max_backup_ratio; + if (allow) { + // Count backup decisions immediately for faster feedback + // during latency spikes (before RPCs complete). + _backup_count << 1; + } + return allow; + } + + void OnRPCEnd(const Controller* /*controller*/) { + // Count each completed user-level RPC (called once per RPC, not per leg). + // Backup decisions are counted in ShouldAllow() at decision time for + // faster feedback. As a result, the effective suppression threshold is + // (backup_count / total_count), where total_count is the number of + // user RPCs that have completed. + _total_count << 1; + } + +private: + double _max_backup_ratio; + int64_t _update_interval_us; + + bvar::Adder<int64_t> _total_count; + mutable bvar::Adder<int64_t> _backup_count; + bvar::Window<bvar::Adder<int64_t>> _total_window; + bvar::Window<bvar::Adder<int64_t>> _backup_window; + + mutable butil::atomic<double> _cached_ratio; + mutable butil::atomic<int64_t> _last_update_us; +}; + +// Internal BackupRequestPolicy that composes a BackupRateLimiter +// for ratio-based suppression.
+class RateLimitedBackupPolicy : public BackupRequestPolicy { +public: + RateLimitedBackupPolicy(int32_t backup_request_ms, + double max_backup_ratio, + int window_size_seconds, + int update_interval_seconds) + : _backup_request_ms(backup_request_ms) + , _rate_limiter(max_backup_ratio, window_size_seconds, + update_interval_seconds) { + } + + int32_t GetBackupRequestMs(const Controller* /*controller*/) const override { + return _backup_request_ms; + } + + bool DoBackup(const Controller* /*controller*/) const override { + return _rate_limiter.ShouldAllow(); + } + + void OnRPCEnd(const Controller* controller) override { + _rate_limiter.OnRPCEnd(controller); + } + +private: + int32_t _backup_request_ms; + BackupRateLimiter _rate_limiter; +}; + +BackupRequestPolicy* CreateRateLimitedBackupPolicy( + const RateLimitedBackupPolicyOptions& options) { + if (options.backup_request_ms < -1) { + LOG(ERROR) << "Invalid backup_request_ms=" << options.backup_request_ms + << ", must be >= -1 (-1 means inherit from ChannelOptions)"; + return NULL; + } + if (options.max_backup_ratio <= 0 || options.max_backup_ratio > 1.0) { + LOG(ERROR) << "Invalid max_backup_ratio=" << options.max_backup_ratio + << ", must be in (0, 1]"; + return NULL; + } + if (options.window_size_seconds < 1 || options.window_size_seconds > 3600) { + LOG(ERROR) << "Invalid window_size_seconds=" << options.window_size_seconds + << ", must be in [1, 3600]"; + return NULL; + } + if (options.update_interval_seconds < 1) { + LOG(ERROR) << "Invalid update_interval_seconds=" + << options.update_interval_seconds << ", must be >= 1"; + return NULL; + } + if (options.update_interval_seconds > options.window_size_seconds) { + LOG(WARNING) << "update_interval_seconds=" << options.update_interval_seconds + << " exceeds window_size_seconds=" << options.window_size_seconds + << "; the ratio window will rarely refresh within its own period"; + } + // Plain new (without std::nothrow): brpc follows the project-wide convention + // 
of letting OOM throw/abort rather than returning NULL. NULL return from + // this factory already signals invalid parameters, not allocation failure. + return new RateLimitedBackupPolicy( + options.backup_request_ms, options.max_backup_ratio, + options.window_size_seconds, options.update_interval_seconds); +} + +} // namespace brpc diff --git a/src/brpc/backup_request_policy.h b/src/brpc/backup_request_policy.h index ea254f1dbf..13da2a59ba 100644 --- a/src/brpc/backup_request_policy.h +++ b/src/brpc/backup_request_policy.h @@ -29,15 +29,52 @@ class BackupRequestPolicy { // Return the time in milliseconds in which another request // will be sent if RPC does not finish. + // Returning -1 means "inherit the backup_request_ms from ChannelOptions". + // Returning any other negative value disables backup for this RPC. virtual int32_t GetBackupRequestMs(const Controller* controller) const = 0; // Return true if the backup request should be sent. virtual bool DoBackup(const Controller* controller) const = 0; - // Called when a rpc is end, user can collect call information to adjust policy. + // Called when an RPC ends; user can collect call information to adjust policy. virtual void OnRPCEnd(const Controller* controller) = 0; }; +// Options for CreateRateLimitedBackupPolicy(). +// All fields have defaults matching the recommended starting values. +struct RateLimitedBackupPolicyOptions { + // Time in milliseconds after which a backup request is sent if the RPC + // has not completed. + // Use -1 (the default) to inherit the value from ChannelOptions.backup_request_ms. + // Use >= 0 to override it explicitly. + // Default: -1 + int32_t backup_request_ms = -1; + + // Maximum ratio of backup requests to total requests in the sliding + // window. Must be in (0, 1]. + // Default: 0.1 + double max_backup_ratio = 0.1; + + // Width of the sliding time window in seconds. Must be in [1, 3600]. 
+ // Default: 10 + int window_size_seconds = 10; + + // Interval in seconds between cached-ratio refreshes. Must be >= 1. + // Default: 5 + int update_interval_seconds = 5; +}; + +// Create a BackupRequestPolicy that limits the ratio of backup requests +// to total requests within a sliding time window. When the ratio reaches +// or exceeds options.max_backup_ratio, DoBackup() returns false. +// NOTE: Backup decisions are counted immediately at DoBackup() time for +// fast feedback. Total RPCs are counted on completion (OnRPCEnd). During +// latency spikes the ratio may temporarily lag until RPCs complete. +// Returns NULL on invalid parameters. +// The caller owns the returned pointer. +BackupRequestPolicy* CreateRateLimitedBackupPolicy( + const RateLimitedBackupPolicyOptions& options); + } #endif // BRPC_BACKUP_REQUEST_POLICY_H diff --git a/src/brpc/channel.cpp b/src/brpc/channel.cpp index 86124c2552..dde4ca0f8c 100644 --- a/src/brpc/channel.cpp +++ b/src/brpc/channel.cpp @@ -242,6 +242,7 @@ int Channel::InitChannelOptions(const ChannelOptions* options) { if (!cg.empty() && (::isspace(cg.front()) || ::isspace(cg.back()))) { butil::TrimWhitespace(cg, butil::TRIM_ALL, &cg); } + return 0; } diff --git a/src/brpc/channel.h b/src/brpc/channel.h index 7c257c05d3..28a17ac8ea 100644 --- a/src/brpc/channel.h +++ b/src/brpc/channel.h @@ -118,9 +118,10 @@ struct ChannelOptions { // Customize the backup request time and whether to send backup request. // Priority: `backup_request_policy' > `backup_request_ms'. - // Overridable by Controller.set_backup_request_ms() or + // Overridable per-RPC by Controller.set_backup_request_ms() or // Controller.set_backup_request_policy(). - // This object is NOT owned by channel and should remain valid when channel is used. + // This object is NOT owned by channel and should remain valid during + // channel's lifetime. 
// Default: NULL BackupRequestPolicy* backup_request_policy; diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index d3821eca80..133d1f0453 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -351,8 +351,16 @@ void Controller::set_backup_request_ms(int64_t timeout_ms) { } int64_t Controller::backup_request_ms() const { - int timeout_ms = NULL != _backup_request_policy ? - _backup_request_policy->GetBackupRequestMs(this) : _backup_request_ms; + int timeout_ms = _backup_request_ms; + if (NULL != _backup_request_policy) { + const int32_t policy_ms = _backup_request_policy->GetBackupRequestMs(this); + // -1 is the designated sentinel: the policy defers to the channel-level + // backup_request_ms (set from ChannelOptions). Any other negative value + // disables backup for this RPC. Values >= 0 override directly. + if (policy_ms != -1) { + timeout_ms = policy_ms; + } + } if (timeout_ms > 0x7fffffff) { timeout_ms = 0x7fffffff; LOG(WARNING) << "backup_request_ms is limited to 0x7fffffff (roughly 24 days)"; diff --git a/test/brpc_channel_unittest.cpp b/test/brpc_channel_unittest.cpp index 86bee89105..f96650211e 100644 --- a/test/brpc_channel_unittest.cpp +++ b/test/brpc_channel_unittest.cpp @@ -3078,4 +3078,141 @@ TEST_F(ChannelTest, adaptive_protocol_type) { ASSERT_EQ("", ptype.param()); } +class RateLimitedBackupPolicyTest : public ::testing::Test {}; + +TEST_F(RateLimitedBackupPolicyTest, InvalidBackupRequestMs) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = -2; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidMaxBackupRatioZero) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.max_backup_ratio = 0.0; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidMaxBackupRatioNegative) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + 
+ opts.max_backup_ratio = -0.1; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidMaxBackupRatioAboveOne) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.max_backup_ratio = 1.001; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidWindowSizeTooSmall) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.window_size_seconds = 0; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidWindowSizeTooLarge) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.window_size_seconds = 3601; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidUpdateIntervalTooSmall) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.update_interval_seconds = 0; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, ValidMinusOneBackupRequestMsInherits) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = -1; + std::unique_ptr<brpc::BackupRequestPolicy> p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + ASSERT_EQ(-1, p->GetBackupRequestMs(NULL)); +} + +TEST_F(RateLimitedBackupPolicyTest, ValidMaxRatioAtBoundary) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 50; + opts.max_backup_ratio = 1.0; + std::unique_ptr<brpc::BackupRequestPolicy> p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + // With max_backup_ratio=1.0 and true cold start (total==0, backup==0), + // ShouldAllow() sets ratio=0.0 (free pass). The conservative ratio=1.0 + // path only applies when backup>0 but total==0 (latency spike with no + // completions yet). At absolute cold start DoBackup() must return true.
+ ASSERT_TRUE(p->DoBackup(NULL)); // cold start: ratio=0.0 < 1.0, allow +} + +TEST_F(RateLimitedBackupPolicyTest, ColdStartAllowsBackup) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 10; + opts.max_backup_ratio = 0.1; + opts.update_interval_seconds = 1; + std::unique_ptr<brpc::BackupRequestPolicy> p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + ASSERT_TRUE(p->DoBackup(NULL)); +} + +// After the first backup fires (backup_count=1, total_count=0), once the +// update interval elapses the ratio is refreshed via the conservative path +// (total==0 → ratio=1.0), which exceeds max_backup_ratio < 1.0, so +// subsequent DoBackup() calls are suppressed until an RPC leg completes. +TEST_F(RateLimitedBackupPolicyTest, AfterColdStartBackupSuppressedUntilRpcCompletes) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 10; + opts.max_backup_ratio = 0.1; + opts.window_size_seconds = 1; + opts.update_interval_seconds = 1; + std::unique_ptr<brpc::BackupRequestPolicy> p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + // First call fires (cold start: total=0, backup=0 → ratio=0.0 → allow). + ASSERT_TRUE(p->DoBackup(NULL)); + // Wait for the update interval to elapse so the ratio refreshes. + // After refresh: total=0 but backup=1 → conservative path sets ratio=1.0, + // which is >= max_backup_ratio (0.1), so DoBackup() must return false. + bthread_usleep(1200000); // 1.2s > update_interval_seconds=1 + ASSERT_FALSE(p->DoBackup(NULL)); +} + +// After the ratio rises above the threshold, calling OnRPCEnd() many times +// drives total_count up relative to backup_count. Once the ratio refreshes +// below max_backup_ratio, DoBackup() should allow backups again.
+TEST_F(RateLimitedBackupPolicyTest, OnRPCEndDrivesRatioDownAndReAllows) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 10; + opts.max_backup_ratio = 0.5; + opts.window_size_seconds = 1; + opts.update_interval_seconds = 1; + std::unique_ptr<brpc::BackupRequestPolicy> p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + // Fire many backup decisions so backup_count >> total_count, + // pushing the ratio above max_backup_ratio. + for (int i = 0; i < 20; ++i) { + p->DoBackup(NULL); + } + // Wait for update interval so the ratio is refreshed above threshold. + bthread_usleep(1200000); // 1.2s + ASSERT_FALSE(p->DoBackup(NULL)); + // Now complete many more RPCs than backups fired to bring ratio below 0.5. + // 20 backup decisions already counted; need total_count > 20/0.5 = 40. + for (int i = 0; i < 50; ++i) { + p->OnRPCEnd(NULL); + } + // Wait for the ratio cache to refresh. + bthread_usleep(1200000); // 1.2s + // Ratio is now ~20/50 = 0.4 < max_backup_ratio (0.5), so backup is re-allowed. + ASSERT_TRUE(p->DoBackup(NULL)); +} + } //namespace From 4fa7e962e0df0b022ebd6d50cf0a2668fa1150a5 Mon Sep 17 00:00:00 2001 From: yanyuan06 Date: Tue, 3 Mar 2026 17:41:07 +0800 Subject: [PATCH 32/84] Solve the issue of attachment being overwritten when backuprequest is triggered. --- src/brpc/selective_channel.cpp | 13 +++--- test/brpc_channel_unittest.cpp | 73 +++++++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/src/brpc/selective_channel.cpp b/src/brpc/selective_channel.cpp index 567ffa51b8..a59580e321 100644 --- a/src/brpc/selective_channel.cpp +++ b/src/brpc/selective_channel.cpp @@ -364,12 +364,6 @@ void SubDone::Run() { << _cid.value << ": " << berror(rc); return; } - // NOTE: Copying gettable-but-settable fields which are generally set - // during the RPC to reflect details. - main_cntl->_remote_side = _cntl._remote_side; - // connection_type may be changed during CallMethod.
- main_cntl->set_connection_type(_cntl.connection_type()); - main_cntl->response_attachment().swap(_cntl.response_attachment()); Resource r; r.response = _cntl._response; r.sub_done = this; @@ -377,6 +371,13 @@ void SubDone::Run() { return; } const int saved_error = main_cntl->ErrorCode(); + + // NOTE: Copying gettable-but-settable fields which are generally set + // during the RPC to reflect details. + main_cntl->_remote_side = _cntl._remote_side; + // connection_type may be changed during CallMethod. + main_cntl->set_connection_type(_cntl.connection_type()); + main_cntl->response_attachment().swap(_cntl.response_attachment()); if (_cntl.Failed()) { if (_cntl.ErrorCode() == ENODATA || _cntl.ErrorCode() == EHOSTDOWN) { diff --git a/test/brpc_channel_unittest.cpp b/test/brpc_channel_unittest.cpp index f96650211e..ad6670443a 100644 --- a/test/brpc_channel_unittest.cpp +++ b/test/brpc_channel_unittest.cpp @@ -176,6 +176,7 @@ class MyEchoService : public ::test::EchoService { res->add_code_list(req->code()); } res->set_receiving_socket_id(cntl->_current_call.sending_sock->id()); + if (mockfunc_) mockfunc_(cntl_base, req, res, done); brpc::ProtocolType protocol = cntl->request_protocol(); if ((brpc::PROTOCOL_HTTP == protocol || brpc::PROTOCOL_H2 == protocol) && @@ -198,6 +199,17 @@ class MyEchoService : public ::test::EchoService { EXPECT_TRUE(nullptr != request); EXPECT_TRUE(nullptr != response); } + +public: + using MockFuncType = void(google::protobuf::RpcController*, + const ::test::EchoRequest*, ::test::EchoResponse*, + google::protobuf::Closure*); + void SetMockFunc(std::function<MockFuncType>&& mockfunc) { + mockfunc_ = std::move(mockfunc); + } + +private: + std::function<MockFuncType> mockfunc_; }; pthread_once_t register_mock_protocol = PTHREAD_ONCE_INIT; @@ -1408,7 +1420,7 @@ class ChannelTest : public ::testing::Test{ SetUpChannel(subchan, single_server, short_connection); ASSERT_EQ(0, channel.AddChannel(subchan, NULL)) << "i=" << i; } - + brpc::Controller cntl; test::EchoRequest req;
test::EchoResponse res; @@ -1427,6 +1439,55 @@ class ChannelTest : public ::testing::Test{ EXPECT_EQ(17, cntl.sub(0)->_real_timeout_ms); StopAndJoin(); } + + void TestBackupRequestSelective( + bool single_server, bool async, bool short_connection) { + std::cout << " *** single=" << single_server + << " async=" << async + << " short=" << short_connection << std::endl; + ASSERT_EQ(0, StartAccept(_ep)); + + const size_t NCHANS = 8; + brpc::SelectiveChannel channel; + ASSERT_EQ(0, channel.Init("rr", NULL)); + for (size_t i = 0; i < NCHANS; ++i) { + brpc::Channel* subchan = new brpc::Channel; + SetUpChannel(subchan, single_server, short_connection); + ASSERT_EQ(0, channel.AddChannel(subchan, NULL)) << "i=" << i; + } + + brpc::Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + cntl.set_backup_request_ms(20); + cntl.set_timeout_ms(100); + std::atomic<int> call_cnt(0); + _svc.SetMockFunc([&call_cnt](google::protobuf::RpcController* cntl_base, + const ::test::EchoRequest*, + ::test::EchoResponse*, + google::protobuf::Closure*) { + brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base); + int see_cnt = call_cnt.fetch_add(1, std::memory_order_relaxed); + if (see_cnt == 0) { + LOG(INFO) << "slow node"; + bthread_usleep(30 * 1000); + } else { + LOG(INFO) << "normal node "; + butil::IOBuf iobuf; + iobuf.append("123"); + cntl->response_attachment().swap(iobuf); + } + }); + butil::Timer tm; + tm.start(); + CallMethod(&channel, &cntl, &req, &res, async); + tm.stop(); + EXPECT_FALSE(cntl.Failed()); + EXPECT_EQ(call_cnt.load(std::memory_order_relaxed), 2); + EXPECT_EQ(cntl.response_attachment().to_string(), "123"); + StopAndJoin(); + } void TestCloseFD(bool single_server, bool async, bool short_connection) { std::cout << " *** single=" << single_server @@ -2713,6 +2774,16 @@ TEST_F(ChannelTest, timeout_selective) { } } +TEST_F(ChannelTest, backuprequest_selective) { + for (int i = 0; i <= 1; ++i) { // Flag SingleServer + for (int j = 0; j <= 1;
++j) { // Flag Asynchronous + for (int k = 0; k <=1; ++k) { // Flag ShortConnection + TestBackupRequestSelective(i, j, k); + } + } + } +} + TEST_F(ChannelTest, close_fd) { for (int i = 0; i <= 1; ++i) { // Flag SingleServer for (int j = 0; j <= 1; ++j) { // Flag Asynchronous From e9ad004796897cf4f4c997eb62a93eeb36057b6f Mon Sep 17 00:00:00 2001 From: wayslog Date: Mon, 23 Feb 2026 01:11:53 +0800 Subject: [PATCH 33/84] feat(redis): add native Redis Cluster channel, tests and docs --- docs/cn/redis_client.md | 51 + docs/en/redis_client.md | 51 + example/redis_c++/CMakeLists.txt | 2 + example/redis_c++/Makefile | 15 +- example/redis_c++/redis_cluster_client.cpp | 119 ++ src/brpc/redis_cluster.cpp | 1190 ++++++++++++++++ src/brpc/redis_cluster.h | 183 +++ test/BUILD.bazel | 15 + test/brpc_redis_cluster_unittest.cpp | 1455 ++++++++++++++++++++ 9 files changed, 3079 insertions(+), 2 deletions(-) create mode 100644 example/redis_c++/redis_cluster_client.cpp create mode 100644 src/brpc/redis_cluster.cpp create mode 100644 src/brpc/redis_cluster.h create mode 100644 test/brpc_redis_cluster_unittest.cpp diff --git a/docs/cn/redis_client.md b/docs/cn/redis_client.md index 48873c9f8f..d0ac54458c 100644 --- a/docs/cn/redis_client.md +++ b/docs/cn/redis_client.md @@ -161,6 +161,55 @@ response中的所有reply的ownership属于response。当response析构时,rep 或者你可以沿用常见的[twemproxy](https://github.com/twitter/twemproxy)方案。这个方案虽然需要额外部署proxy,还增加了延时,但client端仍可以像访问单点一样的访问它。 +如果你要直接访问原生 Redis Cluster(按 slot 路由、自动处理 MOVED/ASK,并通过 `CLUSTER SLOTS`/`CLUSTER NODES` 刷新拓扑),可以使用 `brpc::RedisClusterChannel`: + +```c++ +#include <brpc/redis_cluster.h> + +brpc::RedisClusterChannel channel; +brpc::RedisClusterChannelOptions options; +options.max_redirect = 5; +if (channel.Init("127.0.0.1:7000,127.0.0.1:7001", &options) != 0) { + LOG(ERROR) << "Fail to init redis cluster channel"; +} +``` + +`RedisClusterChannel` 支持同步/异步 `CallMethod`、自动重定向重试和周期性拓扑刷新。多 key 支持 `MGET/MSET/DEL/EXISTS/UNLINK/EVAL/EVALSHA`,暂不支持 `MULTI/EXEC`。 + +##
RedisClusterChannel 示例 + +`example/redis_c++/redis_cluster_client.cpp` 覆盖了以下常见场景: + +- 通过多个 seed 节点初始化; +- 自动处理 MOVED/ASK 重定向与重试; +- 先走 `CLUSTER SLOTS`,失败后回退 `CLUSTER NODES`; +- 同一个 channel 同时执行同步 pipeline 和异步请求。 + +构建并运行: + +```bash +cd example/redis_c++ +make redis_cluster_client +./redis_cluster_client \ + --seeds=127.0.0.1:7000,127.0.0.1:7001 \ + --max_redirect=5 \ + --timeout_ms=1000 +``` + +常用选项: + +- `RedisClusterChannelOptions::max_redirect`:单个命令的最大重定向次数; +- `RedisClusterChannelOptions::refresh_interval_s`:周期刷新拓扑的间隔; +- `RedisClusterChannelOptions::topology_refresh_timeout_ms`:拓扑命令超时; +- `RedisClusterChannelOptions::channel_options`:每个 redis 节点的通用 brpc 参数; +- `RedisClusterChannelOptions::enable_periodic_refresh`:是否启用后台周期刷新。 + +说明: + +- `MGET/MSET/DEL/EXISTS/UNLINK` 会按 key 分发后按请求顺序合并返回; +- `EVAL/EVALSHA` 要求声明的 key 位于同一 slot; +- `MULTI/EXEC` 当前会直接返回错误 reply。 + # 查看发出的请求和收到的回复 打开[-redis_verbose](http://brpc.baidu.com:8765/flags/redis_verbose)即看到所有的redis request和response,注意这应该只用于线下调试,而不是线上程序。 @@ -242,6 +291,8 @@ TRACE: 02-13 18:07:42: * 0 client.cpp:180] Accessing redis server at qps=75238 [example/redis_c++/redis_cli](https://github.com/apache/brpc/blob/master/example/redis_c%2B%2B/redis_cli.cpp)是一个类似于官方CLI的命令行工具,以展示brpc对redis协议的处理能力。当使用brpc访问redis-server出现不符合预期的行为时,也可以使用这个CLI进行交互式的调试。 +如果是原生 Redis Cluster 场景,可直接参考 [example/redis_c++/redis_cluster_client.cpp](https://github.com/apache/brpc/blob/master/example/redis_c%2B%2B/redis_cluster_client.cpp)。 + 和官方CLI类似,`redis_cli `也可以直接运行命令,-server参数可以指定redis-server的地址。 ``` diff --git a/docs/en/redis_client.md b/docs/en/redis_client.md index 093868b6a7..2dbc6bfdfc 100644 --- a/docs/en/redis_client.md +++ b/docs/en/redis_client.md @@ -162,6 +162,55 @@ Create a `Channel` using the consistent hashing as the load balancing algorithm( Another choice is to use the common [twemproxy](https://github.com/twitter/twemproxy) solution, which makes clients access the cluster just like accessing a single server, although the 
solution needs to deploy proxies and adds more latency. +For native Redis Cluster (slot based routing, MOVED/ASK redirection and topology refresh from `CLUSTER SLOTS`/`CLUSTER NODES`), use `brpc::RedisClusterChannel`: + +```c++ +#include <brpc/redis_cluster.h> + +brpc::RedisClusterChannel channel; +brpc::RedisClusterChannelOptions options; +options.max_redirect = 5; +if (channel.Init("127.0.0.1:7000,127.0.0.1:7001", &options) != 0) { + LOG(ERROR) << "Fail to init redis cluster channel"; +} +``` + +`RedisClusterChannel` supports synchronous/asynchronous `CallMethod`, automatic redirection retries and periodic topology refresh. Multi-key support includes `MGET/MSET/DEL/EXISTS/UNLINK/EVAL/EVALSHA`. `MULTI/EXEC` is currently not supported. + +## RedisClusterChannel example + +`example/redis_c++/redis_cluster_client.cpp` demonstrates: + +- bootstrap from multiple seed nodes. +- MOVED/ASK auto-redirection and retry. +- topology refresh from `CLUSTER SLOTS` with `CLUSTER NODES` fallback. +- sync pipeline and async calls using one channel. + +Build and run: + +```bash +cd example/redis_c++ +make redis_cluster_client +./redis_cluster_client \ + --seeds=127.0.0.1:7000,127.0.0.1:7001 \ + --max_redirect=5 \ + --timeout_ms=1000 +``` + +Frequently used options: + +- `RedisClusterChannelOptions::max_redirect`: max redirects per command. +- `RedisClusterChannelOptions::refresh_interval_s`: interval of periodic topology refresh. +- `RedisClusterChannelOptions::topology_refresh_timeout_ms`: timeout for topology commands. +- `RedisClusterChannelOptions::channel_options`: normal brpc channel options for each redis node. +- `RedisClusterChannelOptions::enable_periodic_refresh`: disable this when your app controls refresh explicitly. + +Notes: + +- `MGET/MSET/DEL/EXISTS/UNLINK` are executed per key and merged in request order. +- `EVAL/EVALSHA` requires all declared keys to be in one slot. +- `MULTI/EXEC` returns an error reply by design.
+ # Debug Turn on [-redis_verbose](http://brpc.baidu.com:8765/flags/redis_verbose) to print contents of all redis requests and responses. Note that this should only be used for debugging rather than online services. @@ -243,6 +292,8 @@ We can see a tremendous drop of QPS compared to the one using single connection [example/redis_c++/redis_cli](https://github.com/apache/brpc/blob/master/example/redis_c%2B%2B/redis_cli.cpp) is a command line tool similar to the official CLI, demostrating brpc's capability to talk with redis servers. When unexpected results are got from a redis-server using a brpc client, you can debug with this tool interactively as well. +For native Redis Cluster, you can start from [example/redis_c++/redis_cluster_client.cpp](https://github.com/apache/brpc/blob/master/example/redis_c%2B%2B/redis_cluster_client.cpp). + Like the official CLI, `redis_cli ` runs the command directly, and `-server` which is address of the redis-server can be specified. ``` diff --git a/example/redis_c++/CMakeLists.txt b/example/redis_c++/CMakeLists.txt index e29dcbeb45..af3bd59153 100644 --- a/example/redis_c++/CMakeLists.txt +++ b/example/redis_c++/CMakeLists.txt @@ -138,9 +138,11 @@ endif() add_executable(redis_cli redis_cli.cpp) add_executable(redis_press redis_press.cpp) add_executable(redis_server redis_server.cpp) +add_executable(redis_cluster_client redis_cluster_client.cpp) set(AUX_LIB readline ncurses) target_link_libraries(redis_cli ${BRPC_LIB} ${DYNAMIC_LIB} ${AUX_LIB}) target_link_libraries(redis_press ${BRPC_LIB} ${DYNAMIC_LIB}) target_link_libraries(redis_server ${BRPC_LIB} ${DYNAMIC_LIB}) +target_link_libraries(redis_cluster_client ${BRPC_LIB} ${DYNAMIC_LIB}) diff --git a/example/redis_c++/Makefile b/example/redis_c++/Makefile index 7c94e195db..c1225af18b 100644 --- a/example/redis_c++/Makefile +++ b/example/redis_c++/Makefile @@ -29,10 +29,12 @@ DYNAMIC_LINKINGS += -lreadline -lncurses PRESS_SOURCES = redis_press.cpp CLI_SOURCES = redis_cli.cpp 
SERVER_SOURCES = redis_server.cpp +CLUSTER_SOURCES = redis_cluster_client.cpp PRESS_OBJS = $(addsuffix .o, $(basename $(PRESS_SOURCES))) CLI_OBJS = $(addsuffix .o, $(basename $(CLI_SOURCES))) SERVER_OBJS = $(addsuffix .o, $(basename $(SERVER_SOURCES))) +CLUSTER_OBJS = $(addsuffix .o, $(basename $(CLUSTER_SOURCES))) ifeq ($(SYSTEM),Darwin) ifneq ("$(LINK_SO)", "") @@ -50,12 +52,13 @@ else ifeq ($(SYSTEM),Linux) endif .PHONY:all -all: redis_press redis_cli redis_server +all: redis_press redis_cli redis_server redis_cluster_client .PHONY:clean clean: @echo "> Cleaning" - rm -rf redis_press redis_cli $(PRESS_OBJS) $(CLI_OBJS) $(SERVER_OBJS) + rm -rf redis_press redis_cli redis_server redis_cluster_client \ + $(PRESS_OBJS) $(CLI_OBJS) $(SERVER_OBJS) $(CLUSTER_OBJS) redis_press:$(PRESS_OBJS) @echo "> Linking $@" @@ -81,6 +84,14 @@ else $(CXX) $(LIBPATHS) $(LINK_OPTIONS) -o $@ endif +redis_cluster_client:$(CLUSTER_OBJS) + @echo "> Linking $@" +ifneq ("$(LINK_SO)", "") + $(CXX) $(LIBPATHS) $(SOPATHS) $(LINK_OPTIONS_SO) -o $@ +else + $(CXX) $(LIBPATHS) $(LINK_OPTIONS) -o $@ +endif + %.o:%.cpp @echo "> Compiling $@" $(CXX) -c $(HDRPATHS) $(CXXFLAGS) $< -o $@ diff --git a/example/redis_c++/redis_cluster_client.cpp b/example/redis_c++/redis_cluster_client.cpp new file mode 100644 index 0000000000..2a9f43b46f --- /dev/null +++ b/example/redis_c++/redis_cluster_client.cpp @@ -0,0 +1,119 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// A basic client for native Redis Cluster using brpc::RedisClusterChannel. + +#include +#include +#include +#include +#include +#include + +DEFINE_string(seeds, "127.0.0.1:7000,127.0.0.1:7001", + "Comma-separated redis cluster seed endpoints"); +DEFINE_string(key_prefix, "brpc_cluster_demo", "Prefix for demo keys"); +DEFINE_int32(timeout_ms, 1000, "RPC timeout in milliseconds"); +DEFINE_int32(rpc_max_retry, 1, "Max retries for a single sub RPC"); +DEFINE_int32(max_redirect, 5, "Max MOVED/ASK redirect retries"); +DEFINE_int32(refresh_interval_s, 30, "Periodic topology refresh interval"); +DEFINE_int32(topology_refresh_timeout_ms, 1000, + "Timeout of CLUSTER SLOTS/NODES request"); +DEFINE_bool(disable_periodic_refresh, false, "Disable periodic topology refresh"); + +namespace { + +class Done : public google::protobuf::Closure { +public: + explicit Done(bthread::CountdownEvent* event) : _event(event) {} + void Run() override { _event->signal(); } + +private: + bthread::CountdownEvent* _event; +}; + +int PrintResponse(const brpc::RedisResponse& response) { + for (int i = 0; i < response.reply_size(); ++i) { + const brpc::RedisReply& reply = response.reply(i); + if (reply.is_error()) { + LOG(ERROR) << "reply[" << i << "] error=" << reply.error_message(); + return -1; + } + LOG(INFO) << "reply[" << i << "] " << reply; + } + return 0; +} + +} // namespace + +int main(int argc, char* argv[]) { + GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + + brpc::RedisClusterChannelOptions options; + options.max_redirect = FLAGS_max_redirect; + 
options.refresh_interval_s = FLAGS_refresh_interval_s; + options.enable_periodic_refresh = !FLAGS_disable_periodic_refresh; + options.topology_refresh_timeout_ms = FLAGS_topology_refresh_timeout_ms; + options.channel_options.timeout_ms = FLAGS_timeout_ms; + options.channel_options.max_retry = FLAGS_rpc_max_retry; + + brpc::RedisClusterChannel channel; + if (channel.Init(FLAGS_seeds, &options) != 0) { + LOG(ERROR) << "Fail to init redis cluster channel, seeds=" << FLAGS_seeds; + return -1; + } + + const std::string key1 = FLAGS_key_prefix + "_1"; + const std::string key2 = FLAGS_key_prefix + "_2"; + + // Sync pipeline. + brpc::RedisRequest request; + brpc::RedisResponse response; + brpc::Controller cntl; + CHECK(request.AddCommand("set %s v1", key1.c_str())); + CHECK(request.AddCommand("set %s v2", key2.c_str())); + CHECK(request.AddCommand("mget %s %s", key1.c_str(), key2.c_str())); + channel.CallMethod(NULL, &cntl, &request, &response, NULL); + if (cntl.Failed()) { + LOG(ERROR) << "Sync call failed: " << cntl.ErrorText(); + return -1; + } + if (PrintResponse(response) != 0) { + return -1; + } + + // Async single request. + brpc::RedisRequest async_request; + brpc::RedisResponse async_response; + brpc::Controller async_cntl; + CHECK(async_request.AddCommand("get %s", key1.c_str())); + + bthread::CountdownEvent event(1); + Done done(&event); + channel.CallMethod(NULL, &async_cntl, &async_request, &async_response, &done); + event.wait(); + if (async_cntl.Failed()) { + LOG(ERROR) << "Async call failed: " << async_cntl.ErrorText(); + return -1; + } + if (PrintResponse(async_response) != 0) { + return -1; + } + + LOG(INFO) << "Redis cluster demo finished"; + return 0; +} diff --git a/src/brpc/redis_cluster.cpp b/src/brpc/redis_cluster.cpp new file mode 100644 index 0000000000..d532e80c2f --- /dev/null +++ b/src/brpc/redis_cluster.cpp @@ -0,0 +1,1190 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "brpc/redis_cluster.h" + +#include +#include +#include + +#include +#include +#include + +#include "butil/endpoint.h" +#include "butil/logging.h" +#include "brpc/controller.h" +#include "brpc/redis_command.h" + +namespace brpc { +namespace { + +static const size_t kRedisClusterSlotCount = 16384; + +static const uint16_t kCrc16Table[256] = { + 0x0000,0x1021,0x2042,0x3063,0x4084,0x50A5,0x60C6,0x70E7, + 0x8108,0x9129,0xA14A,0xB16B,0xC18C,0xD1AD,0xE1CE,0xF1EF, + 0x1231,0x0210,0x3273,0x2252,0x52B5,0x4294,0x72F7,0x62D6, + 0x9339,0x8318,0xB37B,0xA35A,0xD3BD,0xC39C,0xF3FF,0xE3DE, + 0x2462,0x3443,0x0420,0x1401,0x64E6,0x74C7,0x44A4,0x5485, + 0xA56A,0xB54B,0x8528,0x9509,0xE5EE,0xF5CF,0xC5AC,0xD58D, + 0x3653,0x2672,0x1611,0x0630,0x76D7,0x66F6,0x5695,0x46B4, + 0xB75B,0xA77A,0x9719,0x8738,0xF7DF,0xE7FE,0xD79D,0xC7BC, + 0x48C4,0x58E5,0x6886,0x78A7,0x0840,0x1861,0x2802,0x3823, + 0xC9CC,0xD9ED,0xE98E,0xF9AF,0x8948,0x9969,0xA90A,0xB92B, + 0x5AF5,0x4AD4,0x7AB7,0x6A96,0x1A71,0x0A50,0x3A33,0x2A12, + 0xDBFD,0xCBDC,0xFBBF,0xEB9E,0x9B79,0x8B58,0xBB3B,0xAB1A, + 0x6CA6,0x7C87,0x4CE4,0x5CC5,0x2C22,0x3C03,0x0C60,0x1C41, + 0xEDAE,0xFD8F,0xCDEC,0xDDCD,0xAD2A,0xBD0B,0x8D68,0x9D49, + 0x7E97,0x6EB6,0x5ED5,0x4EF4,0x3E13,0x2E32,0x1E51,0x0E70, + 
0xFF9F,0xEFBE,0xDFDD,0xCFFC,0xBF1B,0xAF3A,0x9F59,0x8F78, + 0x9188,0x81A9,0xB1CA,0xA1EB,0xD10C,0xC12D,0xF14E,0xE16F, + 0x1080,0x00A1,0x30C2,0x20E3,0x5004,0x4025,0x7046,0x6067, + 0x83B9,0x9398,0xA3FB,0xB3DA,0xC33D,0xD31C,0xE37F,0xF35E, + 0x02B1,0x1290,0x22F3,0x32D2,0x4235,0x5214,0x6277,0x7256, + 0xB5EA,0xA5CB,0x95A8,0x8589,0xF56E,0xE54F,0xD52C,0xC50D, + 0x34E2,0x24C3,0x14A0,0x0481,0x7466,0x6447,0x5424,0x4405, + 0xA7DB,0xB7FA,0x8799,0x97B8,0xE75F,0xF77E,0xC71D,0xD73C, + 0x26D3,0x36F2,0x0691,0x16B0,0x6657,0x7676,0x4615,0x5634, + 0xD94C,0xC96D,0xF90E,0xE92F,0x99C8,0x89E9,0xB98A,0xA9AB, + 0x5844,0x4865,0x7806,0x6827,0x18C0,0x08E1,0x3882,0x28A3, + 0xCB7D,0xDB5C,0xEB3F,0xFB1E,0x8BF9,0x9BD8,0xABBB,0xBB9A, + 0x4A75,0x5A54,0x6A37,0x7A16,0x0AF1,0x1AD0,0x2AB3,0x3A92, + 0xFD2E,0xED0F,0xDD6C,0xCD4D,0xBDAA,0xAD8B,0x9DE8,0x8DC9, + 0x7C26,0x6C07,0x5C64,0x4C45,0x3CA2,0x2C83,0x1CE0,0x0CC1, + 0xEF1F,0xFF3E,0xCF5D,0xDF7C,0xAF9B,0xBFBA,0x8FD9,0x9FF8, + 0x6E17,0x7E36,0x4E55,0x5E74,0x2E93,0x3EB2,0x0ED1,0x1EF0 +}; + +static std::string Trim(const std::string& in) { + size_t begin = 0; + while (begin < in.size() && isspace(static_cast(in[begin]))) { + ++begin; + } + size_t end = in.size(); + while (end > begin && isspace(static_cast(in[end - 1]))) { + --end; + } + return in.substr(begin, end - begin); +} + +static std::vector SplitByChar(const std::string& text, char delim) { + std::vector out; + std::string current; + std::stringstream ss(text); + while (std::getline(ss, current, delim)) { + current = Trim(current); + if (!current.empty()) { + out.push_back(current); + } + } + return out; +} + +static std::vector SplitByWhitespace(const std::string& text) { + std::vector out; + std::stringstream ss(text); + std::string token; + while (ss >> token) { + out.push_back(token); + } + return out; +} + +static std::string EndpointHost(const std::string& endpoint) { + std::string ep = endpoint; + if (!ep.empty() && ep[0] == '[') { + const size_t right = ep.find(']'); + if (right != 
std::string::npos) { + return ep.substr(1, right - 1); + } + return ep; + } + const size_t p = ep.rfind(':'); + if (p == std::string::npos) { + return ep; + } + return ep.substr(0, p); +} + +static bool EncodeReply(const RedisReply& reply, butil::IOBuf* out) { + butil::IOBufAppender appender; + // RedisReply::SerializeTo does not support REDIS_REPLY_NIL directly. + // Encode nil as a null bulk string so response parsing can consume it. + if (reply.type() == REDIS_REPLY_NIL) { + appender.append("$-1\r\n", 5); + appender.move_to(*out); + return true; + } + if (!const_cast(reply).SerializeTo(&appender)) { + return false; + } + appender.move_to(*out); + return true; +} + +} // namespace + +RedisClusterChannelOptions::RedisClusterChannelOptions() + : max_redirect(5) + , refresh_interval_s(30) + , enable_periodic_refresh(true) + , topology_refresh_timeout_ms(1000) { + channel_options.protocol = brpc::PROTOCOL_REDIS; +} + +RedisClusterChannel::SingleCommandResult::SingleCommandResult() + : ok(false) + , is_status_ok(false) + , integer_value(0) + , is_error(false) { +} + +struct RedisClusterChannel::AsyncCall { + RedisClusterChannel* self; + Controller* cntl; + const RedisRequest* request; + RedisResponse* response; + google::protobuf::Closure* done; +}; + +RedisClusterChannel::RedisClusterChannel() + : _stop_refresh(false) + , _refresh_started(false) + , _refresh_tid(0) { + _slot_to_endpoint.resize(kRedisClusterSlotCount); +} + +RedisClusterChannel::~RedisClusterChannel() { + _stop_refresh.store(true); + if (_refresh_started) { + bthread_join(_refresh_tid, NULL); + } +} + +int RedisClusterChannel::Init(const std::string& seed_nodes, + const RedisClusterChannelOptions* options) { + if (seed_nodes.empty()) { + LOG(ERROR) << "seed_nodes is empty"; + return -1; + } + + RedisClusterChannelOptions resolved; + if (options) { + resolved = *options; + } + resolved.channel_options.protocol = brpc::PROTOCOL_REDIS; + _options = resolved; + + const std::vector seeds = 
SplitByChar(seed_nodes, ','); + if (seeds.empty()) { + LOG(ERROR) << "No valid seed endpoint in " << seed_nodes; + return -1; + } + + { + BAIDU_SCOPED_LOCK(_mutex); + _seed_endpoints = seeds; + } + + for (size_t i = 0; i < seeds.size(); ++i) { + if (GetOrCreateChannel(seeds[i]) == NULL) { + LOG(WARNING) << "Fail to init seed channel=" << seeds[i]; + } + } + + if (!RefreshTopology()) { + LOG(ERROR) << "Fail to fetch redis cluster topology from seeds"; + return -1; + } + + if (_options.enable_periodic_refresh && _options.refresh_interval_s > 0) { + _stop_refresh.store(false); + if (bthread_start_background(&_refresh_tid, NULL, + RedisClusterChannel::RunPeriodicRefresh, + this) == 0) { + _refresh_started = true; + } else { + LOG(WARNING) << "Fail to start periodic refresh bthread"; + } + } + return 0; +} + +void RedisClusterChannel::CallMethod( + const google::protobuf::MethodDescriptor* /*method*/, + google::protobuf::RpcController* controller_base, + const google::protobuf::Message* request_base, + google::protobuf::Message* response_base, + google::protobuf::Closure* done) { + Controller* cntl = static_cast(controller_base); + if (cntl == NULL) { + LOG(ERROR) << "controller is NULL"; + if (done) { + done->Run(); + } + return; + } + + if (request_base == NULL || + request_base->GetDescriptor() != RedisRequest::descriptor()) { + cntl->SetFailed(EREQUEST, "request must be RedisRequest"); + if (done) { + done->Run(); + } + return; + } + if (response_base == NULL || + response_base->GetDescriptor() != RedisResponse::descriptor()) { + cntl->SetFailed(ERESPONSE, "response must be RedisResponse"); + if (done) { + done->Run(); + } + return; + } + + const RedisRequest* request = static_cast(request_base); + RedisResponse* response = static_cast(response_base); + + if (done == NULL) { + CallMethodImpl(cntl, *request, response); + return; + } + + AsyncCall* ac = new (std::nothrow) AsyncCall; + if (ac == NULL) { + cntl->SetFailed(ENOMEM, "Fail to allocate async context"); + 
done->Run(); + return; + } + ac->self = this; + ac->cntl = cntl; + ac->request = request; + ac->response = response; + ac->done = done; + + bthread_t tid; + if (bthread_start_background(&tid, NULL, RedisClusterChannel::RunAsyncCall, ac) != 0) { + delete ac; + CallMethodImpl(cntl, *request, response); + done->Run(); + } +} + +bool RedisClusterChannel::CallMethodImpl(Controller* cntl, + const RedisRequest& request, + RedisResponse* response) { + std::vector commands; + if (!ParseRequest(request, &commands, cntl)) { + return false; + } + if (commands.empty()) { + cntl->SetFailed(EREQUEST, "request has no redis command"); + return false; + } + + std::vector replies(commands.size()); + for (size_t i = 0; i < commands.size(); ++i) { + if (!ExecuteCommand(commands[i], &replies[i], cntl)) { + return false; + } + } + + butil::IOBuf merged; + for (size_t i = 0; i < replies.size(); ++i) { + merged.append(replies[i]); + } + + response->Clear(); + ParseError err = response->ConsumePartialIOBuf(merged, static_cast(commands.size())); + if (err != PARSE_OK || !merged.empty()) { + cntl->SetFailed(ERESPONSE, "Fail to parse merged redis response"); + return false; + } + return true; +} + +bool RedisClusterChannel::ParseRequest(const RedisRequest& request, + std::vector* commands, + Controller* cntl) const { + commands->clear(); + + butil::IOBuf serialized; + if (!request.SerializeTo(&serialized)) { + cntl->SetFailed(EREQUEST, "Fail to serialize redis request"); + return false; + } + + RedisCommandParser parser; + butil::Arena arena; + + while (!serialized.empty()) { + std::vector args; + ParseError err = parser.Consume(serialized, &args, &arena); + if (err != PARSE_OK) { + cntl->SetFailed(EREQUEST, "Fail to parse redis request (err=%d)", err); + return false; + } + ParsedCommand cmd; + cmd.args.reserve(args.size()); + for (size_t i = 0; i < args.size(); ++i) { + cmd.args.push_back(args[i].as_string()); + } + commands->push_back(cmd); + } + return true; +} + +bool 
RedisClusterChannel::ExecuteCommand(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl) { + if (cmd.args.empty()) { + cntl->SetFailed(EREQUEST, "Empty redis command"); + return false; + } + const std::string& name = cmd.args[0]; + + if (name == "multi" || name == "exec") { + AppendErrorReply(encoded_reply, + "ERR MULTI/EXEC is not supported by RedisClusterChannel"); + return true; + } + if (name == "mget") { + return ExecuteMGet(cmd, encoded_reply, cntl); + } + if (name == "mset") { + return ExecuteMSet(cmd, encoded_reply, cntl); + } + if (name == "del" || name == "exists" || name == "unlink") { + return ExecuteIntegerAggregate(cmd, encoded_reply, cntl); + } + if (name == "eval" || name == "evalsha") { + return ExecuteEvalLike(cmd, encoded_reply, cntl); + } + + SingleCommandResult result; + if (!ExecuteSingleCommand(cmd.args, NULL, &result, cntl)) { + return false; + } + encoded_reply->swap(result.encoded_reply); + return true; +} + +bool RedisClusterChannel::ExecuteSingleCommand(const std::vector& args, + const std::string* forced_endpoint, + SingleCommandResult* result, + Controller* cntl) { + if (args.empty()) { + cntl->SetFailed(EREQUEST, "Empty redis command"); + return false; + } + + std::string endpoint; + int key_slot = -1; + if (forced_endpoint != NULL) { + endpoint = *forced_endpoint; + } else if (!IsNoKeyCommand(args[0]) && args.size() >= 2) { + if (!PickEndpointForKey(args[1], &endpoint, &key_slot)) { + RefreshTopology(); + if (!PickEndpointForKey(args[1], &endpoint, &key_slot)) { + cntl->SetFailed(EHOSTDOWN, "No endpoint found for key"); + return false; + } + } + } else { + if (!PickAnyEndpoint(&endpoint)) { + RefreshTopology(); + if (!PickAnyEndpoint(&endpoint)) { + cntl->SetFailed(EHOSTDOWN, "No endpoint available in redis cluster"); + return false; + } + } + } + + bool asking = false; + std::string next_endpoint = endpoint; + const int max_redirect = std::max(_options.max_redirect, 0); + for (int i = 0; i <= max_redirect; 
++i) { + RedirectInfo redirect; + if (!SendToEndpoint(next_endpoint, args, asking, result, &redirect, cntl)) { + return false; + } + if (!redirect.valid) { + return true; + } + + if (!redirect.endpoint.empty()) { + next_endpoint = redirect.endpoint; + GetOrCreateChannel(next_endpoint); + } + if (redirect.slot >= 0 && static_cast(redirect.slot) < _slot_to_endpoint.size() && + !redirect.endpoint.empty()) { + BAIDU_SCOPED_LOCK(_mutex); + _slot_to_endpoint[redirect.slot] = redirect.endpoint; + } + + if (!redirect.asking) { + RefreshTopology(); + } + asking = redirect.asking; + } + + cntl->SetFailed(ERESPONSE, "Too many redis cluster redirects"); + return false; +} + +bool RedisClusterChannel::ExecuteMGet(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl) { + if (cmd.args.size() < 2) { + AppendErrorReply(encoded_reply, + "ERR wrong number of arguments for 'mget' command"); + return true; + } + + std::vector values; + values.reserve(cmd.args.size() - 1); + for (size_t i = 1; i < cmd.args.size(); ++i) { + std::vector sub_args; + sub_args.push_back("get"); + sub_args.push_back(cmd.args[i]); + SingleCommandResult sub_result; + if (!ExecuteSingleCommand(sub_args, NULL, &sub_result, cntl)) { + return false; + } + values.push_back(sub_result.encoded_reply); + } + + AppendArrayHeader(encoded_reply, values.size()); + for (size_t i = 0; i < values.size(); ++i) { + encoded_reply->append(values[i]); + } + return true; +} + +bool RedisClusterChannel::ExecuteMSet(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl) { + if (cmd.args.size() < 3 || ((cmd.args.size() - 1) % 2 != 0)) { + AppendErrorReply(encoded_reply, + "ERR wrong number of arguments for 'mset' command"); + return true; + } + + for (size_t i = 1; i + 1 < cmd.args.size(); i += 2) { + std::vector sub_args; + sub_args.push_back("set"); + sub_args.push_back(cmd.args[i]); + sub_args.push_back(cmd.args[i + 1]); + + SingleCommandResult sub_result; + if 
(!ExecuteSingleCommand(sub_args, NULL, &sub_result, cntl)) { + return false; + } + if (sub_result.is_error || !sub_result.is_status_ok) { + encoded_reply->swap(sub_result.encoded_reply); + return true; + } + } + + AppendStatusReply(encoded_reply, "OK"); + return true; +} + +bool RedisClusterChannel::ExecuteIntegerAggregate(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl) { + if (cmd.args.size() < 2) { + AppendErrorReply(encoded_reply, + "ERR wrong number of arguments"); + return true; + } + + int64_t total = 0; + for (size_t i = 1; i < cmd.args.size(); ++i) { + std::vector sub_args; + sub_args.push_back(cmd.args[0]); + sub_args.push_back(cmd.args[i]); + + SingleCommandResult sub_result; + if (!ExecuteSingleCommand(sub_args, NULL, &sub_result, cntl)) { + return false; + } + if (sub_result.is_error) { + encoded_reply->swap(sub_result.encoded_reply); + return true; + } + total += sub_result.integer_value; + } + + AppendIntegerReply(encoded_reply, total); + return true; +} + +bool RedisClusterChannel::ExecuteEvalLike(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl) { + if (cmd.args.size() < 3) { + AppendErrorReply(encoded_reply, + "ERR wrong number of arguments for eval/evalsha"); + return true; + } + + int64_t numkeys = 0; + if (!ParseInt(cmd.args[2], &numkeys) || numkeys < 0) { + AppendErrorReply(encoded_reply, + "ERR invalid numkeys for eval/evalsha"); + return true; + } + + if (cmd.args.size() < static_cast(3 + numkeys)) { + AppendErrorReply(encoded_reply, + "ERR not enough keys for eval/evalsha"); + return true; + } + + std::string forced_endpoint; + if (numkeys > 0) { + const std::string tag_key = cmd.args[3]; + const int first_slot = HashSlot(tag_key); + for (int64_t i = 1; i < numkeys; ++i) { + if (HashSlot(cmd.args[3 + i]) != first_slot) { + AppendErrorReply(encoded_reply, + "CROSSSLOT Keys in request don't hash to the same slot"); + return true; + } + } + + int slot = -1; + if 
(!PickEndpointForKey(tag_key, &forced_endpoint, &slot)) { + RefreshTopology(); + if (!PickEndpointForKey(tag_key, &forced_endpoint, &slot)) { + cntl->SetFailed(EHOSTDOWN, "No endpoint found for eval/evalsha"); + return false; + } + } + } + + SingleCommandResult result; + if (!ExecuteSingleCommand(cmd.args, + numkeys > 0 ? &forced_endpoint : NULL, + &result, + cntl)) { + return false; + } + encoded_reply->swap(result.encoded_reply); + return true; +} + +bool RedisClusterChannel::PickEndpointForKey(const std::string& key, + std::string* endpoint, + int* slot) const { + const int key_slot = HashSlot(key); + BAIDU_SCOPED_LOCK(_mutex); + if (key_slot < 0 || static_cast(key_slot) >= _slot_to_endpoint.size()) { + return false; + } + const std::string& mapped = _slot_to_endpoint[key_slot]; + if (mapped.empty()) { + return false; + } + *endpoint = mapped; + if (slot != NULL) { + *slot = key_slot; + } + return true; +} + +bool RedisClusterChannel::PickAnyEndpoint(std::string* endpoint) const { + BAIDU_SCOPED_LOCK(_mutex); + for (std::unordered_map >::const_iterator + it = _channels.begin(); it != _channels.end(); ++it) { + *endpoint = it->first; + return true; + } + if (!_seed_endpoints.empty()) { + *endpoint = _seed_endpoints.front(); + return true; + } + return false; +} + +bool RedisClusterChannel::SendToEndpoint(const std::string& endpoint, + const std::vector& args, + bool asking, + SingleCommandResult* result, + RedirectInfo* redirect, + Controller* cntl) { + result->ok = false; + redirect->valid = false; + redirect->asking = false; + redirect->slot = -1; + redirect->endpoint.clear(); + + Channel* channel = GetOrCreateChannel(endpoint); + if (channel == NULL) { + cntl->SetFailed(EHOSTDOWN, "Fail to get channel for %s", endpoint.c_str()); + return false; + } + + RedisRequest request; + if (asking) { + if (!request.AddCommand("asking")) { + cntl->SetFailed(EREQUEST, "Fail to build ASKING command"); + return false; + } + std::vector components; + 
components.reserve(args.size()); + for (size_t i = 0; i < args.size(); ++i) { + components.push_back(args[i]); + } + if (!request.AddCommandByComponents(&components[0], components.size())) { + cntl->SetFailed(EREQUEST, "Fail to build redis command"); + return false; + } + } else { + if (!BuildRedisRequest(args, &request)) { + cntl->SetFailed(EREQUEST, "Fail to build redis command"); + return false; + } + } + + RedisResponse response; + Controller sub_cntl; + if (cntl->timeout_ms() > 0) { + sub_cntl.set_timeout_ms(cntl->timeout_ms()); + } + channel->CallMethod(NULL, &sub_cntl, &request, &response, NULL); + if (sub_cntl.Failed()) { + cntl->SetFailed(sub_cntl.ErrorCode(), + "Redis cluster sub-request to %s failed: %s", + endpoint.c_str(), + sub_cntl.ErrorText().c_str()); + return false; + } + + const int expected = asking ? 2 : 1; + if (response.reply_size() != expected) { + cntl->SetFailed(ERESPONSE, + "Unexpected redis response size=%d expected=%d", + response.reply_size(), + expected); + return false; + } + + const RedisReply& selected = response.reply(asking ? 1 : 0); + if (!EncodeReply(selected, &result->encoded_reply)) { + cntl->SetFailed(ERESPONSE, "Fail to encode redis reply"); + return false; + } + + result->ok = true; + result->is_error = selected.is_error(); + result->is_status_ok = (selected.type() == REDIS_REPLY_STATUS && + selected.data() == "OK"); + result->integer_value = selected.is_integer() ? 
selected.integer() : 0; + if (result->is_error) { + result->error_text = selected.error_message(); + } else { + result->error_text.clear(); + } + + ParseRedirectReply(*result, redirect); + return true; +} + +bool RedisClusterChannel::RefreshTopology() { + std::vector candidates; + { + BAIDU_SCOPED_LOCK(_mutex); + candidates = _seed_endpoints; + for (std::unordered_map >::const_iterator + it = _channels.begin(); it != _channels.end(); ++it) { + candidates.push_back(it->first); + } + } + + std::sort(candidates.begin(), candidates.end()); + candidates.erase(std::unique(candidates.begin(), candidates.end()), candidates.end()); + + for (size_t i = 0; i < candidates.size(); ++i) { + if (RefreshTopologyFromEndpoint(candidates[i])) { + return true; + } + } + return false; +} + +bool RedisClusterChannel::RefreshTopologyFromEndpoint(const std::string& endpoint) { + Channel* channel = GetOrCreateChannel(endpoint); + if (channel == NULL) { + return false; + } + + std::vector slot_to_endpoint(kRedisClusterSlotCount); + std::vector discovered; + if (FetchAndParseClusterSlots(channel, endpoint, + &slot_to_endpoint, &discovered)) { + ApplyTopology(slot_to_endpoint, discovered); + return true; + } + + slot_to_endpoint.assign(kRedisClusterSlotCount, std::string()); + discovered.clear(); + if (FetchAndParseClusterNodes(channel, &slot_to_endpoint, &discovered)) { + ApplyTopology(slot_to_endpoint, discovered); + return true; + } + return false; +} + +bool RedisClusterChannel::FetchAndParseClusterSlots( + Channel* channel, + const std::string& endpoint, + std::vector* slot_to_endpoint, + std::vector* discovered_endpoints) { + RedisRequest request; + if (!request.AddCommand("cluster slots")) { + return false; + } + + RedisResponse response; + Controller cntl; + cntl.set_timeout_ms(_options.topology_refresh_timeout_ms); + channel->CallMethod(NULL, &cntl, &request, &response, NULL); + if (cntl.Failed()) { + return false; + } + + if (response.reply_size() != 1 || 
!response.reply(0).is_array()) { + return false; + } + + const RedisReply& root = response.reply(0); + const std::string fallback_host = EndpointHost(endpoint); + bool has_slot = false; + for (size_t i = 0; i < root.size(); ++i) { + const RedisReply& item = root[i]; + if (!item.is_array() || item.size() < 3) { + continue; + } + if (!item[0].is_integer() || !item[1].is_integer()) { + continue; + } + + int64_t start = item[0].integer(); + int64_t end = item[1].integer(); + if (start < 0 || end < start) { + continue; + } + + if (!item[2].is_array() || item[2].size() < 2 || !item[2][1].is_integer()) { + continue; + } + std::string host; + if (item[2][0].is_string()) { + host = item[2][0].data().as_string(); + } + if (host.empty()) { + host = fallback_host; + } + + const int64_t port = item[2][1].integer(); + if (port <= 0 || port > 65535) { + continue; + } + + std::ostringstream oss; + oss << host << ":" << port; + const std::string master_endpoint = oss.str(); + discovered_endpoints->push_back(master_endpoint); + + start = std::max(start, 0); + end = std::min(end, static_cast(kRedisClusterSlotCount - 1)); + for (int64_t slot = start; slot <= end; ++slot) { + (*slot_to_endpoint)[slot] = master_endpoint; + has_slot = true; + } + } + return has_slot; +} + +bool RedisClusterChannel::FetchAndParseClusterNodes( + Channel* channel, + std::vector* slot_to_endpoint, + std::vector* discovered_endpoints) { + RedisRequest request; + if (!request.AddCommand("cluster nodes")) { + return false; + } + + RedisResponse response; + Controller cntl; + cntl.set_timeout_ms(_options.topology_refresh_timeout_ms); + channel->CallMethod(NULL, &cntl, &request, &response, NULL); + if (cntl.Failed()) { + return false; + } + + if (response.reply_size() != 1 || !response.reply(0).is_string()) { + return false; + } + + bool has_slot = false; + const std::string payload = response.reply(0).data().as_string(); + const std::vector lines = SplitByChar(payload, '\n'); + for (size_t i = 0; i < 
lines.size(); ++i) { + std::string line = lines[i]; + if (!line.empty() && line[line.size() - 1] == '\r') { + line.resize(line.size() - 1); + } + const std::vector fields = SplitByWhitespace(line); + if (fields.size() < 8) { + continue; + } + + const std::string& flags = fields[2]; + if (flags.find("master") == std::string::npos) { + continue; + } + if (flags.find("fail") != std::string::npos || + flags.find("handshake") != std::string::npos || + flags.find("noaddr") != std::string::npos) { + continue; + } + + std::string endpoint; + if (!ParseRedisNodeAddress(fields[1], &endpoint)) { + continue; + } + discovered_endpoints->push_back(endpoint); + + for (size_t j = 8; j < fields.size(); ++j) { + const std::string& slot_token = fields[j]; + if (slot_token.empty() || + slot_token[0] == '[' || + slot_token.find("->") != std::string::npos || + slot_token.find("<-") != std::string::npos) { + continue; + } + + size_t dash = slot_token.find('-'); + int64_t start = 0; + int64_t end = 0; + if (dash == std::string::npos) { + if (!ParseInt(slot_token, &start)) { + continue; + } + end = start; + } else { + if (!ParseInt(slot_token.substr(0, dash), &start) || + !ParseInt(slot_token.substr(dash + 1), &end)) { + continue; + } + } + + if (start < 0 || end < start) { + continue; + } + start = std::max(start, 0); + end = std::min(end, static_cast(kRedisClusterSlotCount - 1)); + for (int64_t slot = start; slot <= end; ++slot) { + (*slot_to_endpoint)[slot] = endpoint; + has_slot = true; + } + } + } + return has_slot; +} + +void RedisClusterChannel::ApplyTopology( + const std::vector& slot_to_endpoint, + const std::vector& discovered_endpoints) { + std::set unique_eps; + for (size_t i = 0; i < discovered_endpoints.size(); ++i) { + if (!discovered_endpoints[i].empty()) { + unique_eps.insert(discovered_endpoints[i]); + } + } + for (size_t i = 0; i < slot_to_endpoint.size(); ++i) { + if (!slot_to_endpoint[i].empty()) { + unique_eps.insert(slot_to_endpoint[i]); + } + } + + for 
(std::set::const_iterator it = unique_eps.begin(); + it != unique_eps.end(); ++it) { + GetOrCreateChannel(*it); + } + + BAIDU_SCOPED_LOCK(_mutex); + _slot_to_endpoint = slot_to_endpoint; + for (std::set::const_iterator it = unique_eps.begin(); + it != unique_eps.end(); ++it) { + if (std::find(_seed_endpoints.begin(), _seed_endpoints.end(), *it) == + _seed_endpoints.end()) { + _seed_endpoints.push_back(*it); + } + } +} + +Channel* RedisClusterChannel::GetOrCreateChannel(const std::string& endpoint) { + { + BAIDU_SCOPED_LOCK(_mutex); + std::unordered_map >::iterator it = + _channels.find(endpoint); + if (it != _channels.end()) { + return it->second.get(); + } + } + + std::unique_ptr new_channel(new (std::nothrow) Channel); + if (!new_channel) { + return NULL; + } + ChannelOptions options = _options.channel_options; + options.protocol = brpc::PROTOCOL_REDIS; + if (new_channel->Init(endpoint.c_str(), &options) != 0) { + return NULL; + } + + BAIDU_SCOPED_LOCK(_mutex); + std::unordered_map >::iterator it = + _channels.find(endpoint); + if (it != _channels.end()) { + return it->second.get(); + } + Channel* ptr = new_channel.get(); + _channels[endpoint] = std::move(new_channel); + return ptr; +} + +uint16_t RedisClusterChannel::HashSlot(const std::string& key) { + const std::string hashed = ExtractHashtag(key); + uint16_t crc = 0; + for (size_t i = 0; i < hashed.size(); ++i) { + const uint8_t idx = static_cast((crc >> 8) ^ + static_cast(hashed[i])); + crc = static_cast((crc << 8) ^ kCrc16Table[idx]); + } + return crc & (kRedisClusterSlotCount - 1); +} + +std::string RedisClusterChannel::ExtractHashtag(const std::string& key) { + const size_t begin = key.find('{'); + if (begin == std::string::npos) { + return key; + } + const size_t end = key.find('}', begin + 1); + if (end == std::string::npos || end == begin + 1) { + return key; + } + return key.substr(begin + 1, end - begin - 1); +} + +bool RedisClusterChannel::ParseRedirectReply(const SingleCommandResult& result, + 
RedirectInfo* redirect) { + redirect->valid = false; + redirect->asking = false; + redirect->slot = -1; + redirect->endpoint.clear(); + + if (!result.is_error || result.error_text.empty()) { + return false; + } + + std::vector fields = SplitByWhitespace(result.error_text); + if (fields.size() < 3) { + return false; + } + + if (fields[0] == "MOVED") { + redirect->asking = false; + } else if (fields[0] == "ASK") { + redirect->asking = true; + } else { + return false; + } + + int64_t slot = -1; + if (!ParseInt(fields[1], &slot)) { + return false; + } + std::string endpoint; + if (!ParseRedisNodeAddress(fields[2], &endpoint)) { + return false; + } + + redirect->valid = true; + redirect->slot = static_cast(slot); + redirect->endpoint = endpoint; + return true; +} + +bool RedisClusterChannel::BuildRedisRequest(const std::vector& args, + RedisRequest* request) { + request->Clear(); + if (args.empty()) { + return false; + } + std::vector components; + components.reserve(args.size()); + for (size_t i = 0; i < args.size(); ++i) { + components.push_back(args[i]); + } + return request->AddCommandByComponents(&components[0], components.size()); +} + +void RedisClusterChannel::AppendIntegerReply(butil::IOBuf* buf, int64_t value) { + std::ostringstream oss; + oss << ':' << value << "\r\n"; + buf->append(oss.str()); +} + +void RedisClusterChannel::AppendStatusReply(butil::IOBuf* buf, + const std::string& value) { + buf->push_back('+'); + buf->append(value); + buf->append("\r\n"); +} + +void RedisClusterChannel::AppendErrorReply(butil::IOBuf* buf, + const std::string& value) { + buf->push_back('-'); + buf->append(value); + buf->append("\r\n"); +} + +void RedisClusterChannel::AppendArrayHeader(butil::IOBuf* buf, size_t size) { + std::ostringstream oss; + oss << '*' << size << "\r\n"; + buf->append(oss.str()); +} + +bool RedisClusterChannel::ParseRedisNodeAddress(const std::string& token, + std::string* endpoint) { + if (token.empty()) { + return false; + } + std::string t = token; + 
+ const size_t comma = t.find(','); + if (comma != std::string::npos) { + t.resize(comma); + } + const size_t at = t.find('@'); + if (at != std::string::npos) { + t.resize(at); + } +/* Two forms are accepted, [host]:port and host:port. Both require a non-empty host and a port in 1..65535. */ + std::string host; + int64_t port = 0; + if (!t.empty() && t[0] == '[') { + const size_t r = t.find(']'); + if (r == std::string::npos || r + 2 > t.size() || t[r + 1] != ':') { + return false; + } + host = t.substr(1, r - 1); + if (!ParseInt(t.substr(r + 2), &port) || host.empty() || port <= 0 || port > 65535) { + return false; + } + std::ostringstream oss; + oss << '[' << host << "]:" << port; + *endpoint = oss.str(); + return true; + } +/* Plain form: the last colon separates host from port. */ + const size_t pos = t.rfind(':'); + if (pos == std::string::npos) { + return false; + } + host = t.substr(0, pos); + if (!ParseInt(t.substr(pos + 1), &port)) { + return false; + } + if (host.empty() || port <= 0 || port > 65535) { + return false; + } + + std::ostringstream oss; + oss << host << ':' << port; + *endpoint = oss.str(); + return true; +} +/* Parses s as a base-10 integer with strtoll. Fails on empty input, on unparsed trailing characters, or when strtoll sets errno. out is written only on success. */ +bool RedisClusterChannel::ParseInt(const std::string& s, int64_t* out) { + if (s.empty()) { + return false; + } + char* end = NULL; + errno = 0; + const long long value = strtoll(s.c_str(), &end, 10); + if (errno != 0 || end != s.c_str() + s.size()) { + return false; + } + *out = value; + return true; +} +/* Returns true when cmd is one of the commands listed below. The match is exact and the list is lower-case, so callers presumably pass a lower-cased name (TODO confirm). */ +bool RedisClusterChannel::IsNoKeyCommand(const std::string& cmd) { + return (cmd == "ping" || cmd == "info" || cmd == "auth" || + cmd == "select" || cmd == "echo" || cmd == "time" || + cmd == "dbsize" || cmd == "cluster"); +} +/* Refresh loop run on the channel passed as arg. Waits refresh_interval_s seconds (at least 1) in slices of at most 100ms, checking _stop_refresh between slices, then calls RefreshTopology. Exits once _stop_refresh is set. */ +void* RedisClusterChannel::RunPeriodicRefresh(void* arg) { + RedisClusterChannel* self = static_cast(arg); + while (!self->_stop_refresh.load()) { + const int interval_s = std::max(self->_options.refresh_interval_s, 1); + int64_t remain_us = interval_s * 1000000L; + while (remain_us > 0 && !self->_stop_refresh.load()) { + const int64_t step_us = std::min(remain_us, 100000L); + bthread_usleep(step_us); + remain_us -= step_us; + } + if (self->_stop_refresh.load()) { + break; + } + self->RefreshTopology(); + } + return
NULL; +} +/* Thread-style entry point for an asynchronous call. Takes ownership of the object passed as arg through std::unique_ptr, runs CallMethodImpl with its cntl, request and response, then runs its done closure. NOTE(review): arg is presumably the AsyncCall struct declared in the header; confirm. */ +void* RedisClusterChannel::RunAsyncCall(void* arg) { + std::unique_ptr ac(static_cast(arg)); + ac->self->CallMethodImpl(ac->cntl, *ac->request, ac->response); + ac->done->Run(); + return NULL; +} +/* Returns 0 when PickAnyEndpoint can supply an endpoint, -1 otherwise. */ +int RedisClusterChannel::CheckHealth() { + std::string endpoint; + return PickAnyEndpoint(&endpoint) ? 0 : -1; +} +/* Returns the number of distinct non-empty endpoints in _slot_to_endpoint. The table is read while holding _mutex. */ +int RedisClusterChannel::Weight() { + std::set unique; + BAIDU_SCOPED_LOCK(_mutex); + for (size_t i = 0; i < _slot_to_endpoint.size(); ++i) { + if (!_slot_to_endpoint[i].empty()) { + unique.insert(_slot_to_endpoint[i]); + } + } + return static_cast(unique.size()); +} + +} // namespace brpc diff --git a/src/brpc/redis_cluster.h b/src/brpc/redis_cluster.h new file mode 100644 index 0000000000..47d399bc46 --- /dev/null +++ b/src/brpc/redis_cluster.h @@ -0,0 +1,183 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +#ifndef BRPC_REDIS_CLUSTER_H +#define BRPC_REDIS_CLUSTER_H + +#include + +#include +#include +#include +#include +#include + +#include "bthread/bthread.h" +#include "butil/synchronization/lock.h" +#include "brpc/channel.h" +#include "brpc/channel_base.h" +#include "brpc/redis.h" + +namespace brpc { + +struct RedisClusterChannelOptions { + RedisClusterChannelOptions(); + + ChannelOptions channel_options; + int max_redirect; + int refresh_interval_s; + bool enable_periodic_refresh; + int topology_refresh_timeout_ms; +}; + +// Channel implementation for Redis Cluster. +class RedisClusterChannel : public ChannelBase { +public: + RedisClusterChannel(); + ~RedisClusterChannel() override; + + DISALLOW_COPY_AND_ASSIGN(RedisClusterChannel); + + int Init(const std::string& seed_nodes, + const RedisClusterChannelOptions* options = NULL); + + void CallMethod(const google::protobuf::MethodDescriptor* method, + google::protobuf::RpcController* controller, + const google::protobuf::Message* request, + google::protobuf::Message* response, + google::protobuf::Closure* done) override; + + int CheckHealth() override; + int Weight() override; + +private: + struct ParsedCommand { + std::vector args; + }; + + struct RedirectInfo { + bool valid; + bool asking; + int slot; + std::string endpoint; + }; + + struct SingleCommandResult { + bool ok; + butil::IOBuf encoded_reply; + bool is_status_ok; + int64_t integer_value; + bool is_error; + std::string error_text; + SingleCommandResult(); + }; + + struct AsyncCall; + + static void* RunPeriodicRefresh(void* arg); + static void* RunAsyncCall(void* arg); + + bool CallMethodImpl(Controller* cntl, + const RedisRequest& request, + RedisResponse* response); + + bool ParseRequest(const RedisRequest& request, + std::vector* commands, + Controller* cntl) const; + + bool ExecuteCommand(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl); + + bool ExecuteSingleCommand(const std::vector& args, + const std::string* 
forced_endpoint, + SingleCommandResult* result, + Controller* cntl); + + bool ExecuteMGet(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl); + bool ExecuteMSet(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl); + bool ExecuteIntegerAggregate(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl); + bool ExecuteEvalLike(const ParsedCommand& cmd, + butil::IOBuf* encoded_reply, + Controller* cntl); + + bool PickEndpointForKey(const std::string& key, std::string* endpoint, int* slot) const; + bool PickAnyEndpoint(std::string* endpoint) const; + + bool SendToEndpoint(const std::string& endpoint, + const std::vector& args, + bool asking, + SingleCommandResult* result, + RedirectInfo* redirect, + Controller* cntl); + + bool RefreshTopology(); + bool RefreshTopologyFromEndpoint(const std::string& endpoint); + bool FetchAndParseClusterSlots(Channel* channel, + const std::string& endpoint, + std::vector* slot_to_endpoint, + std::vector* discovered_endpoints); + bool FetchAndParseClusterNodes(Channel* channel, + std::vector* slot_to_endpoint, + std::vector* discovered_endpoints); + + void ApplyTopology(const std::vector& slot_to_endpoint, + const std::vector& discovered_endpoints); + + Channel* GetOrCreateChannel(const std::string& endpoint); + + static uint16_t HashSlot(const std::string& key); + static std::string ExtractHashtag(const std::string& key); + static bool ParseRedirectReply(const SingleCommandResult& result, + RedirectInfo* redirect); + + static bool BuildRedisRequest(const std::vector& args, + RedisRequest* request); + + static void AppendIntegerReply(butil::IOBuf* buf, int64_t value); + static void AppendStatusReply(butil::IOBuf* buf, const std::string& value); + static void AppendErrorReply(butil::IOBuf* buf, const std::string& value); + static void AppendArrayHeader(butil::IOBuf* buf, size_t size); + + static bool ParseRedisNodeAddress(const std::string& token, + std::string* 
endpoint); + static bool ParseInt(const std::string& s, int64_t* out); + + static bool IsNoKeyCommand(const std::string& cmd); + +private: + RedisClusterChannelOptions _options; + + mutable butil::Mutex _mutex; + std::vector _slot_to_endpoint; + std::vector _seed_endpoints; + std::unordered_map > _channels; + + std::atomic _stop_refresh; + bool _refresh_started; + bthread_t _refresh_tid; +}; + +} // namespace brpc + +#endif // BRPC_REDIS_CLUSTER_H diff --git a/test/BUILD.bazel b/test/BUILD.bazel index 66aef4259e..b68b3fa08a 100644 --- a/test/BUILD.bazel +++ b/test/BUILD.bazel @@ -282,6 +282,21 @@ cc_test( ], ) +cc_test( + name = "brpc_redis_cluster_test", + srcs = [ + "brpc_redis_cluster_unittest.cpp", + ], + copts = COPTS, + deps = [ + ":sstream_workaround", + ":gperftools_helper", + "//:brpc", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], +) + refresh_compile_commands( name = "brpc_test_compdb", # Specify the targets of interest. diff --git a/test/brpc_redis_cluster_unittest.cpp b/test/brpc_redis_cluster_unittest.cpp new file mode 100644 index 0000000000..7a639b1bd8 --- /dev/null +++ b/test/brpc_redis_cluster_unittest.cpp @@ -0,0 +1,1455 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bthread/bthread.h" +#include + +#include "bthread/countdown_event.h" +#include "butil/synchronization/lock.h" +#include "brpc/channel.h" +#include "brpc/redis.h" +#include "brpc/redis_cluster.h" +#include "brpc/server.h" + +namespace { + +const int kSplitSlot = 8191; + +uint16_t HashSlot(const std::string& key) { + // Keep this aligned with implementation in redis_cluster.cpp. + static const uint16_t table[256] = { + 0x0000,0x1021,0x2042,0x3063,0x4084,0x50A5,0x60C6,0x70E7, + 0x8108,0x9129,0xA14A,0xB16B,0xC18C,0xD1AD,0xE1CE,0xF1EF, + 0x1231,0x0210,0x3273,0x2252,0x52B5,0x4294,0x72F7,0x62D6, + 0x9339,0x8318,0xB37B,0xA35A,0xD3BD,0xC39C,0xF3FF,0xE3DE, + 0x2462,0x3443,0x0420,0x1401,0x64E6,0x74C7,0x44A4,0x5485, + 0xA56A,0xB54B,0x8528,0x9509,0xE5EE,0xF5CF,0xC5AC,0xD58D, + 0x3653,0x2672,0x1611,0x0630,0x76D7,0x66F6,0x5695,0x46B4, + 0xB75B,0xA77A,0x9719,0x8738,0xF7DF,0xE7FE,0xD79D,0xC7BC, + 0x48C4,0x58E5,0x6886,0x78A7,0x0840,0x1861,0x2802,0x3823, + 0xC9CC,0xD9ED,0xE98E,0xF9AF,0x8948,0x9969,0xA90A,0xB92B, + 0x5AF5,0x4AD4,0x7AB7,0x6A96,0x1A71,0x0A50,0x3A33,0x2A12, + 0xDBFD,0xCBDC,0xFBBF,0xEB9E,0x9B79,0x8B58,0xBB3B,0xAB1A, + 0x6CA6,0x7C87,0x4CE4,0x5CC5,0x2C22,0x3C03,0x0C60,0x1C41, + 0xEDAE,0xFD8F,0xCDEC,0xDDCD,0xAD2A,0xBD0B,0x8D68,0x9D49, + 0x7E97,0x6EB6,0x5ED5,0x4EF4,0x3E13,0x2E32,0x1E51,0x0E70, + 0xFF9F,0xEFBE,0xDFDD,0xCFFC,0xBF1B,0xAF3A,0x9F59,0x8F78, + 0x9188,0x81A9,0xB1CA,0xA1EB,0xD10C,0xC12D,0xF14E,0xE16F, + 0x1080,0x00A1,0x30C2,0x20E3,0x5004,0x4025,0x7046,0x6067, + 0x83B9,0x9398,0xA3FB,0xB3DA,0xC33D,0xD31C,0xE37F,0xF35E, + 0x02B1,0x1290,0x22F3,0x32D2,0x4235,0x5214,0x6277,0x7256, + 0xB5EA,0xA5CB,0x95A8,0x8589,0xF56E,0xE54F,0xD52C,0xC50D, + 0x34E2,0x24C3,0x14A0,0x0481,0x7466,0x6447,0x5424,0x4405, + 0xA7DB,0xB7FA,0x8799,0x97B8,0xE75F,0xF77E,0xC71D,0xD73C, + 
0x26D3,0x36F2,0x0691,0x16B0,0x6657,0x7676,0x4615,0x5634, + 0xD94C,0xC96D,0xF90E,0xE92F,0x99C8,0x89E9,0xB98A,0xA9AB, + 0x5844,0x4865,0x7806,0x6827,0x18C0,0x08E1,0x3882,0x28A3, + 0xCB7D,0xDB5C,0xEB3F,0xFB1E,0x8BF9,0x9BD8,0xABBB,0xBB9A, + 0x4A75,0x5A54,0x6A37,0x7A16,0x0AF1,0x1AD0,0x2AB3,0x3A92, + 0xFD2E,0xED0F,0xDD6C,0xCD4D,0xBDAA,0xAD8B,0x9DE8,0x8DC9, + 0x7C26,0x6C07,0x5C64,0x4C45,0x3CA2,0x2C83,0x1CE0,0x0CC1, + 0xEF1F,0xFF3E,0xCF5D,0xDF7C,0xAF9B,0xBFBA,0x8FD9,0x9FF8, + 0x6E17,0x7E36,0x4E55,0x5E74,0x2E93,0x3EB2,0x0ED1,0x1EF0 + }; + + std::string hashed = key; + size_t begin = key.find('{'); + if (begin != std::string::npos) { + size_t end = key.find('}', begin + 1); + if (end != std::string::npos && end > begin + 1) { + hashed = key.substr(begin + 1, end - begin - 1); + } + } + uint16_t crc = 0; + for (size_t i = 0; i < hashed.size(); ++i) { + uint8_t idx = static_cast((crc >> 8) ^ + static_cast(hashed[i])); + crc = static_cast((crc << 8) ^ table[idx]); + } + return crc & 16383; +} + +int OwnerBySlot(int slot) { + return slot <= kSplitSlot ? 
0 : 1; +} + +struct ClusterMeta { + std::string endpoint[2]; + bool fail_slots; + bool fail_nodes; + bool slots_empty_host; + std::unordered_map owner_override; + std::unordered_map forced_error_by_key; + std::atomic slots_calls; + std::atomic nodes_calls; + std::atomic moved_error_calls; + std::atomic ask_error_calls; + std::string custom_nodes_payload; + + bool enable_ask; + std::string ask_key; + int ask_from; + int ask_to; + + std::string redirect_loop_key; + + ClusterMeta() + : fail_slots(false) + , fail_nodes(false) + , slots_empty_host(false) + , slots_calls(0) + , nodes_calls(0) + , moved_error_calls(0) + , ask_error_calls(0) + , enable_ask(false) + , ask_from(0) + , ask_to(1) { + } + + int OwnerOfKey(const std::string& key) const { + std::unordered_map::const_iterator it = owner_override.find(key); + if (it != owner_override.end()) { + return it->second; + } + return OwnerBySlot(HashSlot(key)); + } +}; + +struct NodeData { + int node_id; + ClusterMeta* meta; + butil::Mutex mutex; + std::unordered_map kv; +}; + +class Session : public brpc::Destroyable { +public: + Session() : asking(false) {} + void Destroy() override { delete this; } + bool asking; +}; + +static Session* GetOrCreateSession(brpc::RedisConnContext* ctx) { + if (ctx == NULL) { + return NULL; + } + Session* s = static_cast(ctx->get_session()); + if (s == NULL) { + s = new Session; + ctx->reset_session(s); + } + return s; +} + +static bool ParseEndpoint(const std::string& endpoint, std::string* host, int* port) { + size_t pos = endpoint.rfind(':'); + if (pos == std::string::npos) { + return false; + } + *host = endpoint.substr(0, pos); + *port = atoi(endpoint.substr(pos + 1).c_str()); + return (*port > 0); +} + +class AskingHandler : public brpc::RedisCommandHandler { +public: + brpc::RedisCommandHandlerResult Run(brpc::RedisConnContext* ctx, + const std::vector& /*args*/, + brpc::RedisReply* output, + bool /*flush_batched*/) override { + Session* s = GetOrCreateSession(ctx); + if (s != NULL) 
{ + s->asking = true; + } + output->SetStatus("OK"); + return brpc::REDIS_CMD_HANDLED; + } +}; + +class ClusterCommandHandler : public brpc::RedisCommandHandler { +public: + explicit ClusterCommandHandler(ClusterMeta* meta) : _meta(meta) {} + + brpc::RedisCommandHandlerResult Run(brpc::RedisConnContext* /*ctx*/, + const std::vector& args, + brpc::RedisReply* output, + bool /*flush_batched*/) override { + if (args.size() < 2) { + output->SetError("ERR wrong number of arguments for 'cluster' command"); + return brpc::REDIS_CMD_HANDLED; + } + if (args[1] == "slots") { + _meta->slots_calls.fetch_add(1, std::memory_order_relaxed); + if (_meta->fail_slots) { + output->SetError("ERR cluster slots disabled for test"); + return brpc::REDIS_CMD_HANDLED; + } + output->SetArray(2); + FillSlotEntry((*output)[0], 0, kSplitSlot, _meta->endpoint[0], + _meta->slots_empty_host); + FillSlotEntry((*output)[1], kSplitSlot + 1, 16383, _meta->endpoint[1], + _meta->slots_empty_host); + return brpc::REDIS_CMD_HANDLED; + } + if (args[1] == "nodes") { + _meta->nodes_calls.fetch_add(1, std::memory_order_relaxed); + if (_meta->fail_nodes) { + output->SetError("ERR cluster nodes disabled for test"); + return brpc::REDIS_CMD_HANDLED; + } + if (!_meta->custom_nodes_payload.empty()) { + output->SetString(_meta->custom_nodes_payload); + return brpc::REDIS_CMD_HANDLED; + } + std::ostringstream oss; + oss << "node0 " << _meta->endpoint[0] << "@17000 master - 0 0 1 connected 0-" + << kSplitSlot << "\n"; + oss << "node1 " << _meta->endpoint[1] << "@17001 master - 0 0 1 connected " + << (kSplitSlot + 1) << "-16383\n"; + output->SetString(oss.str()); + return brpc::REDIS_CMD_HANDLED; + } + output->SetError("ERR unsupported CLUSTER subcommand"); + return brpc::REDIS_CMD_HANDLED; + } + +private: + static void FillSlotEntry(brpc::RedisReply& reply, int start, int end, + const std::string& endpoint, + bool empty_host) { + std::string host; + int port = 0; + ParseEndpoint(endpoint, &host, &port); + 
reply.SetArray(3); + reply[0].SetInteger(start); + reply[1].SetInteger(end); + reply[2].SetArray(2); + if (empty_host) { + reply[2][0].SetString(""); + } else { + reply[2][0].SetString(host); + } + reply[2][1].SetInteger(port); + } + +private: + ClusterMeta* _meta; +}; + +class KVCommandHandler : public brpc::RedisCommandHandler { +public: + explicit KVCommandHandler(NodeData* data) : _data(data) {} + + brpc::RedisCommandHandlerResult Run(brpc::RedisConnContext* ctx, + const std::vector& args, + brpc::RedisReply* output, + bool /*flush_batched*/) override { + if (args.empty()) { + output->SetError("ERR empty command"); + return brpc::REDIS_CMD_HANDLED; + } + const std::string command = args[0].as_string(); + if (command == "ping") { + output->SetStatus("PONG"); + return brpc::REDIS_CMD_HANDLED; + } + if (command == "eval" || command == "evalsha") { + output->SetStatus("OK"); + return brpc::REDIS_CMD_HANDLED; + } + if (args.size() < 2) { + output->SetError("ERR wrong number of arguments"); + return brpc::REDIS_CMD_HANDLED; + } + + const std::string key = args[1].as_string(); + const int slot = HashSlot(key); + const int owner = _data->meta->OwnerOfKey(key); + + std::unordered_map::const_iterator forced = + _data->meta->forced_error_by_key.find(key); + if (forced != _data->meta->forced_error_by_key.end()) { + output->SetError(forced->second); + return brpc::REDIS_CMD_HANDLED; + } + + if (!_data->meta->redirect_loop_key.empty() && + key == _data->meta->redirect_loop_key) { + const int target = 1 - _data->node_id; + _data->meta->moved_error_calls.fetch_add(1, std::memory_order_relaxed); + output->FormatError("MOVED %d %s", slot, + _data->meta->endpoint[target].c_str()); + return brpc::REDIS_CMD_HANDLED; + } + + bool bypass_owner_check = false; + if (_data->meta->enable_ask && key == _data->meta->ask_key) { + if (_data->node_id == _data->meta->ask_from) { + _data->meta->ask_error_calls.fetch_add(1, std::memory_order_relaxed); + output->FormatError("ASK %d %s", slot, + 
_data->meta->endpoint[_data->meta->ask_to].c_str()); + return brpc::REDIS_CMD_HANDLED; + } + if (_data->node_id == _data->meta->ask_to) { + Session* s = GetOrCreateSession(ctx); + if (s == NULL || !s->asking) { + output->SetError("ERR ASKING required"); + return brpc::REDIS_CMD_HANDLED; + } + s->asking = false; + bypass_owner_check = true; + } + } + + if (!bypass_owner_check && owner != _data->node_id) { + _data->meta->moved_error_calls.fetch_add(1, std::memory_order_relaxed); + output->FormatError("MOVED %d %s", slot, _data->meta->endpoint[owner].c_str()); + return brpc::REDIS_CMD_HANDLED; + } + + if (command == "set") { + if (args.size() < 3) { + output->SetError("ERR wrong number of arguments for 'set' command"); + return brpc::REDIS_CMD_HANDLED; + } + BAIDU_SCOPED_LOCK(_data->mutex); + _data->kv[key] = args[2].as_string(); + output->SetStatus("OK"); + return brpc::REDIS_CMD_HANDLED; + } + if (command == "get") { + BAIDU_SCOPED_LOCK(_data->mutex); + std::unordered_map::iterator it = _data->kv.find(key); + if (it == _data->kv.end()) { + output->SetNullString(); + } else { + output->SetString(it->second); + } + return brpc::REDIS_CMD_HANDLED; + } + if (command == "del" || command == "unlink") { + BAIDU_SCOPED_LOCK(_data->mutex); + output->SetInteger(_data->kv.erase(key) ? 1 : 0); + return brpc::REDIS_CMD_HANDLED; + } + if (command == "exists") { + BAIDU_SCOPED_LOCK(_data->mutex); + output->SetInteger(_data->kv.count(key) ? 
1 : 0); + return brpc::REDIS_CMD_HANDLED; + } + + output->SetError("ERR unsupported command"); + return brpc::REDIS_CMD_HANDLED; + } + +private: + NodeData* _data; +}; + +class ClusterRedisService : public brpc::RedisService { +public: + explicit ClusterRedisService(NodeData* data) { + AddCommandHandler("asking", new AskingHandler()); + AddCommandHandler("cluster", new ClusterCommandHandler(data->meta)); + + KVCommandHandler* handler = new KVCommandHandler(data); + AddCommandHandler("ping", handler); + AddCommandHandler("get", handler); + AddCommandHandler("set", handler); + AddCommandHandler("del", handler); + AddCommandHandler("exists", handler); + AddCommandHandler("unlink", handler); + AddCommandHandler("eval", handler); + AddCommandHandler("evalsha", handler); + } +}; + +class Done : public google::protobuf::Closure { +public: + explicit Done(bthread::CountdownEvent* e) : _event(e) {} + void Run() override { _event->signal(); } +private: + bthread::CountdownEvent* _event; +}; + +class RedisClusterChannelTest : public testing::Test { +protected: + void SetUp() override { + _meta.reset(new ClusterMeta); + _node[0].meta = _meta.get(); + _node[0].node_id = 0; + _node[1].meta = _meta.get(); + _node[1].node_id = 1; + + StartServer(0); + StartServer(1); + } + + void TearDown() override { + for (int i = 0; i < 2; ++i) { + _server[i].Stop(0); + } + for (int i = 0; i < 2; ++i) { + _server[i].Join(); + } + } + + std::string SeedList() const { + return _meta->endpoint[0] + "," + _meta->endpoint[1]; + } + + void InitChannel(brpc::RedisClusterChannel* channel, int max_redirect = 5) { + brpc::RedisClusterChannelOptions options; + options.max_redirect = max_redirect; + options.enable_periodic_refresh = false; + ASSERT_EQ(0, channel->Init(SeedList(), &options)); + } + + std::string FindKeyForNode(int node_id) const { + for (int i = 0; i < 200000; ++i) { + std::ostringstream oss; + oss << "key_" << node_id << '_' << i; + if (OwnerBySlot(HashSlot(oss.str())) == node_id) { + 
return oss.str(); + } + } + return "fallback_key"; + } + + std::vector FindKeysForNode(int node_id, size_t count) const { + std::vector keys; + keys.reserve(count); + for (int i = 0; i < 400000 && keys.size() < count; ++i) { + std::ostringstream oss; + oss << "key_batch_" << node_id << '_' << i; + if (OwnerBySlot(HashSlot(oss.str())) == node_id) { + keys.push_back(oss.str()); + } + } + return keys; + } + + std::string FindHashTagForNode(int node_id) const { + for (int i = 0; i < 200000; ++i) { + std::ostringstream oss; + oss << "tag_" << node_id << '_' << i; + const std::string key = "{" + oss.str() + "}"; + if (OwnerBySlot(HashSlot(key)) == node_id) { + return oss.str(); + } + } + return "fallback_tag"; + } + +private: + void StartServer(int index) { + brpc::ServerOptions options; + options.redis_service = new ClusterRedisService(&_node[index]); + brpc::PortRange range(20000 + index * 1000, 20999 + index * 1000); + ASSERT_EQ(0, _server[index].Start("127.0.0.1", range, &options)); + std::ostringstream oss; + oss << "127.0.0.1:" << _server[index].listen_address().port; + _meta->endpoint[index] = oss.str(); + } + +protected: + std::unique_ptr _meta; + NodeData _node[2]; + brpc::Server _server[2]; +}; + +TEST_F(RedisClusterChannelTest, basic_routing_and_multi_key_commands) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key0 = FindKeyForNode(0); + const std::string key1 = FindKeyForNode(1); + + { + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("mset %s v0 %s v1", key0.c_str(), key1.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("OK", resp.reply(0).data()); + } + + { + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("mget %s %s", key0.c_str(), key1.c_str())); + 
channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_array()); + ASSERT_EQ(2u, resp.reply(0).size()); + ASSERT_EQ("v0", resp.reply(0)[0].data()); + ASSERT_EQ("v1", resp.reply(0)[1].data()); + } + + { + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("exists %s %s", key0.c_str(), key1.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(2, resp.reply(0).integer()); + } + + { + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("del %s %s", key0.c_str(), key1.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(2, resp.reply(0).integer()); + } +} + +TEST_F(RedisClusterChannelTest, moved_redirection) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string moved_key = FindKeyForNode(0); + _meta->owner_override[moved_key] = 1; + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[moved_key] = "moved-value"; + } + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", moved_key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("moved-value", resp.reply(0).data()); +} + +TEST_F(RedisClusterChannelTest, ask_redirection) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + _meta->enable_ask = true; + _meta->ask_from = 0; + _meta->ask_to = 1; + _meta->ask_key = FindKeyForNode(0); + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[_meta->ask_key] = "ask-value"; + } + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller 
cntl; + ASSERT_TRUE(req.AddCommand("get %s", _meta->ask_key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("ask-value", resp.reply(0).data()); +} + +TEST_F(RedisClusterChannelTest, cluster_nodes_fallback) { + _meta->fail_slots = true; + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key = FindKeyForNode(1); + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("set %s vv", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("OK", resp.reply(0).data()); +} + +TEST_F(RedisClusterChannelTest, eval_and_evalsha) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key0 = FindKeyForNode(0); + const std::string key1 = FindKeyForNode(1); + + { + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + const butil::StringPiece parts[] = { + "eval", "return 1", "2", key0, key1 + }; + ASSERT_TRUE(req.AddCommandByComponents(parts, sizeof(parts) / sizeof(parts[0]))); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_error()); + ASSERT_NE(std::string::npos, + std::string(resp.reply(0).error_message()).find("CROSSSLOT")); + } + + { + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("evalsha abcdef 1 %s arg1", key0.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("OK", resp.reply(0).data()); + } +} + +TEST_F(RedisClusterChannelTest, 
redirect_retry_limit) { + brpc::RedisClusterChannel channel; + InitChannel(&channel, 3); + + _meta->redirect_loop_key = FindKeyForNode(0); + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", _meta->redirect_loop_key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_TRUE(cntl.Failed()); + ASSERT_NE(std::string::npos, cntl.ErrorText().find("redirect")); +} + +TEST_F(RedisClusterChannelTest, async_call) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key = FindKeyForNode(1); + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[key] = "async-value"; + } + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key.c_str())); + + bthread::CountdownEvent event(1); + Done done(&event); + channel.CallMethod(NULL, &cntl, &req, &resp, &done); + event.wait(); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("async-value", resp.reply(0).data()); +} + +TEST_F(RedisClusterChannelTest, pipeline_order_with_mixed_commands) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key0 = FindKeyForNode(0); + const std::string key1 = FindKeyForNode(1); + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("set %s va", key0.c_str())); + ASSERT_TRUE(req.AddCommand("set %s vb", key1.c_str())); + ASSERT_TRUE(req.AddCommand("mget %s %s", key0.c_str(), key1.c_str())); + ASSERT_TRUE(req.AddCommand("exists %s %s", key0.c_str(), key1.c_str())); + ASSERT_TRUE(req.AddCommand("unlink %s %s", key0.c_str(), key1.c_str())); + ASSERT_TRUE(req.AddCommand("mget %s %s", key0.c_str(), key1.c_str())); + + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(6, resp.reply_size()); + 
ASSERT_EQ("OK", resp.reply(0).data()); + ASSERT_EQ("OK", resp.reply(1).data()); + + ASSERT_TRUE(resp.reply(2).is_array()); + ASSERT_EQ(2u, resp.reply(2).size()); + ASSERT_EQ("va", resp.reply(2)[0].data()); + ASSERT_EQ("vb", resp.reply(2)[1].data()); + + ASSERT_TRUE(resp.reply(3).is_integer()); + ASSERT_EQ(2, resp.reply(3).integer()); + ASSERT_TRUE(resp.reply(4).is_integer()); + ASSERT_EQ(2, resp.reply(4).integer()); + + ASSERT_TRUE(resp.reply(5).is_array()); + ASSERT_EQ(2u, resp.reply(5).size()); + ASSERT_TRUE(resp.reply(5)[0].is_nil()); + ASSERT_TRUE(resp.reply(5)[1].is_nil()); +} + +TEST_F(RedisClusterChannelTest, transaction_commands_are_not_supported) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("multi")); + ASSERT_TRUE(req.AddCommand("exec")); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(2, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_error()); + ASSERT_TRUE(resp.reply(1).is_error()); + ASSERT_NE(std::string::npos, + std::string(resp.reply(0).error_message()).find("not supported")); +} + +TEST_F(RedisClusterChannelTest, eval_argument_validation) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + { + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + const butil::StringPiece parts[] = { + "eval", "return 1", "abc", "k1" + }; + ASSERT_TRUE(req.AddCommandByComponents(parts, sizeof(parts) / sizeof(parts[0]))); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_error()); + ASSERT_NE(std::string::npos, + std::string(resp.reply(0).error_message()).find("invalid numkeys")); + } + + { + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + const butil::StringPiece parts[] = { + 
"eval", "return 1", "2", "k1" + }; + ASSERT_TRUE(req.AddCommandByComponents(parts, sizeof(parts) / sizeof(parts[0]))); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_error()); + ASSERT_NE(std::string::npos, + std::string(resp.reply(0).error_message()).find("not enough keys")); + } +} + +TEST_F(RedisClusterChannelTest, async_failure_propagation) { + brpc::RedisClusterChannel channel; + InitChannel(&channel, 1); + _meta->redirect_loop_key = FindKeyForNode(0); + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", _meta->redirect_loop_key.c_str())); + + bthread::CountdownEvent event(1); + Done done(&event); + channel.CallMethod(NULL, &cntl, &req, &resp, &done); + event.wait(); + + ASSERT_TRUE(cntl.Failed()); + ASSERT_NE(std::string::npos, cntl.ErrorText().find("redirect")); +} + +TEST_F(RedisClusterChannelTest, max_redirect_zero_fails_on_single_redirect) { + brpc::RedisClusterChannel channel; + InitChannel(&channel, 0); + + const std::string key = FindKeyForNode(0); + _meta->owner_override[key] = 1; + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[key] = "value-on-node1"; + } + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_TRUE(cntl.Failed()); + ASSERT_NE(std::string::npos, cntl.ErrorText().find("redirect")); +} + +TEST_F(RedisClusterChannelTest, redirect_with_refresh_failure_still_returns_reply) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key = FindKeyForNode(0); + _meta->owner_override[key] = 1; + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[key] = "moved-value"; + } + + const int before_slots = _meta->slots_calls.load(std::memory_order_relaxed); + const int before_nodes = 
_meta->nodes_calls.load(std::memory_order_relaxed); + _meta->fail_slots = true; + _meta->fail_nodes = true; + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("moved-value", resp.reply(0).data()); + + ASSERT_GT(_meta->slots_calls.load(std::memory_order_relaxed), before_slots); + ASSERT_GT(_meta->nodes_calls.load(std::memory_order_relaxed), before_nodes); +} + +TEST_F(RedisClusterChannelTest, pipeline_with_ask_and_moved_keeps_order) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string ask_key = FindKeyForNode(0); + std::string moved_key; + for (int i = 0; i < 200000; ++i) { + std::ostringstream oss; + oss << "moved_key_" << i; + if (OwnerBySlot(HashSlot(oss.str())) == 0 && oss.str() != ask_key) { + moved_key = oss.str(); + break; + } + } + ASSERT_FALSE(moved_key.empty()); + + _meta->enable_ask = true; + _meta->ask_from = 0; + _meta->ask_to = 1; + _meta->ask_key = ask_key; + _meta->owner_override[moved_key] = 1; + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[ask_key] = "ask-value"; + _node[1].kv[moved_key] = "moved-value"; + } + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", ask_key.c_str())); + ASSERT_TRUE(req.AddCommand("get %s", moved_key.c_str())); + ASSERT_TRUE(req.AddCommand("get %s", ask_key.c_str())); + + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(3, resp.reply_size()); + ASSERT_EQ("ask-value", resp.reply(0).data()); + ASSERT_EQ("moved-value", resp.reply(1).data()); + ASSERT_EQ("ask-value", resp.reply(2).data()); +} + +TEST_F(RedisClusterChannelTest, fallback_to_nodes_then_recover_to_slots) { + _meta->fail_slots = true; + + 
brpc::RedisClusterChannel channel; + InitChannel(&channel); + ASSERT_GT(_meta->nodes_calls.load(std::memory_order_relaxed), 0); + + _meta->fail_slots = false; + const int before_slots = _meta->slots_calls.load(std::memory_order_relaxed); + + const std::string key = FindKeyForNode(0); + _meta->owner_override[key] = 1; + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[key] = "recover-value"; + } + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ("recover-value", resp.reply(0).data()); + ASSERT_GT(_meta->slots_calls.load(std::memory_order_relaxed), before_slots); +} + +TEST_F(RedisClusterChannelTest, cluster_slots_empty_host_uses_seed_host) { + _meta->slots_empty_host = true; + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key = FindKeyForNode(1); + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("set %s host-fallback-value", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ("OK", resp.reply(0).data()); +} + +TEST_F(RedisClusterChannelTest, init_accepts_whitespace_in_seed_list) { + brpc::RedisClusterChannel channel; + brpc::RedisClusterChannelOptions options; + options.enable_periodic_refresh = false; + const std::string seeds = " " + _meta->endpoint[0] + " , " + _meta->endpoint[1] + " "; + ASSERT_EQ(0, channel.Init(seeds, &options)); +} + +TEST_F(RedisClusterChannelTest, init_with_invalid_seed_tokens_should_fail) { + brpc::RedisClusterChannel channel; + ASSERT_NE(0, channel.Init(" , , ")); +} + +TEST_F(RedisClusterChannelTest, init_fails_when_cluster_topology_unavailable) { + _meta->fail_slots = true; + _meta->fail_nodes = true; + + brpc::RedisClusterChannel channel; + 
brpc::RedisClusterChannelOptions options; + options.enable_periodic_refresh = false; + ASSERT_NE(0, channel.Init(SeedList(), &options)); +} + +TEST_F(RedisClusterChannelTest, ping_without_key_uses_any_endpoint) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("ping")); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("PONG", resp.reply(0).data()); +} + +TEST_F(RedisClusterChannelTest, wrong_argument_count_commands_return_error_reply) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("mget")); + ASSERT_TRUE(req.AddCommand("mset only_key")); + ASSERT_TRUE(req.AddCommand("del")); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(3, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_error()); + ASSERT_TRUE(resp.reply(1).is_error()); + ASSERT_TRUE(resp.reply(2).is_error()); +} + +TEST_F(RedisClusterChannelTest, malformed_redirect_error_is_returned_directly) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key = FindKeyForNode(0); + _meta->forced_error_by_key[key] = "MOVED not_a_slot bad_endpoint"; + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_error()); + ASSERT_EQ("MOVED not_a_slot bad_endpoint", + std::string(resp.reply(0).error_message())); +} + +TEST_F(RedisClusterChannelTest, 
cluster_nodes_parser_ignores_migration_tokens) { + _meta->fail_slots = true; + std::ostringstream nodes; + nodes << "node0 " << _meta->endpoint[0] + << "@17000 master - 0 0 1 connected 0-" << kSplitSlot + << " [100->-node1]\n"; + nodes << "node1 " << _meta->endpoint[1] + << "@17001 master - 0 0 1 connected " << (kSplitSlot + 1) + << "-16383 [100-<-node0]\n"; + _meta->custom_nodes_payload = nodes.str(); + + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key = FindKeyForNode(1); + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("set %s from-nodes", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ("OK", resp.reply(0).data()); +} + +TEST_F(RedisClusterChannelTest, eval_numkeys_zero_routes_without_slot) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + const butil::StringPiece parts[] = { + "eval", "return 'ok'", "0", "arg1" + }; + ASSERT_TRUE(req.AddCommandByComponents(parts, sizeof(parts) / sizeof(parts[0]))); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("OK", resp.reply(0).data()); +} + +TEST_F(RedisClusterChannelTest, mset_stops_after_subcommand_error) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + std::vector keys0 = FindKeysForNode(0, 2); + ASSERT_EQ(2u, keys0.size()); + const std::string key_ok = keys0[0]; + const std::string key_tail = keys0[1]; + const std::string key_err = FindKeyForNode(1); + ASSERT_NE(key_ok, key_err); + ASSERT_NE(key_tail, key_err); + + _meta->forced_error_by_key[key_err] = "ERR injected set failure"; + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + 
ASSERT_TRUE(req.AddCommand("mset %s v0 %s v1 %s v2", + key_ok.c_str(), + key_err.c_str(), + key_tail.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_error()); + ASSERT_NE(std::string::npos, + std::string(resp.reply(0).error_message()).find("injected set failure")); + + { + brpc::RedisRequest get_req; + brpc::RedisResponse get_resp; + brpc::Controller get_cntl; + ASSERT_TRUE(get_req.AddCommand("get %s", key_ok.c_str())); + channel.CallMethod(NULL, &get_cntl, &get_req, &get_resp, NULL); + ASSERT_FALSE(get_cntl.Failed()) << get_cntl.ErrorText(); + ASSERT_TRUE(get_resp.reply(0).is_string()); + ASSERT_EQ("v0", get_resp.reply(0).data()); + } + { + brpc::RedisRequest get_req; + brpc::RedisResponse get_resp; + brpc::Controller get_cntl; + ASSERT_TRUE(get_req.AddCommand("get %s", key_tail.c_str())); + channel.CallMethod(NULL, &get_cntl, &get_req, &get_resp, NULL); + ASSERT_FALSE(get_cntl.Failed()) << get_cntl.ErrorText(); + ASSERT_TRUE(get_resp.reply(0).is_nil()); + } +} + +TEST_F(RedisClusterChannelTest, integer_aggregate_stops_after_subcommand_error) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key0 = FindKeyForNode(0); + std::vector keys1 = FindKeysForNode(1, 2); + ASSERT_EQ(2u, keys1.size()); + const std::string key_err = keys1[0]; + const std::string key_tail = keys1[1]; + + { + BAIDU_SCOPED_LOCK(_node[0].mutex); + _node[0].kv[key0] = "v0"; + } + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[key_err] = "verr"; + _node[1].kv[key_tail] = "vtail"; + } + _meta->forced_error_by_key[key_err] = "ERR injected unlink failure"; + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("unlink %s %s %s", + key0.c_str(), key_err.c_str(), key_tail.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_FALSE(cntl.Failed()) << 
cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_error()); + ASSERT_NE(std::string::npos, + std::string(resp.reply(0).error_message()).find("injected unlink failure")); + + brpc::RedisRequest get_req; + brpc::RedisResponse get_resp; + brpc::Controller get_cntl; + ASSERT_TRUE(get_req.AddCommand("get %s", key_tail.c_str())); + channel.CallMethod(NULL, &get_cntl, &get_req, &get_resp, NULL); + ASSERT_FALSE(get_cntl.Failed()) << get_cntl.ErrorText(); + ASSERT_TRUE(get_resp.reply(0).is_string()); + ASSERT_EQ("vtail", get_resp.reply(0).data()); +} + +TEST_F(RedisClusterChannelTest, async_concurrent_calls_with_mixed_redirections) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + std::vector keys0 = FindKeysForNode(0, 2); + ASSERT_EQ(2u, keys0.size()); + const std::string ask_key = keys0[0]; + const std::string moved_key = keys0[1]; + const std::string normal_key = FindKeyForNode(1); + + _meta->enable_ask = true; + _meta->ask_from = 0; + _meta->ask_to = 1; + _meta->ask_key = ask_key; + _meta->owner_override[moved_key] = 1; + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[ask_key] = "ask-v"; + _node[1].kv[moved_key] = "moved-v"; + _node[1].kv[normal_key] = "normal-v"; + } + + const int req_count = 60; + bthread::CountdownEvent event(req_count); + std::vector > requests(req_count); + std::vector > responses(req_count); + std::vector > controllers(req_count); + std::vector > dones(req_count); + std::vector expected(req_count); + + for (int i = 0; i < req_count; ++i) { + requests[i].reset(new brpc::RedisRequest); + responses[i].reset(new brpc::RedisResponse); + controllers[i].reset(new brpc::Controller); + dones[i].reset(new Done(&event)); + + std::string key; + if (i % 3 == 0) { + key = ask_key; + expected[i] = "ask-v"; + } else if (i % 3 == 1) { + key = moved_key; + expected[i] = "moved-v"; + } else { + key = normal_key; + expected[i] = "normal-v"; + } + ASSERT_TRUE(requests[i]->AddCommand("get %s", key.c_str())); + 
channel.CallMethod(NULL, + controllers[i].get(), + requests[i].get(), + responses[i].get(), + dones[i].get()); + } + + event.wait(); + + for (int i = 0; i < req_count; ++i) { + ASSERT_FALSE(controllers[i]->Failed()) << controllers[i]->ErrorText(); + ASSERT_EQ(1, responses[i]->reply_size()); + ASSERT_TRUE(responses[i]->reply(0).is_string()); + ASSERT_EQ(expected[i], responses[i]->reply(0).data()); + } + ASSERT_GT(_meta->ask_error_calls.load(std::memory_order_relaxed), 0); + ASSERT_GT(_meta->moved_error_calls.load(std::memory_order_relaxed), 0); +} + +TEST_F(RedisClusterChannelTest, hashtag_keys_route_for_multi_key_commands) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string tag = FindHashTagForNode(1); + const std::string key0 = "k0{" + tag + "}suffix"; + const std::string key1 = "k1{" + tag + "}suffix"; + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("mset %s v0 %s v1", key0.c_str(), key1.c_str())); + ASSERT_TRUE(req.AddCommand("mget %s %s", key0.c_str(), key1.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(2, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("OK", resp.reply(0).data()); + ASSERT_TRUE(resp.reply(1).is_array()); + ASSERT_EQ("v0", resp.reply(1)[0].data()); + ASSERT_EQ("v1", resp.reply(1)[1].data()); + ASSERT_EQ(0, _meta->moved_error_calls.load(std::memory_order_relaxed)); +} + +TEST_F(RedisClusterChannelTest, missing_key_get_returns_nil_reply) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key = FindKeyForNode(0); + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + 
ASSERT_TRUE(resp.reply(0).is_nil()); +} + +TEST_F(RedisClusterChannelTest, pipeline_with_string_nil_error_and_string) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key_ok = FindKeyForNode(1); + const std::string key_nil = FindKeyForNode(0); + std::string key_err; + for (int i = 0; i < 200000; ++i) { + std::ostringstream oss; + oss << "err_key_" << i; + if (OwnerBySlot(HashSlot(oss.str())) == 1 && oss.str() != key_ok) { + key_err = oss.str(); + break; + } + } + ASSERT_FALSE(key_err.empty()); + + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[key_ok] = "ok-value"; + } + _meta->forced_error_by_key[key_err] = "ERR injected pipeline error"; + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key_ok.c_str())); + ASSERT_TRUE(req.AddCommand("get %s", key_nil.c_str())); + ASSERT_TRUE(req.AddCommand("get %s", key_err.c_str())); + ASSERT_TRUE(req.AddCommand("get %s", key_ok.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(4, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("ok-value", resp.reply(0).data()); + ASSERT_TRUE(resp.reply(1).is_nil()); + ASSERT_TRUE(resp.reply(2).is_error()); + ASSERT_NE(std::string::npos, + std::string(resp.reply(2).error_message()).find("injected pipeline error")); + ASSERT_TRUE(resp.reply(3).is_string()); + ASSERT_EQ("ok-value", resp.reply(3).data()); +} + +TEST_F(RedisClusterChannelTest, empty_request_should_fail) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_TRUE(cntl.Failed()); + ASSERT_NE(std::string::npos, cntl.ErrorText().find("no redis command")); +} + +TEST_F(RedisClusterChannelTest, pipeline_continues_after_command_error_reply) { + brpc::RedisClusterChannel 
channel; + InitChannel(&channel); + + const std::string key_ok = FindKeyForNode(1); + const std::string key_err = FindKeyForNode(0); + _meta->forced_error_by_key[key_err] = "ERR injected get failure"; + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[key_ok] = "ok-value"; + } + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key_err.c_str())); + ASSERT_TRUE(req.AddCommand("get %s", key_ok.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(2, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_error()); + ASSERT_NE(std::string::npos, + std::string(resp.reply(0).error_message()).find("injected get failure")); + ASSERT_TRUE(resp.reply(1).is_string()); + ASSERT_EQ("ok-value", resp.reply(1).data()); +} + +TEST_F(RedisClusterChannelTest, redirect_updates_slot_cache_even_when_refresh_fails) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key = FindKeyForNode(0); + _meta->owner_override[key] = 1; + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[key] = "value-on-node1"; + } + _meta->fail_slots = true; + _meta->fail_nodes = true; + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ("value-on-node1", resp.reply(0).data()); + + brpc::RedisRequest req2; + brpc::RedisResponse resp2; + brpc::Controller cntl2; + ASSERT_TRUE(req2.AddCommand("get %s", key.c_str())); + channel.CallMethod(NULL, &cntl2, &req2, &resp2, NULL); + ASSERT_FALSE(cntl2.Failed()) << cntl2.ErrorText(); + ASSERT_EQ("value-on-node1", resp2.reply(0).data()); + + ASSERT_EQ(1, _meta->moved_error_calls.load(std::memory_order_relaxed)); +} + +TEST_F(RedisClusterChannelTest, periodic_refresh_fallbacks_to_nodes_when_slots_fail) { + 
brpc::RedisClusterChannel channel; + brpc::RedisClusterChannelOptions options; + options.enable_periodic_refresh = true; + options.refresh_interval_s = 1; + options.max_redirect = 5; + ASSERT_EQ(0, channel.Init(SeedList(), &options)); + + _meta->fail_slots = true; + const int before_nodes = _meta->nodes_calls.load(std::memory_order_relaxed); + bool nodes_used = false; + for (int i = 0; i < 30; ++i) { + if (_meta->nodes_calls.load(std::memory_order_relaxed) > before_nodes) { + nodes_used = true; + break; + } + bthread_usleep(100000); + } + ASSERT_TRUE(nodes_used); +} + +TEST_F(RedisClusterChannelTest, async_pipeline_mixed_commands) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + const std::string key0 = FindKeyForNode(0); + const std::string key1 = FindKeyForNode(1); + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("set %s p0", key0.c_str())); + ASSERT_TRUE(req.AddCommand("set %s p1", key1.c_str())); + ASSERT_TRUE(req.AddCommand("mget %s %s", key0.c_str(), key1.c_str())); + ASSERT_TRUE(req.AddCommand("del %s %s", key0.c_str(), key1.c_str())); + + bthread::CountdownEvent event(1); + Done done(&event); + channel.CallMethod(NULL, &cntl, &req, &resp, &done); + event.wait(); + + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(4, resp.reply_size()); + ASSERT_EQ("OK", resp.reply(0).data()); + ASSERT_EQ("OK", resp.reply(1).data()); + ASSERT_TRUE(resp.reply(2).is_array()); + ASSERT_EQ("p0", resp.reply(2)[0].data()); + ASSERT_EQ("p1", resp.reply(2)[1].data()); + ASSERT_TRUE(resp.reply(3).is_integer()); + ASSERT_EQ(2, resp.reply(3).integer()); +} + +TEST_F(RedisClusterChannelTest, periodic_refresh_updates_topology_in_background) { + brpc::RedisClusterChannel channel; + brpc::RedisClusterChannelOptions options; + options.enable_periodic_refresh = true; + options.refresh_interval_s = 1; + options.max_redirect = 5; + ASSERT_EQ(0, channel.Init(SeedList(), &options)); + + const int 
initial_slots_calls = _meta->slots_calls.load(std::memory_order_relaxed); + bool refreshed = false; + for (int i = 0; i < 30; ++i) { + if (_meta->slots_calls.load(std::memory_order_relaxed) > initial_slots_calls) { + refreshed = true; + break; + } + bthread_usleep(100000); + } + ASSERT_TRUE(refreshed); +} + +TEST_F(RedisClusterChannelTest, periodic_refresh_thread_stops_quickly_on_destroy) { + typedef std::chrono::steady_clock Clock; + const Clock::time_point begin = Clock::now(); + { + brpc::RedisClusterChannel channel; + brpc::RedisClusterChannelOptions options; + options.enable_periodic_refresh = true; + options.refresh_interval_s = 30; + ASSERT_EQ(0, channel.Init(SeedList(), &options)); + } + const Clock::time_point end = Clock::now(); + const int64_t elapsed_ms = + std::chrono::duration_cast(end - begin).count(); + ASSERT_LT(elapsed_ms, 2000); +} + +TEST_F(RedisClusterChannelTest, init_with_empty_seed_should_fail) { + brpc::RedisClusterChannel channel; + ASSERT_NE(0, channel.Init("")); +} + +} // namespace From 02295bf81a9e286d2608a6676ef40b0f1a73abd0 Mon Sep 17 00:00:00 2001 From: wayslog Date: Mon, 23 Feb 2026 13:13:31 +0800 Subject: [PATCH 34/84] chore(ci): trigger github actions rerun for PR #3222 From 2a6e52f2d993af021961d05b04a2720d8dce99e0 Mon Sep 17 00:00:00 2001 From: wayslog Date: Mon, 23 Feb 2026 13:43:51 +0800 Subject: [PATCH 35/84] fix(redis): do not cache ASK redirection in slot table --- src/brpc/redis_cluster.cpp | 6 +++++- test/brpc_redis_cluster_unittest.cpp | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/brpc/redis_cluster.cpp b/src/brpc/redis_cluster.cpp index d532e80c2f..bec2cd4295 100644 --- a/src/brpc/redis_cluster.cpp +++ b/src/brpc/redis_cluster.cpp @@ -431,7 +431,11 @@ bool RedisClusterChannel::ExecuteSingleCommand(const std::vector& a next_endpoint = redirect.endpoint; GetOrCreateChannel(next_endpoint); } - if (redirect.slot >= 0 && static_cast(redirect.slot) < _slot_to_endpoint.size() 
&& + // ASK is a temporary redirection during slot migration and should not + // overwrite the stable slot map. Only persist MOVED target. + if (!redirect.asking && + redirect.slot >= 0 && + static_cast(redirect.slot) < _slot_to_endpoint.size() && !redirect.endpoint.empty()) { BAIDU_SCOPED_LOCK(_mutex); _slot_to_endpoint[redirect.slot] = redirect.endpoint; diff --git a/test/brpc_redis_cluster_unittest.cpp b/test/brpc_redis_cluster_unittest.cpp index 7a639b1bd8..20a1ff759b 100644 --- a/test/brpc_redis_cluster_unittest.cpp +++ b/test/brpc_redis_cluster_unittest.cpp @@ -581,6 +581,32 @@ TEST_F(RedisClusterChannelTest, ask_redirection) { ASSERT_EQ("ask-value", resp.reply(0).data()); } +TEST_F(RedisClusterChannelTest, ask_redirection_does_not_override_slot_cache) { + brpc::RedisClusterChannel channel; + InitChannel(&channel); + + _meta->enable_ask = true; + _meta->ask_from = 0; + _meta->ask_to = 1; + _meta->ask_key = FindKeyForNode(0); + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[_meta->ask_key] = "ask-value"; + } + + for (int i = 0; i < 5; ++i) { + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", _meta->ask_key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ("ask-value", resp.reply(0).data()); + } +} + TEST_F(RedisClusterChannelTest, cluster_nodes_fallback) { _meta->fail_slots = true; brpc::RedisClusterChannel channel; From 4aa276fcd4a3e9b1d79a3d6b82bc32570173fcf2 Mon Sep 17 00:00:00 2001 From: wayslog Date: Fri, 27 Feb 2026 14:07:46 +0800 Subject: [PATCH 36/84] perf(redis): protect slot cache with DoublyBufferedData Use DoublyBufferedData for RedisClusterChannel slot->endpoint map to reduce lock contention in hot path, and add a unit test that verifies slot map updates on topology change. 
--- src/brpc/redis_cluster.cpp | 49 +++++++++--- src/brpc/redis_cluster.h | 3 +- test/brpc_redis_cluster_unittest.cpp | 113 ++++++++++++++++++++++++++- 3 files changed, 151 insertions(+), 14 deletions(-) diff --git a/src/brpc/redis_cluster.cpp b/src/brpc/redis_cluster.cpp index bec2cd4295..f2531c95ac 100644 --- a/src/brpc/redis_cluster.cpp +++ b/src/brpc/redis_cluster.cpp @@ -166,7 +166,10 @@ RedisClusterChannel::RedisClusterChannel() : _stop_refresh(false) , _refresh_started(false) , _refresh_tid(0) { - _slot_to_endpoint.resize(kRedisClusterSlotCount); + _db_slot_to_endpoint.Modify([](std::vector& bg) -> size_t { + bg.assign(kRedisClusterSlotCount, std::string()); + return 1; + }); } RedisClusterChannel::~RedisClusterChannel() { @@ -435,10 +438,18 @@ bool RedisClusterChannel::ExecuteSingleCommand(const std::vector& a // overwrite the stable slot map. Only persist MOVED target. if (!redirect.asking && redirect.slot >= 0 && - static_cast(redirect.slot) < _slot_to_endpoint.size() && + redirect.slot < static_cast(kRedisClusterSlotCount) && !redirect.endpoint.empty()) { - BAIDU_SCOPED_LOCK(_mutex); - _slot_to_endpoint[redirect.slot] = redirect.endpoint; + _db_slot_to_endpoint.Modify( + [](std::vector& bg, int slot, + const std::string& endpoint) -> size_t { + if (bg[slot] == endpoint) { + return 0; + } + bg[slot] = endpoint; + return 1; + }, + redirect.slot, redirect.endpoint); } if (!redirect.asking) { @@ -598,11 +609,15 @@ bool RedisClusterChannel::PickEndpointForKey(const std::string& key, std::string* endpoint, int* slot) const { const int key_slot = HashSlot(key); - BAIDU_SCOPED_LOCK(_mutex); - if (key_slot < 0 || static_cast(key_slot) >= _slot_to_endpoint.size()) { + if (key_slot < 0 || key_slot >= static_cast(kRedisClusterSlotCount)) { + return false; + } + butil::DoublyBufferedData >::ScopedPtr s; + if (_db_slot_to_endpoint.Read(&s) != 0 || + static_cast(key_slot) >= s->size()) { return false; } - const std::string& mapped = _slot_to_endpoint[key_slot]; + const 
std::string& mapped = (*s)[key_slot]; if (mapped.empty()) { return false; } @@ -935,8 +950,15 @@ void RedisClusterChannel::ApplyTopology( GetOrCreateChannel(*it); } + _db_slot_to_endpoint.Modify( + [](std::vector& bg, + const std::vector& src) -> size_t { + bg = src; + return 1; + }, + slot_to_endpoint); + BAIDU_SCOPED_LOCK(_mutex); - _slot_to_endpoint = slot_to_endpoint; for (std::set::const_iterator it = unique_eps.begin(); it != unique_eps.end(); ++it) { if (std::find(_seed_endpoints.begin(), _seed_endpoints.end(), *it) == @@ -1182,10 +1204,13 @@ int RedisClusterChannel::CheckHealth() { int RedisClusterChannel::Weight() { std::set unique; - BAIDU_SCOPED_LOCK(_mutex); - for (size_t i = 0; i < _slot_to_endpoint.size(); ++i) { - if (!_slot_to_endpoint[i].empty()) { - unique.insert(_slot_to_endpoint[i]); + butil::DoublyBufferedData >::ScopedPtr s; + if (_db_slot_to_endpoint.Read(&s) != 0) { + return 0; + } + for (size_t i = 0; i < s->size(); ++i) { + if (!(*s)[i].empty()) { + unique.insert((*s)[i]); } } return static_cast(unique.size()); diff --git a/src/brpc/redis_cluster.h b/src/brpc/redis_cluster.h index 47d399bc46..1250a8e386 100644 --- a/src/brpc/redis_cluster.h +++ b/src/brpc/redis_cluster.h @@ -27,6 +27,7 @@ #include #include "bthread/bthread.h" +#include "butil/containers/doubly_buffered_data.h" #include "butil/synchronization/lock.h" #include "brpc/channel.h" #include "brpc/channel_base.h" @@ -169,7 +170,7 @@ class RedisClusterChannel : public ChannelBase { RedisClusterChannelOptions _options; mutable butil::Mutex _mutex; - std::vector _slot_to_endpoint; + mutable butil::DoublyBufferedData > _db_slot_to_endpoint; std::vector _seed_endpoints; std::unordered_map > _channels; diff --git a/test/brpc_redis_cluster_unittest.cpp b/test/brpc_redis_cluster_unittest.cpp index 20a1ff759b..3047159cca 100644 --- a/test/brpc_redis_cluster_unittest.cpp +++ b/test/brpc_redis_cluster_unittest.cpp @@ -101,6 +101,9 @@ struct ClusterMeta { bool fail_slots; bool fail_nodes; 
bool slots_empty_host; + std::atomic slots_override_slot; + std::atomic slots_override_owner; + std::atomic accept_requests_on_wrong_owner; std::unordered_map owner_override; std::unordered_map forced_error_by_key; std::atomic slots_calls; @@ -120,6 +123,9 @@ struct ClusterMeta { : fail_slots(false) , fail_nodes(false) , slots_empty_host(false) + , slots_override_slot(-1) + , slots_override_owner(-1) + , accept_requests_on_wrong_owner(false) , slots_calls(0) , nodes_calls(0) , moved_error_calls(0) @@ -207,6 +213,51 @@ class ClusterCommandHandler : public brpc::RedisCommandHandler { output->SetError("ERR cluster slots disabled for test"); return brpc::REDIS_CMD_HANDLED; } + + const int override_slot = _meta->slots_override_slot.load(std::memory_order_relaxed); + const int override_owner = _meta->slots_override_owner.load(std::memory_order_relaxed); + const int default_owner = (override_slot >= 0 && override_slot <= 16383) + ? OwnerBySlot(override_slot) + : -1; + if (default_owner != -1 && + (override_owner == 0 || override_owner == 1) && + override_owner != default_owner) { + struct SlotRange { + int start; + int end; + int owner; + SlotRange(int s, int e, int o) : start(s), end(e), owner(o) {} + }; + + std::vector ranges; + if (override_slot <= kSplitSlot) { + if (override_slot > 0) { + ranges.push_back(SlotRange(0, override_slot - 1, 0)); + } + ranges.push_back(SlotRange(override_slot, override_slot, override_owner)); + if (override_slot < kSplitSlot) { + ranges.push_back(SlotRange(override_slot + 1, kSplitSlot, 0)); + } + ranges.push_back(SlotRange(kSplitSlot + 1, 16383, 1)); + } else { + ranges.push_back(SlotRange(0, kSplitSlot, 0)); + if (override_slot > kSplitSlot + 1) { + ranges.push_back(SlotRange(kSplitSlot + 1, override_slot - 1, 1)); + } + ranges.push_back(SlotRange(override_slot, override_slot, override_owner)); + if (override_slot < 16383) { + ranges.push_back(SlotRange(override_slot + 1, 16383, 1)); + } + } + + output->SetArray(ranges.size()); + for 
(size_t i = 0; i < ranges.size(); ++i) { + FillSlotEntry((*output)[i], ranges[i].start, ranges[i].end, + _meta->endpoint[ranges[i].owner], _meta->slots_empty_host); + } + return brpc::REDIS_CMD_HANDLED; + } + output->SetArray(2); FillSlotEntry((*output)[0], 0, kSplitSlot, _meta->endpoint[0], _meta->slots_empty_host); @@ -324,7 +375,9 @@ class KVCommandHandler : public brpc::RedisCommandHandler { } } - if (!bypass_owner_check && owner != _data->node_id) { + const bool enforce_owner = + !_data->meta->accept_requests_on_wrong_owner.load(std::memory_order_relaxed); + if (!bypass_owner_check && enforce_owner && owner != _data->node_id) { _data->meta->moved_error_calls.fetch_add(1, std::memory_order_relaxed); output->FormatError("MOVED %d %s", slot, _data->meta->endpoint[owner].c_str()); return brpc::REDIS_CMD_HANDLED; @@ -1406,6 +1459,64 @@ TEST_F(RedisClusterChannelTest, periodic_refresh_fallbacks_to_nodes_when_slots_f ASSERT_TRUE(nodes_used); } +TEST_F(RedisClusterChannelTest, periodic_refresh_updates_slot_cache_on_topology_change) { + brpc::RedisClusterChannel channel; + brpc::RedisClusterChannelOptions options; + options.enable_periodic_refresh = true; + options.refresh_interval_s = 1; + options.max_redirect = 5; + ASSERT_EQ(0, channel.Init(SeedList(), &options)); + + _meta->accept_requests_on_wrong_owner.store(true, std::memory_order_relaxed); + + const std::string key = FindKeyForNode(0); + const int slot = static_cast(HashSlot(key)); + const std::string value_by_owner[2] = {"value-on-node0", "value-on-node1"}; + { + BAIDU_SCOPED_LOCK(_node[0].mutex); + _node[0].kv[key] = value_by_owner[0]; + } + { + BAIDU_SCOPED_LOCK(_node[1].mutex); + _node[1].kv[key] = value_by_owner[1]; + } + + brpc::RedisRequest req; + brpc::RedisResponse resp; + brpc::Controller cntl; + ASSERT_TRUE(req.AddCommand("get %s", key.c_str())); + channel.CallMethod(NULL, &cntl, &req, &resp, NULL); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + ASSERT_EQ(1, resp.reply_size()); + 
ASSERT_TRUE(resp.reply(0).is_string()); + ASSERT_EQ(value_by_owner[0], resp.reply(0).data()); + + const int before_slots = _meta->slots_calls.load(std::memory_order_relaxed); + const int target_owner = 1 - OwnerBySlot(slot); + _meta->slots_override_slot.store(slot, std::memory_order_relaxed); + _meta->slots_override_owner.store(target_owner, std::memory_order_relaxed); + + bool updated = false; + for (int i = 0; i < 50; ++i) { + brpc::RedisRequest req2; + brpc::RedisResponse resp2; + brpc::Controller cntl2; + ASSERT_TRUE(req2.AddCommand("get %s", key.c_str())); + channel.CallMethod(NULL, &cntl2, &req2, &resp2, NULL); + ASSERT_FALSE(cntl2.Failed()) << cntl2.ErrorText(); + ASSERT_EQ(1, resp2.reply_size()); + ASSERT_TRUE(resp2.reply(0).is_string()); + if (resp2.reply(0).data() == value_by_owner[target_owner]) { + updated = true; + break; + } + bthread_usleep(100000); + } + ASSERT_TRUE(updated); + ASSERT_GT(_meta->slots_calls.load(std::memory_order_relaxed), before_slots); + ASSERT_EQ(0, _meta->moved_error_calls.load(std::memory_order_relaxed)); +} + TEST_F(RedisClusterChannelTest, async_pipeline_mixed_commands) { brpc::RedisClusterChannel channel; InitChannel(&channel); From f804ef2f2c90c4e4688dce2db2a077a69f5e993a Mon Sep 17 00:00:00 2001 From: MalikHou Date: Fri, 6 Mar 2026 20:26:15 +0800 Subject: [PATCH 37/84] add tcp transport event dispatcher unsched flag & fix RDMA event dispatcher unsched flag --- docs/cn/rdma.md | 22 ++++++++++++++++++++-- docs/en/rdma.md | 20 +++++++++++++++++++- src/brpc/event_dispatcher.cpp | 19 +++++++++++++++++++ src/brpc/event_dispatcher.h | 8 ++++++++ src/brpc/rdma/rdma_endpoint.cpp | 1 - src/brpc/rdma/rdma_endpoint.h | 1 - src/brpc/rdma_transport.cpp | 9 +++++---- src/brpc/tcp_transport.cpp | 11 +++++++++-- 8 files changed, 80 insertions(+), 11 deletions(-) diff --git a/docs/cn/rdma.md b/docs/cn/rdma.md index e775459893..3106f28caf 100644 --- a/docs/cn/rdma.md +++ b/docs/cn/rdma.md @@ -47,7 +47,24 @@ RDMA要求数据收发所使用的内存空间必须被注册(memory 
register RDMA是硬件相关的通信技术,有很多独特的概念,比如device、port、GID、LID、MaxSge等。这些参数在初始化时会从对应的网卡中读取出来,并且做出默认的选择(参见src/brpc/rdma/rdma_helper.cpp)。有时默认的选择并非用户的期望,则可以通过flag参数方式指定。 -RDMA支持事件驱动和轮询两种模式,默认是事件驱动模式,通过设置rdma_use_polling可以开启轮询模式。轮询模式下还可以设置轮询器数目(rdma_poller_num),以及是否主动放弃CPU(rdma_poller_yield)。轮询模式下还可以设置一个回调函数,在每次轮询时调用,可以配合io_uring/spdk等使用。在配合使用spdk等驱动的时候,因为spdk只支持轮询模式,并且只能在单线程使用(或者叫Run To Completion模式上使用)执行一个任务过程中不允许被调度到别的线程上,所以这时候需要设置(rdma_edisp_unsched)为true,使事件驱动程序一直占用一个worker线程,不能调度别的任务。 +RDMA支持事件驱动和轮询两种模式,默认是事件驱动模式,通过设置rdma_use_polling可以开启轮询模式。轮询模式下还可以设置轮询器数目(rdma_poller_num),以及是否主动放弃CPU(rdma_poller_yield)。轮询模式下还可以设置一个回调函数,在每次轮询时调用,可以配合io_uring/spdk等使用。 + +`event_dispatcher_edisp_unsched` 是全局开关,同时影响普通模式(TCP)和RDMA模式的EventDispatcher调度行为。兼容历史配置,`rdma_edisp_unsched` 仍保留,但已标记为废弃,未来版本会移除。 + +最终生效条件统一为: +`event_dispatcher_edisp_unsched || rdma_edisp_unsched` + +启动时不会再改写用户传入的 flag,运行时严格按用户配置值生效。 + +推荐使用方式: +1. 新部署:只配置 `event_dispatcher_edisp_unsched`。 +2. 存量部署:`rdma_edisp_unsched` 仅作过渡兼容,逐步迁移到 `event_dispatcher_edisp_unsched`。 +3. 避免脚本中给出“冲突值”;在统一 OR 语义下,只要任一 flag 为 `true`,EventDispatcher 就不可调度。 + +行为示例: +1. 仅设置 `-rdma_edisp_unsched=true`:`rdma_edisp_unsched=true`、`event_dispatcher_edisp_unsched=false`;TCP和RDMA均不可调度。 +2. 仅设置 `-event_dispatcher_edisp_unsched=true`:两个flag同为`true`;TCP和RDMA均不可调度。 +3. 
同时设置 `-rdma_edisp_unsched=true -event_dispatcher_edisp_unsched=false`:`rdma_edisp_unsched=true`、`event_dispatcher_edisp_unsched=false`;TCP和RDMA均不可调度。 # 参数 @@ -73,5 +90,6 @@ RDMA支持事件驱动和轮询两种模式,默认是事件驱动模式,通 * rdma_use_polling: 是否使用RDMA的轮询模式,默认false。 * rdma_poller_num: 轮询模式下的poller数目,默认1。 * rdma_poller_yield: 轮询模式下的poller是否主动放弃CPU,默认是false。 -* rdma_edisp_unsched: 让事件驱动器不可以被调度,默认是false。 +* event_dispatcher_edisp_unsched: 全局开关,控制EventDispatcher是否不可被调度(true时不可调度),默认是false。 +* rdma_edisp_unsched: 废弃兼容参数(未来版本计划移除)。当前仍参与统一生效判断,默认是false。 * rdma_disable_bthread: 禁用bthread,默认是false。 diff --git a/docs/en/rdma.md b/docs/en/rdma.md index 99f1ecd781..2781c2d550 100644 --- a/docs/en/rdma.md +++ b/docs/en/rdma.md @@ -47,6 +47,23 @@ The application can manage memory by itself and send data with IOBuf::append_use RDMA is hardware-related. It has some different concepts such as device, port, GID, LID, MaxSge and so on. These parameters can be read from NICs at initialization, and brpc will make the default choice (see src/brpc/rdma/rdma_helper.cpp). Sometimes the default choice is not the expectation, then it can be changed in the flag way. +`event_dispatcher_edisp_unsched` is a global flag and affects EventDispatcher scheduling in both normal mode (TCP) and RDMA mode. For backward compatibility, `rdma_edisp_unsched` is still kept, but it is deprecated and will be removed in a future release. + +The effective unsched condition is unified as: +`event_dispatcher_edisp_unsched || rdma_edisp_unsched` + +No startup synchronization rewrites user flags. Runtime behavior is determined directly from user-provided values. + +Recommended usage: +1. New deployment: set only `event_dispatcher_edisp_unsched`. +2. Existing deployment: keep `rdma_edisp_unsched` temporarily, but migrate to `event_dispatcher_edisp_unsched`. +3. Avoid conflicting values in scripts; with unified OR semantics, either flag being `true` makes EventDispatcher unschedulable. + +Examples: +1. 
Only `-rdma_edisp_unsched=true`: `rdma_edisp_unsched=true`, `event_dispatcher_edisp_unsched=false`; both TCP and RDMA are unschedulable. +2. Only `-event_dispatcher_edisp_unsched=true`: both flags are `true`; both TCP and RDMA are unschedulable. +3. Both `-rdma_edisp_unsched=true -event_dispatcher_edisp_unsched=false`: `rdma_edisp_unsched=true`, `event_dispatcher_edisp_unsched=false`; both TCP and RDMA are unschedulable. + # Parameters Configurable parameters: @@ -71,5 +88,6 @@ Configurable parameters: * rdma_use_polling: Whether to use RDMA polling mode, default is false. * rdma_poller_num: The number of pollers in polling mode, default is 1. * rdma_poller_yield: Whether pollers in polling mode voluntarily relinquish the CPU, default is false. -* rdma_edisp_unsched`: Prevents the event driver from being scheduled, default is false. +* event_dispatcher_edisp_unsched: Global switch for EventDispatcher scheduling (true means unschedulable), default is false. +* rdma_edisp_unsched: Deprecated compatibility flag (planned removal in a future release). It still participates in unified unsched condition, default is false. * rdma_disable_bthread: Disables bthread, default is false. 
diff --git a/src/brpc/event_dispatcher.cpp b/src/brpc/event_dispatcher.cpp index bbd946846f..a4f253bcb6 100644 --- a/src/brpc/event_dispatcher.cpp +++ b/src/brpc/event_dispatcher.cpp @@ -30,6 +30,16 @@ DECLARE_int32(task_group_ntags); namespace brpc { DEFINE_int32(event_dispatcher_num, 1, "Number of event dispatcher"); +DEFINE_bool(event_dispatcher_edisp_unsched, false, + "Disable event dispatcher schedule"); + +#if BRPC_WITH_RDMA +namespace rdma { +DEFINE_bool(rdma_edisp_unsched, false, + "Deprecated and will be removed in a future release, " + "use event_dispatcher_edisp_unsched instead"); +} // namespace rdma +#endif DEFINE_bool(usercode_in_pthread, false, "Call user's callback in pthreads, use bthreads otherwise"); @@ -41,6 +51,15 @@ static bvar::LatencyRecorder* g_edisp_read_lantency = NULL; static bvar::LatencyRecorder* g_edisp_write_lantency = NULL; static pthread_once_t g_edisp_once = PTHREAD_ONCE_INIT; +bool EventDispatcherUnsched() { +#if BRPC_WITH_RDMA + return FLAGS_event_dispatcher_edisp_unsched || + rdma::FLAGS_rdma_edisp_unsched; +#else + return FLAGS_event_dispatcher_edisp_unsched; +#endif +} + static void StopAndJoinGlobalDispatchers() { for (int i = 0; i < FLAGS_task_group_ntags; ++i) { for (int j = 0; j < FLAGS_event_dispatcher_num; ++j) { diff --git a/src/brpc/event_dispatcher.h b/src/brpc/event_dispatcher.h index fd91d3c53f..f4f60e67ea 100644 --- a/src/brpc/event_dispatcher.h +++ b/src/brpc/event_dispatcher.h @@ -19,6 +19,7 @@ #ifndef BRPC_EVENT_DISPATCHER_H #define BRPC_EVENT_DISPATCHER_H +#include // DECLARE_bool #include "butil/macros.h" // DISALLOW_COPY_AND_ASSIGN #include "bthread/types.h" // bthread_t, bthread_attr_t #include "brpc/versioned_ref_with_id.h" @@ -26,6 +27,8 @@ namespace brpc { +DECLARE_bool(event_dispatcher_edisp_unsched); + // Unique identifier of a IOEventData. // Users shall store EventDataId instead of EventData and call EventData::Address() // to convert the identifier to an unique_ptr at each access. 
Whenever a @@ -188,6 +191,11 @@ template friend class IOEvent; EventDispatcher& GetGlobalEventDispatcher(int fd, bthread_tag_t tag); +// Unified unsched switch for transport layer. +// false -> background start (allowing schedule away), +// true -> urgent start (foreground scheduling before caller continues). +bool EventDispatcherUnsched(); + // IOEvent class manages the IO events of a file descriptor conveniently. template class IOEvent { diff --git a/src/brpc/rdma/rdma_endpoint.cpp b/src/brpc/rdma/rdma_endpoint.cpp index 3cc2107f23..a939332f4c 100644 --- a/src/brpc/rdma/rdma_endpoint.cpp +++ b/src/brpc/rdma/rdma_endpoint.cpp @@ -63,7 +63,6 @@ BRPC_VALIDATE_GFLAG(rdma_trace_verbose, brpc::PassValidate); DEFINE_bool(rdma_use_polling, false, "Use polling mode for RDMA."); DEFINE_int32(rdma_poller_num, 1, "Poller number in RDMA polling mode."); DEFINE_bool(rdma_poller_yield, false, "Yield thread in RDMA polling mode."); -DEFINE_bool(rdma_edisp_unsched, false, "Disable event dispatcher schedule"); DEFINE_bool(rdma_disable_bthread, false, "Disable bthread in RDMA"); static const size_t IOBUF_BLOCK_HEADER_LEN = 32; // implementation-dependent diff --git a/src/brpc/rdma/rdma_endpoint.h b/src/brpc/rdma/rdma_endpoint.h index eb4714ef0d..54a008f1f7 100644 --- a/src/brpc/rdma/rdma_endpoint.h +++ b/src/brpc/rdma/rdma_endpoint.h @@ -38,7 +38,6 @@ namespace rdma { DECLARE_bool(rdma_use_polling); DECLARE_int32(rdma_poller_num); -DECLARE_bool(rdma_edisp_unsched); DECLARE_bool(rdma_disable_bthread); class RdmaConnect : public AppConnect { diff --git a/src/brpc/rdma_transport.cpp b/src/brpc/rdma_transport.cpp index 8fe88c6b4b..88d89a7b06 100644 --- a/src/brpc/rdma_transport.cpp +++ b/src/brpc/rdma_transport.cpp @@ -18,6 +18,7 @@ #if BRPC_WITH_RDMA #include "brpc/rdma_transport.h" +#include "brpc/event_dispatcher.h" #include "brpc/tcp_transport.h" #include "brpc/rdma/rdma_endpoint.h" #include "brpc/rdma/rdma_helper.h" @@ -127,13 +128,13 @@ void 
RdmaTransport::ProcessEvent(bthread_attr_t attr) { bthread_t tid; if (FLAGS_usercode_in_coroutine) { OnEdge(_socket); - } else if (rdma::FLAGS_rdma_edisp_unsched == false) { - auto rc = bthread_start_background(&tid, &attr, OnEdge, _socket); + } else if (!EventDispatcherUnsched()) { + auto rc = bthread_start_urgent(&tid, &attr, OnEdge, _socket); if (rc != 0) { LOG(FATAL) << "Fail to start ProcessEvent"; OnEdge(_socket); } - } else if (bthread_start_urgent(&tid, &attr, OnEdge, _socket) != 0) { + } else if (bthread_start_background(&tid, &attr, OnEdge, _socket) != 0) { LOG(FATAL) << "Fail to start ProcessEvent"; OnEdge(_socket); } @@ -235,4 +236,4 @@ bool RdmaTransport::OptionsAvailableOverRdma(const ServerOptions* opt) { return true; } } // namespace brpc -#endif \ No newline at end of file +#endif diff --git a/src/brpc/tcp_transport.cpp b/src/brpc/tcp_transport.cpp index 37db7a8966..27e6ae87be 100644 --- a/src/brpc/tcp_transport.cpp +++ b/src/brpc/tcp_transport.cpp @@ -16,6 +16,7 @@ // under the License. 
#include "brpc/tcp_transport.h" +#include "brpc/event_dispatcher.h" namespace brpc { DECLARE_bool(usercode_in_coroutine); @@ -68,7 +69,13 @@ void TcpTransport::ProcessEvent(bthread_attr_t attr) { bthread_t tid; if (FLAGS_usercode_in_coroutine) { OnEdge(_socket); - } else if (bthread_start_urgent(&tid, &attr, OnEdge, _socket) != 0) { + } else if (!EventDispatcherUnsched()) { + auto rc = bthread_start_urgent(&tid, &attr, OnEdge, _socket); + if (rc != 0) { + LOG(FATAL) << "Fail to start ProcessEvent"; + OnEdge(_socket); + } + } else if (bthread_start_background(&tid, &attr, OnEdge, _socket) != 0) { LOG(FATAL) << "Fail to start ProcessEvent"; OnEdge(_socket); } @@ -96,4 +103,4 @@ void TcpTransport::QueueMessage(InputMessageClosure& input_msg, } } -} // namespace brpc \ No newline at end of file +} // namespace brpc From 85f453639162547d4ea60491e69f66d53c52aecd Mon Sep 17 00:00:00 2001 From: MalikHou Date: Sat, 7 Mar 2026 13:37:11 +0800 Subject: [PATCH 38/84] fix --- docs/cn/io.md | 2 ++ docs/cn/rdma.md | 7 +++++-- docs/en/io.md | 2 ++ docs/en/rdma.md | 7 +++++-- src/brpc/event_dispatcher.h | 9 +++++---- 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/docs/cn/io.md b/docs/cn/io.md index baadf27164..17c60d8ff5 100644 --- a/docs/cn/io.md +++ b/docs/cn/io.md @@ -14,6 +14,8 @@ linux一般使用non-blocking IO提高IO并发度。当IO并发度很低时,no 由于epoll的[一个bug](https://web.archive.org/web/20150423184820/https://patchwork.kernel.org/patch/1970231/)(开发brpc时仍有)及epoll_ctl较大的开销,EDISP使用Edge triggered模式。当收到事件时,EDISP给一个原子变量加1,只有当加1前的值是0时启动一个bthread处理对应fd上的数据。在背后,EDISP把所在的pthread让给了新建的bthread,使其有更好的cache locality,可以尽快地读取fd上的数据。而EDISP所在的bthread会被偷到另外一个pthread继续执行,这个过程即是bthread的work stealing调度。要准确理解那个原子变量的工作方式可以先阅读[atomic instructions](atomic_instructions.md),再看[Socket::StartInputEvent](https://github.com/apache/brpc/blob/master/src/brpc/socket.cpp)。这些方法使得brpc读取同一个fd时产生的竞争是[wait-free](http://en.wikipedia.org/wiki/Non-blocking_algorithm#Wait-freedom)的。 +在当前实现里,`Transport::ProcessEvent` 会按 
`EventDispatcherUnsched()` 选择启动方式:返回 `false` 时走 `bthread_start_urgent`,返回 `true` 时走 `bthread_start_background`。此外,RDMA 在轮询模式与事件模式对 `last_msg` 的处理不同:`rdma_use_polling=false` 时不会在 `RdmaTransport::QueueMessage` 里处理 `last_msg`,轮询模式下会继续处理。 + [InputMessenger](https://github.com/apache/brpc/blob/master/src/brpc/input_messenger.h)负责从fd上切割和处理消息,它通过用户回调函数理解不同的格式。Parse一般是把消息从二进制流上切割下来,运行时间较固定;Process则是进一步解析消息(比如反序列化为protobuf)后调用用户回调,时间不确定。若一次从某个fd读取出n个消息(n > 1),InputMessenger会启动n-1个bthread分别处理前n-1个消息,最后一个消息则会在原地被Process。InputMessenger会逐一尝试多种协议,由于一个连接上往往只有一种消息格式,InputMessenger会记录下上次的选择,而避免每次都重复尝试。 可以看到,fd间和fd内的消息都会在brpc中获得并发,这使brpc非常擅长大消息的读取,在高负载时仍能及时处理不同来源的消息,减少长尾的存在。 diff --git a/docs/cn/rdma.md b/docs/cn/rdma.md index 3106f28caf..6211f819d0 100644 --- a/docs/cn/rdma.md +++ b/docs/cn/rdma.md @@ -49,7 +49,10 @@ RDMA是硬件相关的通信技术,有很多独特的概念,比如device、p RDMA支持事件驱动和轮询两种模式,默认是事件驱动模式,通过设置rdma_use_polling可以开启轮询模式。轮询模式下还可以设置轮询器数目(rdma_poller_num),以及是否主动放弃CPU(rdma_poller_yield)。轮询模式下还可以设置一个回调函数,在每次轮询时调用,可以配合io_uring/spdk等使用。 -`event_dispatcher_edisp_unsched` 是全局开关,同时影响普通模式(TCP)和RDMA模式的EventDispatcher调度行为。兼容历史配置,`rdma_edisp_unsched` 仍保留,但已标记为废弃,未来版本会移除。 +`event_dispatcher_edisp_unsched` 是全局开关,同时影响普通模式(TCP)和 RDMA 模式的 EventDispatcher 调度行为。 +它用于替代 `rdma_edisp_unsched`。当前保留 `rdma_edisp_unsched` 仅用于兼容历史命令行,未来版本会移除。两者语义一致:值为 `true` 时都表示 EventDispatcher 不可被调度。 + +历史说明:之前 RDMA 路径里出现过一次 `if` 判断 bug,导致行为和 flag 语义不一致;当前逻辑已修复,并按统一语义生效。 最终生效条件统一为: `event_dispatcher_edisp_unsched || rdma_edisp_unsched` @@ -63,7 +66,7 @@ RDMA支持事件驱动和轮询两种模式,默认是事件驱动模式,通 行为示例: 1. 仅设置 `-rdma_edisp_unsched=true`:`rdma_edisp_unsched=true`、`event_dispatcher_edisp_unsched=false`;TCP和RDMA均不可调度。 -2. 仅设置 `-event_dispatcher_edisp_unsched=true`:两个flag同为`true`;TCP和RDMA均不可调度。 +2. 仅设置 `-event_dispatcher_edisp_unsched=true`:`rdma_edisp_unsched=false`、`event_dispatcher_edisp_unsched=true`;TCP和RDMA均不可调度。 3. 
同时设置 `-rdma_edisp_unsched=true -event_dispatcher_edisp_unsched=false`:`rdma_edisp_unsched=true`、`event_dispatcher_edisp_unsched=false`;TCP和RDMA均不可调度。 # 参数 diff --git a/docs/en/io.md b/docs/en/io.md index d048bcea5b..70ac85024f 100644 --- a/docs/en/io.md +++ b/docs/en/io.md @@ -14,6 +14,8 @@ A message is a bounded binary data read from a connection, which may be a reques Because of a [bug](https://web.archive.org/web/20150423184820/https://patchwork.kernel.org/patch/1970231/) of epoll (at the time of developing brpc) and overhead of epoll_ctl, edge triggered mode is used in EDISP. After receiving an event, an atomic variable associated with the fd is added by one atomically. If the variable is zero before addition, a bthread is started to handle the data from the fd. The pthread worker in which EDISP runs is yielded to the newly created bthread to make it start reading ASAP and have a better cache locality. The bthread in which EDISP runs will be stolen to another pthread and keep running, this mechanism is work stealing used in bthreads. To understand exactly how that atomic variable works, you can read [atomic instructions](atomic_instructions.md) first, then check [Socket::StartInputEvent](https://github.com/apache/brpc/blob/master/src/brpc/socket.cpp). These methods make contentions on dispatching events of one fd [wait-free](http://en.wikipedia.org/wiki/Non-blocking_algorithm#Wait-freedom). +In current implementation, `Transport::ProcessEvent` chooses start mode based on `EventDispatcherUnsched()`: `false` uses `bthread_start_urgent`, and `true` uses `bthread_start_background`. In addition, RDMA handles `last_msg` differently between polling and event modes: when `rdma_use_polling=false`, `RdmaTransport::QueueMessage` does not process `last_msg`; in polling mode it continues to process it. + [InputMessenger](https://github.com/apache/brpc/blob/master/src/brpc/input_messenger.h) cuts messages and uses customizable callbacks to handle different format of data. 
`Parse` callback cuts messages from binary data and has relatively stable running time; `Process` parses messages further(such as parsing by protobuf) and calls users' callbacks, which vary in running time. If n(n > 1) messages are read from the fd, InputMessenger launches n-1 bthreads to handle first n-1 messages respectively, and processes the last message in-place. InputMessenger tries protocols one by one. Since one connections often has only one type of messages, InputMessenger remembers current protocol to avoid trying for protocols next time. It can be seen that messages from different fds or even same fd are processed concurrently in brpc, which makes brpc good at handling large messages and reducing long tails on processing messages from different sources under high workloads. diff --git a/docs/en/rdma.md b/docs/en/rdma.md index 2781c2d550..b36ac2033c 100644 --- a/docs/en/rdma.md +++ b/docs/en/rdma.md @@ -47,7 +47,10 @@ The application can manage memory by itself and send data with IOBuf::append_use RDMA is hardware-related. It has some different concepts such as device, port, GID, LID, MaxSge and so on. These parameters can be read from NICs at initialization, and brpc will make the default choice (see src/brpc/rdma/rdma_helper.cpp). Sometimes the default choice is not the expectation, then it can be changed in the flag way. -`event_dispatcher_edisp_unsched` is a global flag and affects EventDispatcher scheduling in both normal mode (TCP) and RDMA mode. For backward compatibility, `rdma_edisp_unsched` is still kept, but it is deprecated and will be removed in a future release. +`event_dispatcher_edisp_unsched` is a global flag and affects EventDispatcher scheduling in both normal mode (TCP) and RDMA mode. +It replaces `rdma_edisp_unsched`. `rdma_edisp_unsched` is still kept only for command-line compatibility and is planned for removal in a future release. The two flags have the same semantics: `true` means EventDispatcher is unschedulable. 
+ +Historical note: there was a previous `if`-condition bug on the RDMA path, where behavior did not match the flag semantics. The logic is now fixed and follows the unified semantics. The effective unsched condition is unified as: `event_dispatcher_edisp_unsched || rdma_edisp_unsched` @@ -61,7 +64,7 @@ Recommended usage: Examples: 1. Only `-rdma_edisp_unsched=true`: `rdma_edisp_unsched=true`, `event_dispatcher_edisp_unsched=false`; both TCP and RDMA are unschedulable. -2. Only `-event_dispatcher_edisp_unsched=true`: both flags are `true`; both TCP and RDMA are unschedulable. +2. Only `-event_dispatcher_edisp_unsched=true`: `rdma_edisp_unsched=false`, `event_dispatcher_edisp_unsched=true`; both TCP and RDMA are unschedulable. 3. Both `-rdma_edisp_unsched=true -event_dispatcher_edisp_unsched=false`: `rdma_edisp_unsched=true`, `event_dispatcher_edisp_unsched=false`; both TCP and RDMA are unschedulable. # Parameters diff --git a/src/brpc/event_dispatcher.h b/src/brpc/event_dispatcher.h index f4f60e67ea..3fdc9f17b9 100644 --- a/src/brpc/event_dispatcher.h +++ b/src/brpc/event_dispatcher.h @@ -90,8 +90,9 @@ namespace rdma { class RdmaEndpoint; } -// Dispatch edge-triggered events of file descriptors to consumers -// running in separate bthreads. +// Dispatch edge-triggered events of file descriptors to consumers. +// By default callbacks run in spawned bthreads; when usercode-in-coroutine is +// enabled, the callback may run inline in the current coroutine. class EventDispatcher { friend class Socket; friend class rdma::RdmaEndpoint; @@ -192,8 +193,8 @@ template friend class IOEvent; EventDispatcher& GetGlobalEventDispatcher(int fd, bthread_tag_t tag); // Unified unsched switch for transport layer. -// false -> background start (allowing schedule away), -// true -> urgent start (foreground scheduling before caller continues). +// false -> urgent start (foreground scheduling before caller continues), +// true -> background start (allowing schedule away). 
bool EventDispatcherUnsched(); // IOEvent class manages the IO events of a file descriptor conveniently. From d698032dbb3e3865f16ab8546d6fae864a2cf297 Mon Sep 17 00:00:00 2001 From: MalikHou Date: Mon, 9 Mar 2026 11:50:30 +0800 Subject: [PATCH 39/84] fix --- docs/cn/rdma.md | 20 ++++++-------------- docs/en/rdma.md | 20 ++++++-------------- src/brpc/event_dispatcher.cpp | 13 ------------- 3 files changed, 12 insertions(+), 41 deletions(-) diff --git a/docs/cn/rdma.md b/docs/cn/rdma.md index 6211f819d0..fd70686bb1 100644 --- a/docs/cn/rdma.md +++ b/docs/cn/rdma.md @@ -50,24 +50,17 @@ RDMA是硬件相关的通信技术,有很多独特的概念,比如device、p RDMA支持事件驱动和轮询两种模式,默认是事件驱动模式,通过设置rdma_use_polling可以开启轮询模式。轮询模式下还可以设置轮询器数目(rdma_poller_num),以及是否主动放弃CPU(rdma_poller_yield)。轮询模式下还可以设置一个回调函数,在每次轮询时调用,可以配合io_uring/spdk等使用。 `event_dispatcher_edisp_unsched` 是全局开关,同时影响普通模式(TCP)和 RDMA 模式的 EventDispatcher 调度行为。 -它用于替代 `rdma_edisp_unsched`。当前保留 `rdma_edisp_unsched` 仅用于兼容历史命令行,未来版本会移除。两者语义一致:值为 `true` 时都表示 EventDispatcher 不可被调度。 +值为 `true` 时 EventDispatcher 不可被调度,值为 `false` 时保持可调度(默认)。 -历史说明:之前 RDMA 路径里出现过一次 `if` 判断 bug,导致行为和 flag 语义不一致;当前逻辑已修复,并按统一语义生效。 - -最终生效条件统一为: -`event_dispatcher_edisp_unsched || rdma_edisp_unsched` - -启动时不会再改写用户传入的 flag,运行时严格按用户配置值生效。 +运行时严格按用户配置值生效。 推荐使用方式: -1. 新部署:只配置 `event_dispatcher_edisp_unsched`。 -2. 存量部署:`rdma_edisp_unsched` 仅作过渡兼容,逐步迁移到 `event_dispatcher_edisp_unsched`。 -3. 避免脚本中给出“冲突值”;在统一 OR 语义下,只要任一 flag 为 `true`,EventDispatcher 就不可调度。 +1. 默认不配置,保持 `false`。 +2. 需要不可调度行为时,设置 `-event_dispatcher_edisp_unsched=true`。 行为示例: -1. 仅设置 `-rdma_edisp_unsched=true`:`rdma_edisp_unsched=true`、`event_dispatcher_edisp_unsched=false`;TCP和RDMA均不可调度。 -2. 仅设置 `-event_dispatcher_edisp_unsched=true`:`rdma_edisp_unsched=false`、`event_dispatcher_edisp_unsched=true`;TCP和RDMA均不可调度。 -3. 同时设置 `-rdma_edisp_unsched=true -event_dispatcher_edisp_unsched=false`:`rdma_edisp_unsched=true`、`event_dispatcher_edisp_unsched=false`;TCP和RDMA均不可调度。 +1. `-event_dispatcher_edisp_unsched=false`:TCP 和 RDMA 均可调度。 +2. 
`-event_dispatcher_edisp_unsched=true`:TCP 和 RDMA 均不可调度。 # 参数 @@ -94,5 +87,4 @@ RDMA支持事件驱动和轮询两种模式,默认是事件驱动模式,通 * rdma_poller_num: 轮询模式下的poller数目,默认1。 * rdma_poller_yield: 轮询模式下的poller是否主动放弃CPU,默认是false。 * event_dispatcher_edisp_unsched: 全局开关,控制EventDispatcher是否不可被调度(true时不可调度),默认是false。 -* rdma_edisp_unsched: 废弃兼容参数(未来版本计划移除)。当前仍参与统一生效判断,默认是false。 * rdma_disable_bthread: 禁用bthread,默认是false。 diff --git a/docs/en/rdma.md b/docs/en/rdma.md index b36ac2033c..98ac6981bc 100644 --- a/docs/en/rdma.md +++ b/docs/en/rdma.md @@ -48,24 +48,17 @@ The application can manage memory by itself and send data with IOBuf::append_use RDMA is hardware-related. It has some different concepts such as device, port, GID, LID, MaxSge and so on. These parameters can be read from NICs at initialization, and brpc will make the default choice (see src/brpc/rdma/rdma_helper.cpp). Sometimes the default choice is not the expectation, then it can be changed in the flag way. `event_dispatcher_edisp_unsched` is a global flag and affects EventDispatcher scheduling in both normal mode (TCP) and RDMA mode. -It replaces `rdma_edisp_unsched`. `rdma_edisp_unsched` is still kept only for command-line compatibility and is planned for removal in a future release. The two flags have the same semantics: `true` means EventDispatcher is unschedulable. +`true` means EventDispatcher is unschedulable, and `false` means schedulable (default). -Historical note: there was a previous `if`-condition bug on the RDMA path, where behavior did not match the flag semantics. The logic is now fixed and follows the unified semantics. - -The effective unsched condition is unified as: -`event_dispatcher_edisp_unsched || rdma_edisp_unsched` - -No startup synchronization rewrites user flags. Runtime behavior is determined directly from user-provided values. +Runtime behavior is determined directly from user-provided values. Recommended usage: -1. New deployment: set only `event_dispatcher_edisp_unsched`. -2. 
Existing deployment: keep `rdma_edisp_unsched` temporarily, but migrate to `event_dispatcher_edisp_unsched`. -3. Avoid conflicting values in scripts; with unified OR semantics, either flag being `true` makes EventDispatcher unschedulable. +1. Keep the default `false` when unsched is not needed. +2. Set `-event_dispatcher_edisp_unsched=true` when unsched behavior is required. Examples: -1. Only `-rdma_edisp_unsched=true`: `rdma_edisp_unsched=true`, `event_dispatcher_edisp_unsched=false`; both TCP and RDMA are unschedulable. -2. Only `-event_dispatcher_edisp_unsched=true`: `rdma_edisp_unsched=false`, `event_dispatcher_edisp_unsched=true`; both TCP and RDMA are unschedulable. -3. Both `-rdma_edisp_unsched=true -event_dispatcher_edisp_unsched=false`: `rdma_edisp_unsched=true`, `event_dispatcher_edisp_unsched=false`; both TCP and RDMA are unschedulable. +1. `-event_dispatcher_edisp_unsched=false`: both TCP and RDMA are schedulable. +2. `-event_dispatcher_edisp_unsched=true`: both TCP and RDMA are unschedulable. # Parameters @@ -92,5 +85,4 @@ Configurable parameters: * rdma_poller_num: The number of pollers in polling mode, default is 1. * rdma_poller_yield: Whether pollers in polling mode voluntarily relinquish the CPU, default is false. * event_dispatcher_edisp_unsched: Global switch for EventDispatcher scheduling (true means unschedulable), default is false. -* rdma_edisp_unsched: Deprecated compatibility flag (planned removal in a future release). It still participates in unified unsched condition, default is false. * rdma_disable_bthread: Disables bthread, default is false. 
diff --git a/src/brpc/event_dispatcher.cpp b/src/brpc/event_dispatcher.cpp index a4f253bcb6..a5265b8ccd 100644 --- a/src/brpc/event_dispatcher.cpp +++ b/src/brpc/event_dispatcher.cpp @@ -33,14 +33,6 @@ DEFINE_int32(event_dispatcher_num, 1, "Number of event dispatcher"); DEFINE_bool(event_dispatcher_edisp_unsched, false, "Disable event dispatcher schedule"); -#if BRPC_WITH_RDMA -namespace rdma { -DEFINE_bool(rdma_edisp_unsched, false, - "Deprecated and will be removed in a future release, " - "use event_dispatcher_edisp_unsched instead"); -} // namespace rdma -#endif - DEFINE_bool(usercode_in_pthread, false, "Call user's callback in pthreads, use bthreads otherwise"); DEFINE_bool(usercode_in_coroutine, false, @@ -52,12 +44,7 @@ static bvar::LatencyRecorder* g_edisp_write_lantency = NULL; static pthread_once_t g_edisp_once = PTHREAD_ONCE_INIT; bool EventDispatcherUnsched() { -#if BRPC_WITH_RDMA - return FLAGS_event_dispatcher_edisp_unsched || - rdma::FLAGS_rdma_edisp_unsched; -#else return FLAGS_event_dispatcher_edisp_unsched; -#endif } static void StopAndJoinGlobalDispatchers() { From 05bcd9aa8b61bce4a462c0cf2b8d779e0cd4a196 Mon Sep 17 00:00:00 2001 From: MalikHou Date: Mon, 9 Mar 2026 12:29:50 +0800 Subject: [PATCH 40/84] fix md --- docs/cn/io.md | 2 +- docs/en/io.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/cn/io.md b/docs/cn/io.md index 17c60d8ff5..b39c5ec4a0 100644 --- a/docs/cn/io.md +++ b/docs/cn/io.md @@ -14,7 +14,7 @@ linux一般使用non-blocking IO提高IO并发度。当IO并发度很低时,no 由于epoll的[一个bug](https://web.archive.org/web/20150423184820/https://patchwork.kernel.org/patch/1970231/)(开发brpc时仍有)及epoll_ctl较大的开销,EDISP使用Edge triggered模式。当收到事件时,EDISP给一个原子变量加1,只有当加1前的值是0时启动一个bthread处理对应fd上的数据。在背后,EDISP把所在的pthread让给了新建的bthread,使其有更好的cache locality,可以尽快地读取fd上的数据。而EDISP所在的bthread会被偷到另外一个pthread继续执行,这个过程即是bthread的work stealing调度。要准确理解那个原子变量的工作方式可以先阅读[atomic 
instructions](atomic_instructions.md),再看[Socket::StartInputEvent](https://github.com/apache/brpc/blob/master/src/brpc/socket.cpp)。这些方法使得brpc读取同一个fd时产生的竞争是[wait-free](http://en.wikipedia.org/wiki/Non-blocking_algorithm#Wait-freedom)的。 -在当前实现里,`Transport::ProcessEvent` 会按 `EventDispatcherUnsched()` 选择启动方式:返回 `false` 时走 `bthread_start_urgent`,返回 `true` 时走 `bthread_start_background`。此外,RDMA 在轮询模式与事件模式对 `last_msg` 的处理不同:`rdma_use_polling=false` 时不会在 `RdmaTransport::QueueMessage` 里处理 `last_msg`,轮询模式下会继续处理。 +在当前实现里,`Transport::ProcessEvent` 会按 `EventDispatcherUnsched()` 选择启动方式:返回 `false` 时走 `bthread_start_urgent`,返回 `true` 时走 `bthread_start_background`。此外,RDMA 在轮询模式与事件模式对 `last_msg` 的处理不同:`rdma_use_polling=false` 时不会在 `RdmaTransport::QueueMessage` 里处理 `last_msg`,轮询模式下会继续处理。并且在 `EventDispatcherUnsched()` 返回 `true` 时,`last_msg` 不会在当前执行流里直接处理,而是在新的 bthread 中执行。用户可以通过 `event_dispatcher_edisp_unsched` 来控制这一行为。 [InputMessenger](https://github.com/apache/brpc/blob/master/src/brpc/input_messenger.h)负责从fd上切割和处理消息,它通过用户回调函数理解不同的格式。Parse一般是把消息从二进制流上切割下来,运行时间较固定;Process则是进一步解析消息(比如反序列化为protobuf)后调用用户回调,时间不确定。若一次从某个fd读取出n个消息(n > 1),InputMessenger会启动n-1个bthread分别处理前n-1个消息,最后一个消息则会在原地被Process。InputMessenger会逐一尝试多种协议,由于一个连接上往往只有一种消息格式,InputMessenger会记录下上次的选择,而避免每次都重复尝试。 diff --git a/docs/en/io.md b/docs/en/io.md index 70ac85024f..6b63f0d1c7 100644 --- a/docs/en/io.md +++ b/docs/en/io.md @@ -14,7 +14,7 @@ A message is a bounded binary data read from a connection, which may be a reques Because of a [bug](https://web.archive.org/web/20150423184820/https://patchwork.kernel.org/patch/1970231/) of epoll (at the time of developing brpc) and overhead of epoll_ctl, edge triggered mode is used in EDISP. After receiving an event, an atomic variable associated with the fd is added by one atomically. If the variable is zero before addition, a bthread is started to handle the data from the fd. 
The pthread worker in which EDISP runs is yielded to the newly created bthread to make it start reading ASAP and have a better cache locality. The bthread in which EDISP runs will be stolen to another pthread and keep running, this mechanism is work stealing used in bthreads. To understand exactly how that atomic variable works, you can read [atomic instructions](atomic_instructions.md) first, then check [Socket::StartInputEvent](https://github.com/apache/brpc/blob/master/src/brpc/socket.cpp). These methods make contentions on dispatching events of one fd [wait-free](http://en.wikipedia.org/wiki/Non-blocking_algorithm#Wait-freedom). -In current implementation, `Transport::ProcessEvent` chooses start mode based on `EventDispatcherUnsched()`: `false` uses `bthread_start_urgent`, and `true` uses `bthread_start_background`. In addition, RDMA handles `last_msg` differently between polling and event modes: when `rdma_use_polling=false`, `RdmaTransport::QueueMessage` does not process `last_msg`; in polling mode it continues to process it. +In current implementation, `Transport::ProcessEvent` chooses start mode based on `EventDispatcherUnsched()`: `false` uses `bthread_start_urgent`, and `true` uses `bthread_start_background`. In addition, RDMA handles `last_msg` differently between polling and event modes: when `rdma_use_polling=false`, `RdmaTransport::QueueMessage` does not process `last_msg`; in polling mode it continues to process it. And when `EventDispatcherUnsched()` returns `true`, `last_msg` is not processed directly in the current execution flow but in a new bthread. Users can control this behavior through `event_dispatcher_edisp_unsched`. [InputMessenger](https://github.com/apache/brpc/blob/master/src/brpc/input_messenger.h) cuts messages and uses customizable callbacks to handle different format of data. 
`Parse` callback cuts messages from binary data and has relatively stable running time; `Process` parses messages further(such as parsing by protobuf) and calls users' callbacks, which vary in running time. If n(n > 1) messages are read from the fd, InputMessenger launches n-1 bthreads to handle first n-1 messages respectively, and processes the last message in-place. InputMessenger tries protocols one by one. Since one connections often has only one type of messages, InputMessenger remembers current protocol to avoid trying for protocols next time. From 016bd408421770959be86dd6deedb961ec7d5dec Mon Sep 17 00:00:00 2001 From: Jenrry You Date: Tue, 10 Mar 2026 16:00:23 +0800 Subject: [PATCH 41/84] Prevent indefinite defer-close by checking last_active_time (#3216) Co-authored-by: youzhiyuan --- docs/cn/client.md | 3 ++- docs/en/client.md | 3 ++- src/brpc/socket_map.cpp | 41 ++++++++++++++++++++++++++++++++--------- src/brpc/socket_map.h | 8 ++++++++ 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/docs/cn/client.md b/docs/cn/client.md index b9fe872dd8..609becc086 100755 --- a/docs/cn/client.md +++ b/docs/cn/client.md @@ -820,8 +820,9 @@ brpc支持[Streaming RPC](streaming_rpc.md),这是一种应用层的连接, | Name | Value | Description | Defined At | | ------------------ | ----- | ---------------------------------------- | ----------------------- | | defer_close_second | 0 | Defer close of connections for so many seconds even if the connection is not used by anyone. 
Close immediately for non-positive values | src/brpc/socket_map.cpp | +| defer_close_respect_idle | false | 当 defer_close_second > 0 时,如果连接在最后一个引用释放时已经闲置超过 defer_close_second,则立刻关闭连接(默认关闭以保持兼容) | src/brpc/socket_map.cpp | -设置后引用计数清0时连接并不会立刻被关闭,而是会等待这么多秒再关闭,如果在这段时间内又有channel引用了这个连接,它会恢复正常被使用的状态。不管channel创建析构有多频率,这个选项使得关闭连接的频率有上限。这个选项的副作用是一些fd不会被及时关闭,如果延时被误设为一个大数值,程序占据的fd个数可能会很大。 +设置后引用计数清0时连接并不会立刻被关闭,而是会等待这么多秒再关闭,如果在这段时间内又有channel引用了这个连接,它会恢复正常被使用的状态。不管channel创建析构有多频率,这个选项使得关闭连接的频率有上限。这个选项的副作用是一些fd不会被及时关闭,如果延时被误设为一个大数值,程序占据的fd个数可能会很大。开启 -defer_close_respect_idle 后,如果连接在最后一个引用释放时已经闲置超过 defer_close_second,则可能会被关闭。 ## 连接的缓冲区大小 diff --git a/docs/en/client.md b/docs/en/client.md index f199fc6b78..60c458b62c 100644 --- a/docs/en/client.md +++ b/docs/en/client.md @@ -717,8 +717,9 @@ Another solution is setting gflag -defer_close_second | Name | Value | Description | Defined At | | ------------------ | ----- | ---------------------------------------- | ----------------------- | | defer_close_second | 0 | Defer close of connections for so many seconds even if the connection is not used by anyone. Close immediately for non-positive values | src/brpc/socket_map.cpp | +| defer_close_respect_idle | false | When defer_close_second > 0, close a connection immediately when the last reference is removed and the socket has already been idle for longer than defer_close_second | src/brpc/socket_map.cpp | -After setting, connection is not closed immediately after last referential count, instead it will be closed after so many seconds. If a channel references the connection again during the wait, the connection resumes to normal. No matter how frequent channels are created, this flag limits the frequency of closing connections. Side effect of the flag is that file descriptors are not closed immediately after destroying of channels, if the flag is wrongly set to be large, number of active file descriptors in the process may be large as well. 
+After setting, connection is not closed immediately after last referential count, instead it will be closed after so many seconds. If a channel references the connection again during the wait, the connection resumes to normal. No matter how frequent channels are created, this flag limits the frequency of closing connections. Side effect of the flag is that file descriptors are not closed immediately after destroying of channels, if the flag is wrongly set to be large, number of active file descriptors in the process may be large as well. When -defer_close_respect_idle is enabled, a connection that has already been idle for longer than defer_close_second may be closed when the last reference is removed. ## Buffer size of connections diff --git a/src/brpc/socket_map.cpp b/src/brpc/socket_map.cpp index 3984f6b866..8d934f5827 100644 --- a/src/brpc/socket_map.cpp +++ b/src/brpc/socket_map.cpp @@ -46,6 +46,13 @@ DEFINE_int32(defer_close_second, 0, "non-positive values."); BRPC_VALIDATE_GFLAG(defer_close_second, PassValidate); +DEFINE_bool(defer_close_respect_idle, false, + "When defer_close_second > 0, close a connection immediately when " + "the last reference is removed and the socket has already been " + "idle for longer than defer_close_second. 
Disabled by default for " + "backward compatibility."); +BRPC_VALIDATE_GFLAG(defer_close_respect_idle, PassValidate); + DEFINE_bool(show_socketmap_in_vars, false, "[DEBUG] Describe SocketMaps in /vars"); BRPC_VALIDATE_GFLAG(show_socketmap_in_vars, PassValidate); @@ -71,6 +78,7 @@ static void CreateClientSideSocketMap() { options.socket_creator = new GlobalSocketCreator; options.idle_timeout_second_dynamic = &FLAGS_idle_timeout_second; options.defer_close_second_dynamic = &FLAGS_defer_close_second; + options.defer_close_respect_idle_dynamic = &FLAGS_defer_close_respect_idle; if (socket_map->Init(options) != 0) { LOG(FATAL) << "Fail to init SocketMap"; exit(1); @@ -130,7 +138,8 @@ SocketMapOptions::SocketMapOptions() , idle_timeout_second_dynamic(NULL) , idle_timeout_second(0) , defer_close_second_dynamic(NULL) - , defer_close_second(0) { + , defer_close_second(0) + , defer_close_respect_idle_dynamic(NULL) { } SocketMap::SocketMap() @@ -296,15 +305,29 @@ void SocketMap::RemoveInternal(const SocketMapKey& key, *_options.defer_close_second_dynamic : _options.defer_close_second; if (!remove_orphan && defer_close_second > 0) { - // Start count down on this Socket - sc->no_ref_us = butil::cpuwide_time_us(); - } else { - Socket* const s = sc->socket; - _map.erase(key); - mu.unlock(); - s->ReleaseAdditionalReference(); // release extra ref - ReleaseReference(s); + const int64_t now_us = butil::cpuwide_time_us(); + // NOTE: save the gflag which may be reloaded at any time + const bool defer_close_respect_idle = _options.defer_close_respect_idle_dynamic ? + *_options.defer_close_respect_idle_dynamic : false; + if (!defer_close_respect_idle) { + // Start count down on this Socket. 
+ sc->no_ref_us = now_us; + return; + } + const int64_t defer_us = (int64_t)defer_close_second * 1000000L; + if (sc->no_ref_us <= sc->socket->last_active_time_us() + defer_us) { + // When defer_close_respect_idle is enabled, a connection that has + // already been idle for longer than defer_close_second is closed + // immediately. + sc->no_ref_us = now_us; + return; + } } + Socket* const s = sc->socket; + _map.erase(key); + mu.unlock(); + s->ReleaseAdditionalReference(); // release extra ref + ReleaseReference(s); } } diff --git a/src/brpc/socket_map.h b/src/brpc/socket_map.h index b1922bf86e..c4dc5c28f3 100644 --- a/src/brpc/socket_map.h +++ b/src/brpc/socket_map.h @@ -154,6 +154,14 @@ struct SocketMapOptions { // Default: 0 (disabled) const int* defer_close_second_dynamic; int defer_close_second; + + // When defer_close_second > 0 and this flag is true, close a connection + // immediately when the last reference is removed and the socket has already + // been idle for longer than defer_close_second. + // If defer_close_respect_idle_dynamic is not NULL, use the dereferenced + // value each time. + // Default: NULL (treated as false) + const bool* defer_close_respect_idle_dynamic; }; // Share sockets to the same EndPoint. 
From 7fea47c8c588b7c7d2c71ec0ea29926351ceca59 Mon Sep 17 00:00:00 2001 From: Michael Cho Date: Tue, 10 Mar 2026 14:35:17 -0400 Subject: [PATCH 42/84] Support building with Protobuf v34+ This handles removal of FieldDescriptor::is_optional()[^1] and ClassData removal of on_demand_register_arena_dtor[^2] [^1]: protocolbuffers/protobuf@9dbc5d479a8e453921485d8d3de47fb3c005f1af [^2]: protocolbuffers/protobuf@49e15fecf6d8c23118668e4bb2d00ee7f4cd11c5 --- src/brpc/nonreflectable_message.h | 15 ++++++++++++++- src/json2pb/json_to_pb.cpp | 2 +- src/mcpack2pb/generator.cpp | 6 +++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/brpc/nonreflectable_message.h b/src/brpc/nonreflectable_message.h index 1494cd1b75..7f2acd78a3 100644 --- a/src/brpc/nonreflectable_message.h +++ b/src/brpc/nonreflectable_message.h @@ -223,7 +223,20 @@ class NonreflectableMessage : public ::google::protobuf::Message { struct NonreflectableMessageClassData : ClassDataFull { constexpr NonreflectableMessageClassData() : ClassDataFull( -# if GOOGLE_PROTOBUF_VERSION >= 5029000 +# if GOOGLE_PROTOBUF_VERSION >= 7034000 + ClassData{ + &_instance, // prototype + nullptr, // tc_table + nullptr, // is_initialized + nullptr, // merge_to_from + ::google::protobuf::internal::MessageCreator(), // message_creator + 0, // cached_size_offset + false, // is_lite + }, + nullptr, // descriptor_methods + nullptr, // descriptor_table + nullptr // get_metadata_tracker +# elif GOOGLE_PROTOBUF_VERSION >= 5029000 ClassData{ &_instance, // prototype nullptr, // tc_table diff --git a/src/json2pb/json_to_pb.cpp b/src/json2pb/json_to_pb.cpp index 491ab92116..c048a8b8c9 100644 --- a/src/json2pb/json_to_pb.cpp +++ b/src/json2pb/json_to_pb.cpp @@ -119,7 +119,7 @@ static void string_append_value(const BUTIL_RAPIDJSON_NAMESPACE::Value& value, //otherwise will append error into error message and return false. 
inline bool value_invalid(const google::protobuf::FieldDescriptor* field, const char* type, const BUTIL_RAPIDJSON_NAMESPACE::Value& value, std::string* err) { - bool optional = field->is_optional(); + bool optional = !field->is_required() && !field->is_repeated(); if (err) { if (!err->empty()) { err->append(", "); diff --git a/src/mcpack2pb/generator.cpp b/src/mcpack2pb/generator.cpp index fe47fb33fa..26aa4b5bd8 100644 --- a/src/mcpack2pb/generator.cpp +++ b/src/mcpack2pb/generator.cpp @@ -482,7 +482,7 @@ static bool generate_parsing(const google::protobuf::Descriptor* d, } break; } // switch } else { - if (f->is_optional()) { + if (!f->is_required() && !f->is_repeated()) { impl.Print("// optional $type$ $name$ = $number$;\n" , "type", field_to_string(f) , "name", f->name() @@ -888,12 +888,12 @@ static bool generate_serializing(const google::protobuf::Descriptor* d, butil::string_printf(&comment_template, "// %s $type$ $name$ = $number$;\n", (f->is_repeated() ? "repeated" : - (f->is_optional() ? "optional" : "required"))); + (f->is_required() ? "required" : "optional"))); } else { butil::string_printf(&comment_template, "// %s $type$ $name$ = $number$ [(idl_type)=%s];\n", (f->is_repeated() ? "repeated" : - (f->is_optional() ? "optional" : "required")), + (f->is_required() ? 
"required" : "optional")), describe_idl_type(cit)); } impl.Print(comment_template.c_str() From 9d0582fc56a6c48dc68f51eb054188cc69731d56 Mon Sep 17 00:00:00 2001 From: x1wan Date: Mon, 16 Mar 2026 20:47:20 +0800 Subject: [PATCH 43/84] Skip() return true if the end of the stream is reached (#3242) * return true if the end of the stream is reached * update the comment with protocolbuffers/protobuf@ab25c3e --------- Co-authored-by: zhaohanqing --- src/butil/iobuf.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/butil/iobuf.cpp b/src/butil/iobuf.cpp index 26046e3745..ce60932327 100644 --- a/src/butil/iobuf.cpp +++ b/src/butil/iobuf.cpp @@ -1849,10 +1849,14 @@ void IOBufAsZeroCopyInputStream::BackUp(int count) { } } -// Skips a number of bytes. Returns false if the end of the stream is -// reached or some input error occurred. In the end-of-stream case, the -// stream is advanced to the end of the stream (so ByteCount() will return -// the total size of the stream). +// Skips `count` number of bytes. +// Returns true on success, or false if some input error occurred, or `count` +// exceeds the end of the stream. This function may skip up to `count - 1` +// bytes in case of failure. +// +// Preconditions: +// * `count` is non-negative. 
+// bool IOBufAsZeroCopyInputStream::Skip(int count) { const IOBuf::BlockRef* cur_ref = _buf->_pref_at(_ref_index); while (cur_ref) { @@ -1867,7 +1871,7 @@ bool IOBufAsZeroCopyInputStream::Skip(int count) { _byte_count += left_bytes; cur_ref = _buf->_pref_at(++_ref_index); } - return false; + return (0 == count); } int64_t IOBufAsZeroCopyInputStream::ByteCount() const { @@ -2102,4 +2106,4 @@ bool IOBufBytesIterator::forward_one_block(const void** data, size_t* size) { void* fast_memcpy(void *__restrict dest, const void *__restrict src, size_t n) { return butil::iobuf::cp(dest, src, n); -} // namespace butil \ No newline at end of file +} // namespace butil From 5fbf615f7319f63f20d601f6eb49688dfda18f4a Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Wed, 25 Mar 2026 19:54:57 +0800 Subject: [PATCH 44/84] Fix DoRead() returns error errno (#3248) --- src/brpc/socket.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index c123fb6b6e..da926c9ff5 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -2106,12 +2106,14 @@ ssize_t Socket::DoRead(size_t size_hint) { << ": " << SSLError(e); errno = ESSL; } else { + int saved_errno = errno; // System error with corresponding errno set. 
bool is_fatal_error = (ssl_error != SSL_ERROR_ZERO_RETURN && ssl_error != SSL_ERROR_SYSCALL) || - BIO_fd_non_fatal_error(errno) != 0 || + BIO_fd_non_fatal_error(saved_errno) != 0 || nr < 0; PLOG_IF(WARNING, is_fatal_error) << "Fail to read from ssl_fd=" << fd(); + errno = saved_errno; } break; } From fdb0785539e8483a5c4f34e10c63112ca84dc0cc Mon Sep 17 00:00:00 2001 From: Jenrry You Date: Thu, 26 Mar 2026 15:13:22 +0800 Subject: [PATCH 45/84] Make batch create stream SendFeedback thread safe (#3215) --- src/brpc/stream.cpp | 34 +++- src/brpc/stream_impl.h | 7 +- test/brpc_streaming_rpc_unittest.cpp | 236 ++++++++++++++++++++++++++- 3 files changed, 267 insertions(+), 10 deletions(-) diff --git a/src/brpc/stream.cpp b/src/brpc/stream.cpp index 2a4430548f..a2a106a8b1 100644 --- a/src/brpc/stream.cpp +++ b/src/brpc/stream.cpp @@ -52,6 +52,7 @@ Stream::Stream() , _remote_consumed(0) , _cur_buf_size(0) , _local_consumed(0) + , _atomic_local_consumed(0) , _parse_rpc_response(false) , _pending_buf(NULL) , _start_idle_timer_us(0) @@ -287,7 +288,7 @@ void Stream::SetConnected(const StreamSettings* remote_settings) { CHECK(_host_socket != NULL); RPC_VLOG << "stream=" << id() << " is connected to stream_id=" << _remote_settings.stream_id() << " at host_socket=" << *_host_socket; - _connected = true; + _connected.store(true, butil::memory_order_release); _connect_meta.ec = 0; TriggerOnConnectIfNeed(); if (remote_settings == NULL) { @@ -295,6 +296,13 @@ void Stream::SetConnected(const StreamSettings* remote_settings) { // Client-side timer would triggered in Consume after received the first // message which is the very RPC response StartIdleTimer(); + } else { + // send first feedback for client-side stream if it already consumed data + if (_remote_settings.need_feedback()) { + auto consumed_bytes = _atomic_local_consumed.load(butil::memory_order_acquire); + if (consumed_bytes > 0) + SendFeedback(consumed_bytes); + } } } @@ -620,20 +628,34 @@ int Stream::Consume(void *meta, 
bthread::TaskIterator& iter) { } mb.flush(); - if (s->_remote_settings.need_feedback() && mb.total_length() > 0) { - s->_local_consumed += mb.total_length(); - s->SendFeedback(); + auto total_length = mb.total_length(); + if (total_length > 0) { + // fast path for connected stream + if (s->_connected.load(butil::memory_order_acquire)){ + if (s->_remote_settings.need_feedback()) { + s->_local_consumed += total_length; + s->SendFeedback(s->_local_consumed); + } + } else { + // Under the scenario of batch creation of Streams, there is concurrency between SetConnected and Consume for the same stream, + // and it is necessary to ensure the memory order. + s->_local_consumed = s->_atomic_local_consumed.fetch_add(total_length, butil::memory_order_release) + total_length; + if (s->_connected.load(butil::memory_order_acquire) && s->_remote_settings.need_feedback()) { + s->SendFeedback(s->_local_consumed); + } + } } + s->StartIdleTimer(); return 0; } -void Stream::SendFeedback() { +void Stream::SendFeedback(int64_t _consumed_bytes) { StreamFrameMeta fm; fm.set_frame_type(FRAME_TYPE_FEEDBACK); fm.set_stream_id(_remote_settings.stream_id()); fm.set_source_stream_id(id()); - fm.mutable_feedback()->set_consumed_size(_local_consumed); + fm.mutable_feedback()->set_consumed_size(_consumed_bytes); butil::IOBuf out; policy::PackStreamMessage(&out, fm, NULL); WriteToHostSocket(&out); diff --git a/src/brpc/stream_impl.h b/src/brpc/stream_impl.h index 5ff7cb04a2..284b33ca33 100644 --- a/src/brpc/stream_impl.h +++ b/src/brpc/stream_impl.h @@ -81,7 +81,7 @@ friend struct butil::DefaultDeleter; void TriggerOnConnectIfNeed(); void Wait(void (*on_writable)(StreamId, void*, int), void* arg, const timespec* due_time, bool new_thread, bthread_id_t *join_id); - void SendFeedback(); + void SendFeedback(int64_t _consumed_bytes); void StartIdleTimer(); void StopIdleTimer(); void HandleRpcResponse(butil::IOBuf* response_buffer); @@ -115,7 +115,7 @@ friend struct butil::DefaultDeleter; 
bthread_mutex_t _connect_mutex; ConnectMeta _connect_meta; - bool _connected; + butil::atomic _connected; bool _closed; int _error_code; std::string _error_text; @@ -127,7 +127,8 @@ friend struct butil::DefaultDeleter; bthread_id_list_t _writable_wait_list; int64_t _local_consumed; - StreamSettings _remote_settings; + butil::atomic _atomic_local_consumed; + StreamSettings _remote_settings; bool _parse_rpc_response; bthread::ExecutionQueueId _consumer_queue; diff --git a/test/brpc_streaming_rpc_unittest.cpp b/test/brpc_streaming_rpc_unittest.cpp index 056ea9a963..ecb88c6150 100644 --- a/test/brpc_streaming_rpc_unittest.cpp +++ b/test/brpc_streaming_rpc_unittest.cpp @@ -20,10 +20,12 @@ // Date: 2015/10/22 16:28:44 #include +#include #include "brpc/server.h" #include "brpc/controller.h" #include "brpc/channel.h" +#include "brpc/callback.h" #include "brpc/socket.h" #include "brpc/stream_impl.h" #include "brpc/policy/streaming_rpc_protocol.h" @@ -54,7 +56,7 @@ class MyServiceWithStream : public test::EchoService { const ::test::EchoRequest* request, ::test::EchoResponse* response, ::google::protobuf::Closure* done) { - brpc::ClosureGuard done_gurad(done); + brpc::ClosureGuard done_guard(done); response->set_message(request->message()); brpc::Controller* cntl = (brpc::Controller*)controller; brpc::StreamId response_stream; @@ -78,6 +80,158 @@ class StreamingRpcTest : public testing::Test { test::EchoResponse response; }; +struct BatchStreamFeedbackRaceState { + brpc::StreamId server_first_stream_id{brpc::INVALID_STREAM_ID}; + brpc::StreamId server_extra_stream_id{brpc::INVALID_STREAM_ID}; + brpc::StreamId client_extra_stream_id{brpc::INVALID_STREAM_ID}; + + std::atomic server_first_write_rc{-1}; + std::atomic server_second_write_rc{-1}; + std::atomic client_got_first_msg{false}; + std::atomic client_got_second_msg{false}; + std::atomic server_write_done{false}; + std::atomic rpc_done{false}; + + bthread_t server_send_tid{0}; + std::atomic server_send_started{false}; +}; 
+ +class BatchStreamClientHandler : public brpc::StreamInputHandler { +public: + explicit BatchStreamClientHandler(BatchStreamFeedbackRaceState* state) + : _state(state) {} + + int on_received_messages(brpc::StreamId id, + butil::IOBuf* const messages[], + size_t size) override { + if (id != _state->client_extra_stream_id) { + // This test only cares about extra stream in batch creation. + return 0; + } + for (size_t i = 0; i < size; ++i) { + const size_t len = messages[i]->length(); + messages[i]->clear(); + // First payload: 64 bytes. Second payload: 1 byte. + if (len == 64) { + _state->client_got_first_msg.store(true, std::memory_order_release); + } else if (len == 1) { + _state->client_got_second_msg.store(true, std::memory_order_release); + } + } + return 0; + } + + void on_idle_timeout(brpc::StreamId /*id*/) override {} + + void on_closed(brpc::StreamId /*id*/) override {} + + void on_failed(brpc::StreamId /*id*/, int /*error_code*/, const std::string& /*error_text*/) override {} + +private: + BatchStreamFeedbackRaceState* _state; +}; + +static void* SendTwoMessagesOnServerExtraStream(void* arg) { + auto* state = static_cast(arg); + const brpc::StreamId sid = state->server_extra_stream_id; + + // Wait until server-side stream is connected. + const int64_t connect_deadline_us = butil::gettimeofday_us() + 2 * 1000 * 1000L; + bool connected = false; + while (butil::gettimeofday_us() < connect_deadline_us) { + brpc::SocketUniquePtr ptr; + if (brpc::Socket::Address(sid, &ptr) == 0) { + brpc::Stream* s = static_cast(ptr->conn()); + if (s->_host_socket != NULL && s->_connected) { + connected = true; + break; + } + } + usleep(1000); + } + + if (!connected) { + state->server_first_write_rc.store(ETIMEDOUT, std::memory_order_relaxed); + state->server_second_write_rc.store(ETIMEDOUT, std::memory_order_relaxed); + state->server_write_done.store(true, std::memory_order_release); + return NULL; + } + + // 1) Send a payload exactly equal to max_buf_size(64). 
+ { + std::string payload(64, 'a'); + butil::IOBuf out; + out.append(payload); + state->server_first_write_rc.store(brpc::StreamWrite(sid, out), std::memory_order_relaxed); + } + + // 2) Then send another byte. This write should become writable only after + // client sends FEEDBACK with consumed_size >= 64. + const int64_t write_deadline_us = butil::gettimeofday_us() + 2 * 1000 * 1000L; + int rc = -1; + while (butil::gettimeofday_us() < write_deadline_us) { + butil::IOBuf out; + out.append("b", 1); + rc = brpc::StreamWrite(sid, out); + if (rc == 0) { + break; + } + if (rc != EAGAIN) { + break; + } + const timespec duetime = butil::milliseconds_from_now(100); + (void)brpc::StreamWait(sid, &duetime); + } + state->server_second_write_rc.store(rc, std::memory_order_relaxed); + state->server_write_done.store(true, std::memory_order_release); + return NULL; +} + +class MyServiceWithBatchStream : public test::EchoService { +public: + MyServiceWithBatchStream(const brpc::StreamOptions& options, + BatchStreamFeedbackRaceState* state) + : _options(options), _state(state) {} + + void Echo(::google::protobuf::RpcController* controller, + const ::test::EchoRequest* request, + ::test::EchoResponse* response, + ::google::protobuf::Closure* done) override { + brpc::ClosureGuard done_guard(done); + response->set_message(request->message()); + brpc::Controller* cntl = static_cast(controller); + + brpc::StreamIds response_streams; + ASSERT_EQ(0, brpc::StreamAccept(response_streams, *cntl, &_options)); + ASSERT_EQ(2u, response_streams.size()); + _state->server_first_stream_id = response_streams[0]; + _state->server_extra_stream_id = response_streams[1]; + + bthread_t tid; + ASSERT_EQ(0, bthread_start_background( + &tid, &BTHREAD_ATTR_NORMAL, + SendTwoMessagesOnServerExtraStream, _state)); + _state->server_send_tid = tid; + _state->server_send_started.store(true, std::memory_order_release); + } + +private: + brpc::StreamOptions _options; + BatchStreamFeedbackRaceState* _state; +}; + 
+static void SetAtomicTrue(std::atomic* f) { + f->store(true, std::memory_order_release); +} + +static bool WaitForTrue(const std::atomic& f, int timeout_ms) { + const int64_t deadline_us = butil::gettimeofday_us() + (int64_t)timeout_ms * 1000L; + while (!f.load(std::memory_order_acquire) && butil::gettimeofday_us() < deadline_us) { + usleep(1000); + } + return f.load(std::memory_order_acquire); +} + TEST_F(StreamingRpcTest, sanity) { brpc::Server server; MyServiceWithStream service; @@ -98,6 +252,86 @@ TEST_F(StreamingRpcTest, sanity) { server.Join(); } +TEST_F(StreamingRpcTest, batch_create_stream_feedback_race) { + BatchStreamFeedbackRaceState state; + BatchStreamClientHandler client_handler(&state); + + brpc::StreamOptions server_stream_opt; + // Make server-side sender sensitive to FEEDBACK quickly. + server_stream_opt.max_buf_size = 16; + + brpc::Server server; + MyServiceWithBatchStream service(server_stream_opt, &state); + ASSERT_EQ(0, server.AddService(&service, brpc::SERVER_DOESNT_OWN_SERVICE)); + ASSERT_EQ(0, server.Start(9007, NULL)); + + brpc::Channel channel; + ASSERT_EQ(0, channel.Init("127.0.0.1:9007", NULL)); + + brpc::Controller cntl; + brpc::StreamIds request_streams; + brpc::StreamOptions client_stream_opt; + client_stream_opt.handler = &client_handler; + client_stream_opt.max_buf_size = 0; + ASSERT_EQ(0, brpc::StreamCreate(request_streams, 2, cntl, &client_stream_opt)); + ASSERT_EQ(2u, request_streams.size()); + state.client_extra_stream_id = request_streams[1]; + + // Block SetConnected() on the extra stream to enlarge the race window. 
+ brpc::SocketUniquePtr client_extra_ptr; + ASSERT_EQ(0, brpc::Socket::Address(state.client_extra_stream_id, &client_extra_ptr)); + brpc::Stream* client_extra_stream = static_cast(client_extra_ptr->conn()); + bthread_mutex_lock(&client_extra_stream->_connect_mutex); + struct UnlockGuard { + bthread_mutex_t* m; + ~UnlockGuard() { + if (m) { + bthread_mutex_unlock(m); + } + } + } unlock_guard{&client_extra_stream->_connect_mutex}; + + BRPC_SCOPE_EXIT { + if (state.server_extra_stream_id != brpc::INVALID_STREAM_ID) { + brpc::StreamClose(state.server_extra_stream_id); + } + if (state.server_first_stream_id != brpc::INVALID_STREAM_ID) { + brpc::StreamClose(state.server_first_stream_id); + } + for (auto sid : request_streams) { + brpc::StreamClose(sid); + } + + if (state.server_send_tid) { + bthread_join(state.server_send_tid, NULL); + } + server.Stop(0); + server.Join(); + }; + + test::EchoService_Stub stub(&channel); + stub.Echo(&cntl, &request, &response, brpc::NewCallback(SetAtomicTrue, &state.rpc_done)); + + // Wait until client consumes the first 64B payload on extra stream. + ASSERT_TRUE(WaitForTrue(state.client_got_first_msg, 2000)); + + // Unblock SetConnected(); the fix in PR 3215 should send the first FEEDBACK + // with consumed_size=64 here, making server-side stream writable again. + bthread_mutex_unlock(&client_extra_stream->_connect_mutex); + unlock_guard.m = NULL; + + ASSERT_TRUE(WaitForTrue(state.rpc_done, 2000)); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); + + // Wait for server-side send thread to be started. 
+ ASSERT_TRUE(WaitForTrue(state.server_send_started, 2000)); + + ASSERT_TRUE(WaitForTrue(state.server_write_done, 2000)); + ASSERT_EQ(0, state.server_first_write_rc.load(std::memory_order_relaxed)); + ASSERT_EQ(0, state.server_second_write_rc.load(std::memory_order_relaxed)); + ASSERT_TRUE(WaitForTrue(state.client_got_second_msg, 2000)); +} + struct HandlerControl { HandlerControl() : block(false) From 4e40f5573984ec75ac114f1ad0ad0297c2d2cc89 Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Thu, 26 Mar 2026 15:34:27 +0800 Subject: [PATCH 46/84] Opt IOBuf Profiler (#3251) --- src/butil/iobuf_profiler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/butil/iobuf_profiler.cpp b/src/butil/iobuf_profiler.cpp index 8d213a72d4..a88e7c559b 100644 --- a/src/butil/iobuf_profiler.cpp +++ b/src/butil/iobuf_profiler.cpp @@ -73,7 +73,9 @@ bool IsIOBufProfilerEnabled() { } bool IsIOBufProfilerSamplable() { - pthread_once(&g_iobuf_profiler_info_once, InitGlobalIOBufProfilerInfo); + if (!IsIOBufProfilerEnabled()) { + return false; + } if (g_iobuf_profiler_sample_rate == 100) { return true; } From 685b1645afb4d5663ac1880d10941245c82d7992 Mon Sep 17 00:00:00 2001 From: David Korczynski Date: Sat, 28 Mar 2026 11:59:47 -0700 Subject: [PATCH 47/84] Extend fuzz harness suite and fix false positive Extends the fuzzing suite to a broader set of targets. The main goal here is to improve code coverage, of which a recent report is available here: https://storage.googleapis.com/oss-fuzz-coverage/brpc/reports/20260326/linux/src/report.html This also fixes false positives in some of the fuzzing harnesses due to a missing Socket. This fixes issues e.g. https://issues.oss-fuzz.com/issues/42532345 The new fuzzing harnesses and the fix should show meaningful coverage gains. 
Signed-off-by: David Korczynski --- test/CMakeLists.txt | 5 +- test/fuzzing/fuzz_amf.cpp | 81 ++++++++++++++++++++++++++ test/fuzzing/fuzz_baidu_rpc.cpp | 38 ++++++++++++ test/fuzzing/fuzz_common.h | 44 ++++++++++++++ test/fuzzing/fuzz_couchbase.cpp | 38 ++++++++++++ test/fuzzing/fuzz_esp.cpp | 4 +- test/fuzzing/fuzz_http_parser.cpp | 97 +++++++++++++++++++++++++++++++ test/fuzzing/fuzz_hulu.cpp | 4 +- test/fuzzing/fuzz_memcache.cpp | 38 ++++++++++++ test/fuzzing/fuzz_mongo.cpp | 38 ++++++++++++ test/fuzzing/fuzz_shead.cpp | 4 +- test/fuzzing/fuzz_sofa.cpp | 4 +- test/fuzzing/fuzz_streaming.cpp | 38 ++++++++++++ test/fuzzing/fuzz_thrift.cpp | 36 ++++++++++++ test/fuzzing/oss-fuzz.sh | 5 +- 15 files changed, 468 insertions(+), 6 deletions(-) create mode 100644 test/fuzzing/fuzz_amf.cpp create mode 100644 test/fuzzing/fuzz_baidu_rpc.cpp create mode 100644 test/fuzzing/fuzz_common.h create mode 100644 test/fuzzing/fuzz_couchbase.cpp create mode 100644 test/fuzzing/fuzz_http_parser.cpp create mode 100644 test/fuzzing/fuzz_memcache.cpp create mode 100644 test/fuzzing/fuzz_mongo.cpp create mode 100644 test/fuzzing/fuzz_streaming.cpp create mode 100644 test/fuzzing/fuzz_thrift.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a478e8cc89..ade7350f5a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -275,7 +275,10 @@ if(BUILD_FUZZ_TESTS) endif() set(FUZZ_TARGETS fuzz_butil fuzz_esp fuzz_hpack fuzz_http - fuzz_hulu fuzz_json fuzz_redis fuzz_shead fuzz_sofa fuzz_uri) + fuzz_hulu fuzz_json fuzz_redis fuzz_shead fuzz_sofa fuzz_uri + fuzz_baidu_rpc fuzz_mongo fuzz_memcache + fuzz_couchbase fuzz_streaming fuzz_http_parser fuzz_amf) + foreach(target ${FUZZ_TARGETS}) add_executable(${target} fuzzing/${target}.cpp $) diff --git a/test/fuzzing/fuzz_amf.cpp b/test/fuzzing/fuzz_amf.cpp new file mode 100644 index 0000000000..60628ad124 --- /dev/null +++ b/test/fuzzing/fuzz_amf.cpp @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one 
+// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "brpc/amf.h" +#include "butil/iobuf.h" + +#define kMinInputLength 5 +#define kMaxInputLength 4096 + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < kMinInputLength || size > kMaxInputLength){ + return 1; + } + + uint8_t mode = data[0] % 3; + const uint8_t *payload = data + 1; + size_t payload_size = size - 1; + + butil::IOBuf buf; + buf.append(payload, payload_size); + + switch (mode) { + case 0: { + // Read AMF object + butil::IOBufAsZeroCopyInputStream zc_stream(buf); + brpc::AMFInputStream stream(&zc_stream); + brpc::AMFObject obj; + brpc::ReadAMFObject(&obj, &stream); + break; + } + case 1: { + // Read AMF string + butil::IOBufAsZeroCopyInputStream zc_stream(buf); + brpc::AMFInputStream stream(&zc_stream); + std::string val; + brpc::ReadAMFString(&val, &stream); + break; + } + case 2: { + // Read raw AMF fields by consuming the stream directly + butil::IOBufAsZeroCopyInputStream zc_stream(buf); + brpc::AMFInputStream stream(&zc_stream); + uint8_t marker; + while (stream.good() && stream.cut_u8(&marker) == 1) { + // Try to identify marker type and read value + if (marker == brpc::AMF_MARKER_NUMBER) { + uint64_t num; + stream.cut_u64(&num); + } else if (marker == 
brpc::AMF_MARKER_BOOLEAN) { + uint8_t b; + stream.cut_u8(&b); + } else if (marker == brpc::AMF_MARKER_STRING) { + uint16_t len; + if (stream.cut_u16(&len) == 2 && len < 1024) { + char tmp[1024]; + stream.cutn(tmp, len); + } + } + } + break; + } + } + + return 0; +} diff --git a/test/fuzzing/fuzz_baidu_rpc.cpp b/test/fuzzing/fuzz_baidu_rpc.cpp new file mode 100644 index 0000000000..027dbbcb47 --- /dev/null +++ b/test/fuzzing/fuzz_baidu_rpc.cpp @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "brpc/policy/baidu_rpc_protocol.h" +#include "fuzz_common.h" + +#define kMinInputLength 5 +#define kMaxInputLength 4096 + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < kMinInputLength || size > kMaxInputLength){ + return 1; + } + + std::string input(reinterpret_cast(data), size); + butil::IOBuf buf; + buf.append(input); + + brpc::Socket* sock = get_fuzz_socket(); + brpc::policy::ParseRpcMessage(&buf, sock, false, NULL); + return 0; +} diff --git a/test/fuzzing/fuzz_common.h b/test/fuzzing/fuzz_common.h new file mode 100644 index 0000000000..1ab6bf3b4b --- /dev/null +++ b/test/fuzzing/fuzz_common.h @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_TEST_FUZZING_FUZZ_COMMON_H +#define BRPC_TEST_FUZZING_FUZZ_COMMON_H + +#include "brpc/socket.h" +#include "butil/endpoint.h" + +// Create a valid Socket for use in fuzz harnesses that need a non-NULL Socket*. +// Returns a raw Socket* that remains valid for the lifetime of the process +// (held by the static SocketUniquePtr). 
+inline brpc::Socket* get_fuzz_socket() { + static brpc::SocketId sid = 0; + static brpc::SocketUniquePtr sock_ptr; + static bool initialized = false; + + if (!initialized) { + brpc::SocketOptions options; + options.remote_side = butil::EndPoint(butil::IP_ANY, 7777); + if (brpc::Socket::Create(options, &sid) == 0) { + brpc::Socket::Address(sid, &sock_ptr); + } + initialized = true; + } + + return sock_ptr.get(); +} + +#endif // BRPC_TEST_FUZZING_FUZZ_COMMON_H diff --git a/test/fuzzing/fuzz_couchbase.cpp b/test/fuzzing/fuzz_couchbase.cpp new file mode 100644 index 0000000000..11eee84adb --- /dev/null +++ b/test/fuzzing/fuzz_couchbase.cpp @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "brpc/policy/couchbase_protocol.h" +#include "fuzz_common.h" + +#define kMinInputLength 5 +#define kMaxInputLength 4096 + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < kMinInputLength || size > kMaxInputLength){ + return 1; + } + + std::string input(reinterpret_cast(data), size); + butil::IOBuf buf; + buf.append(input); + + brpc::Socket* sock = get_fuzz_socket(); + brpc::policy::ParseCouchbaseMessage(&buf, sock, false, NULL); + return 0; +} diff --git a/test/fuzzing/fuzz_esp.cpp b/test/fuzzing/fuzz_esp.cpp index 462aaf8b55..4f93d635a9 100644 --- a/test/fuzzing/fuzz_esp.cpp +++ b/test/fuzzing/fuzz_esp.cpp @@ -16,6 +16,7 @@ // under the License. #include "brpc/policy/esp_protocol.h" +#include "fuzz_common.h" #define kMinInputLength 5 #define kMaxInputLength 1024 @@ -32,7 +33,8 @@ LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) butil::IOBuf buf; buf.append(input); - brpc::policy::ParseEspMessage(&buf, NULL, false, NULL); + brpc::Socket* sock = get_fuzz_socket(); + brpc::policy::ParseEspMessage(&buf, sock, false, NULL); return 0; } diff --git a/test/fuzzing/fuzz_http_parser.cpp b/test/fuzzing/fuzz_http_parser.cpp new file mode 100644 index 0000000000..0c6b792c83 --- /dev/null +++ b/test/fuzzing/fuzz_http_parser.cpp @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include "brpc/details/http_parser.h" +#include "brpc/http_method.h" + +#define kMinInputLength 5 +#define kMaxInputLength 4096 + +static int on_url_cb(brpc::http_parser* p, const char* at, size_t length) { return 0; } +static int on_header_field_cb(brpc::http_parser* p, const char* at, size_t length) { return 0; } +static int on_header_value_cb(brpc::http_parser* p, const char* at, size_t length) { return 0; } +static int on_body_cb(brpc::http_parser* p, const char* at, size_t length) { return 0; } +static int on_message_begin_cb(brpc::http_parser* p) { return 0; } +static int on_headers_complete_cb(brpc::http_parser* p) { return 0; } +static int on_message_complete_cb(brpc::http_parser* p) { return 0; } + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < kMinInputLength || size > kMaxInputLength){ + return 1; + } + + // Use first byte to select mode + uint8_t mode = data[0] % 4; + const uint8_t *payload = data + 1; + size_t payload_size = size - 1; + + switch (mode) { + case 0: { + // Fuzz low-level HTTP request parsing + brpc::http_parser parser; + brpc::http_parser_init(&parser, brpc::HTTP_REQUEST); + brpc::http_parser_settings settings; + memset(&settings, 0, sizeof(settings)); + settings.on_url = on_url_cb; + settings.on_header_field = on_header_field_cb; + settings.on_header_value = on_header_value_cb; + settings.on_body = on_body_cb; + settings.on_message_begin = on_message_begin_cb; + settings.on_headers_complete = on_headers_complete_cb; + settings.on_message_complete = 
on_message_complete_cb; + brpc::http_parser_execute(&parser, &settings, + reinterpret_cast(payload), payload_size); + break; + } + case 1: { + // Fuzz low-level HTTP response parsing + brpc::http_parser parser; + brpc::http_parser_init(&parser, brpc::HTTP_RESPONSE); + brpc::http_parser_settings settings; + memset(&settings, 0, sizeof(settings)); + settings.on_url = on_url_cb; + settings.on_header_field = on_header_field_cb; + settings.on_header_value = on_header_value_cb; + settings.on_body = on_body_cb; + settings.on_message_begin = on_message_begin_cb; + settings.on_headers_complete = on_headers_complete_cb; + settings.on_message_complete = on_message_complete_cb; + brpc::http_parser_execute(&parser, &settings, + reinterpret_cast(payload), payload_size); + break; + } + case 2: { + // Fuzz URL parsing (not connect) + brpc::http_parser_url u; + brpc::http_parser_parse_url(reinterpret_cast(payload), + payload_size, 0, &u); + break; + } + case 3: { + // Fuzz URL parsing (connect mode) + brpc::http_parser_url u; + brpc::http_parser_parse_url(reinterpret_cast(payload), + payload_size, 1, &u); + break; + } + } + + return 0; +} diff --git a/test/fuzzing/fuzz_hulu.cpp b/test/fuzzing/fuzz_hulu.cpp index f90d7be864..cb81e1414d 100644 --- a/test/fuzzing/fuzz_hulu.cpp +++ b/test/fuzzing/fuzz_hulu.cpp @@ -16,6 +16,7 @@ // under the License. 
#include "brpc/policy/hulu_pbrpc_protocol.h" +#include "fuzz_common.h" #define kMinInputLength 5 #define kMaxInputLength 1024 @@ -32,7 +33,8 @@ LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) butil::IOBuf buf; buf.append(input); - brpc::policy::ParseHuluMessage(&buf, NULL, false, NULL); + brpc::Socket* sock = get_fuzz_socket(); + brpc::policy::ParseHuluMessage(&buf, sock, false, NULL); return 0; } diff --git a/test/fuzzing/fuzz_memcache.cpp b/test/fuzzing/fuzz_memcache.cpp new file mode 100644 index 0000000000..e1ef86e626 --- /dev/null +++ b/test/fuzzing/fuzz_memcache.cpp @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "brpc/policy/memcache_binary_protocol.h" +#include "fuzz_common.h" + +#define kMinInputLength 5 +#define kMaxInputLength 4096 + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < kMinInputLength || size > kMaxInputLength){ + return 1; + } + + std::string input(reinterpret_cast(data), size); + butil::IOBuf buf; + buf.append(input); + + brpc::Socket* sock = get_fuzz_socket(); + brpc::policy::ParseMemcacheMessage(&buf, sock, false, NULL); + return 0; +} diff --git a/test/fuzzing/fuzz_mongo.cpp b/test/fuzzing/fuzz_mongo.cpp new file mode 100644 index 0000000000..c78ed96591 --- /dev/null +++ b/test/fuzzing/fuzz_mongo.cpp @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "brpc/policy/mongo_protocol.h" +#include "fuzz_common.h" + +#define kMinInputLength 5 +#define kMaxInputLength 4096 + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < kMinInputLength || size > kMaxInputLength){ + return 1; + } + + std::string input(reinterpret_cast(data), size); + butil::IOBuf buf; + buf.append(input); + + brpc::Socket* sock = get_fuzz_socket(); + brpc::policy::ParseMongoMessage(&buf, sock, false, NULL); + return 0; +} diff --git a/test/fuzzing/fuzz_shead.cpp b/test/fuzzing/fuzz_shead.cpp index 2abf517b8c..e5d574da58 100644 --- a/test/fuzzing/fuzz_shead.cpp +++ b/test/fuzzing/fuzz_shead.cpp @@ -17,6 +17,7 @@ #include "brpc/policy/public_pbrpc_meta.pb.h" #include "brpc/policy/public_pbrpc_protocol.h" #include "brpc/policy/most_common_message.h" +#include "fuzz_common.h" #define kMinInputLength 5 #define kMaxInputLength 1024 @@ -32,7 +33,8 @@ LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) butil::IOBuf buf; buf.append(input); - brpc::policy::ParseNsheadMessage(&buf, NULL, false, NULL); + brpc::Socket* sock = get_fuzz_socket(); + brpc::policy::ParseNsheadMessage(&buf, sock, false, NULL); return 0; } diff --git a/test/fuzzing/fuzz_sofa.cpp b/test/fuzzing/fuzz_sofa.cpp index e26f6218cb..b393f85270 100644 --- a/test/fuzzing/fuzz_sofa.cpp +++ b/test/fuzzing/fuzz_sofa.cpp @@ -18,6 +18,7 @@ #include "brpc/policy/sofa_pbrpc_meta.pb.h" #include "brpc/policy/sofa_pbrpc_protocol.h" #include "brpc/policy/most_common_message.h" +#include "fuzz_common.h" #define kMinInputLength 5 #define kMaxInputLength 1024 @@ -34,6 +35,7 @@ LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) butil::IOBuf buf; buf.append(input); - brpc::policy::ParseSofaMessage(&buf, NULL, false, NULL); + brpc::Socket* sock = get_fuzz_socket(); + brpc::policy::ParseSofaMessage(&buf, sock, false, NULL); return 0; } diff --git a/test/fuzzing/fuzz_streaming.cpp b/test/fuzzing/fuzz_streaming.cpp new file mode 100644 index 
0000000000..532bb72550 --- /dev/null +++ b/test/fuzzing/fuzz_streaming.cpp @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "brpc/policy/streaming_rpc_protocol.h" +#include "fuzz_common.h" + +#define kMinInputLength 5 +#define kMaxInputLength 4096 + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < kMinInputLength || size > kMaxInputLength){ + return 1; + } + + std::string input(reinterpret_cast(data), size); + butil::IOBuf buf; + buf.append(input); + + brpc::Socket* sock = get_fuzz_socket(); + brpc::policy::ParseStreamingMessage(&buf, sock, false, NULL); + return 0; +} diff --git a/test/fuzzing/fuzz_thrift.cpp b/test/fuzzing/fuzz_thrift.cpp new file mode 100644 index 0000000000..c7ecd4323c --- /dev/null +++ b/test/fuzzing/fuzz_thrift.cpp @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "brpc/policy/thrift_protocol.h" + +#define kMinInputLength 5 +#define kMaxInputLength 4096 + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < kMinInputLength || size > kMaxInputLength){ + return 1; + } + + std::string input(reinterpret_cast(data), size); + butil::IOBuf buf; + buf.append(input); + + brpc::policy::ParseThriftMessage(&buf, NULL, false, NULL); + return 0; +} diff --git a/test/fuzzing/oss-fuzz.sh b/test/fuzzing/oss-fuzz.sh index f959b70b74..3f1649e9bb 100644 --- a/test/fuzzing/oss-fuzz.sh +++ b/test/fuzzing/oss-fuzz.sh @@ -34,7 +34,10 @@ cmake \ # https://github.com/google/oss-fuzz/pull/10898 make \ fuzz_butil fuzz_esp fuzz_hpack fuzz_http fuzz_hulu fuzz_json \ - fuzz_redis fuzz_shead fuzz_sofa fuzz_uri --ignore-errors -j$(nproc) + fuzz_redis fuzz_shead fuzz_sofa fuzz_uri \ + fuzz_baidu_rpc fuzz_mongo fuzz_memcache \ + fuzz_couchbase fuzz_streaming fuzz_http_parser \ + fuzz_amf --ignore-errors -j$(nproc) cp test/fuzz_* $OUT/ From b236330acfcf9ad7f1cceb42b55149eac732d44c Mon Sep 17 00:00:00 2001 From: lh2debug Date: Tue, 31 Mar 2026 11:36:02 +0800 Subject: [PATCH 48/84] Fix span lifecycle with smart pointers to prevent use-after-free in async RPC callbacks (#3140) * Fix span lifecycle with smart pointers to prevent use-after-free in async RPC callbacks (#3068) * Refactor bthread span lifecycle management and optimize span API with smart pointer reuse (#3068) --------- Co-authored-by: lhh --- docs/cn/rpcz.md | 12 +- src/brpc/builtin/rpcz_service.cpp | 90 +++-- src/brpc/channel.cpp | 22 
+- src/brpc/controller.cpp | 86 +++-- src/brpc/controller.h | 3 +- .../details/controller_private_accessor.h | 12 +- src/brpc/global.cpp | 8 +- src/brpc/policy/baidu_rpc_protocol.cpp | 19 +- src/brpc/policy/couchbase_protocol.cpp | 3 +- src/brpc/policy/esp_protocol.cpp | 6 +- src/brpc/policy/http_rpc_protocol.cpp | 10 +- src/brpc/policy/hulu_pbrpc_protocol.cpp | 9 +- src/brpc/policy/memcache_binary_protocol.cpp | 3 +- src/brpc/policy/nova_pbrpc_protocol.cpp | 3 +- src/brpc/policy/nshead_mcpack_protocol.cpp | 3 +- src/brpc/policy/nshead_protocol.cpp | 13 +- src/brpc/policy/public_pbrpc_protocol.cpp | 5 +- src/brpc/policy/redis_protocol.cpp | 3 +- src/brpc/policy/rtmp_protocol.cpp | 3 +- src/brpc/policy/sofa_pbrpc_protocol.cpp | 7 +- src/brpc/policy/thrift_protocol.cpp | 10 +- src/brpc/policy/ubrpc2pb_protocol.cpp | 3 +- src/brpc/span.cpp | 361 +++++++++++++----- src/brpc/span.h | 161 ++++++-- src/brpc/traceprintf.h | 17 + src/bthread/bthread.cpp | 24 +- src/bthread/bthread.h | 28 ++ src/bthread/key.cpp | 1 + src/bthread/task_group.cpp | 42 +- src/bthread/task_meta.h | 23 +- src/bthread/unstable.h | 3 - src/bvar/collector.cpp | 5 + test/brpc_channel_unittest.cpp | 11 +- test/bthread_unittest.cpp | 12 +- 34 files changed, 729 insertions(+), 292 deletions(-) diff --git a/docs/cn/rpcz.md b/docs/cn/rpcz.md index 637997d15f..12ba2ef3ea 100644 --- a/docs/cn/rpcz.md +++ b/docs/cn/rpcz.md @@ -66,4 +66,14 @@ bthread_attr_t attr = { BTHREAD_STACKTYPE_NORMAL, BTHREAD_INHERIT_SPAN, NULL }; bthread_start_urgent(&tid, &attr, thread_proc, arg); ``` -注意:使用这种方式创建子bthread来发送rpc,请确保rpc在server返回response之前完成,否则可能导致使用被释放的Span对象而出core。 +### Span生命周期管理 + +brpc使用智能指针(`std::shared_ptr`/`std::weak_ptr`)管理Span对象的生命周期,并通过自旋锁保护并发访问,解决了以下问题: + +1. **Use-after-free防护**:父Span通过`shared_ptr`持有子Span的强引用,TLS中使用`weak_ptr`存储,确保Span对象在被访问时仍然有效。即使server在子bthread完成前返回response,也不会导致访问已释放的Span对象。 + +2. **线程安全**:使用自旋锁保护`_client_list`和`_info`的并发修改,支持多个bthread同时创建子span或添加annotation。 + +3. 
**自动生命周期管理**:当父Span销毁时,会自动清理所有子Span(通过`_client_list.clear()`),无需手动管理。 + +使用`BTHREAD_INHERIT_SPAN`创建子bthread时,不再需要担心Span对象的生命周期问题,可以安全地在异步场景中使用。 diff --git a/src/brpc/builtin/rpcz_service.cpp b/src/brpc/builtin/rpcz_service.cpp index d9121eb555..e5111ac3d3 100644 --- a/src/brpc/builtin/rpcz_service.cpp +++ b/src/brpc/builtin/rpcz_service.cpp @@ -185,16 +185,43 @@ static void PrintElapse(std::ostream& os, int64_t cur_time, static void PrintAnnotations( std::ostream& os, int64_t cur_time, int64_t* last_time, - SpanInfoExtractor** extractors, int num_extr) { + SpanInfoExtractor** extractors, int num_extr, const RpczSpan* span) { int64_t anno_time; std::string a; + const char* span_type_str = "Span"; + if (span) { + switch (span->type()) { + case SPAN_TYPE_SERVER: + span_type_str = "ServerSpan"; + break; + case SPAN_TYPE_CLIENT: + span_type_str = "ClientSpan"; + break; + case SPAN_TYPE_BTHREAD: + span_type_str = "BthreadSpan"; + break; + } + } + // TODO: Going through all extractors is not strictly correct because // later extractors may have earlier annotations. 
for (int i = 0; i < num_extr; ++i) { while (extractors[i]->PopAnnotation(cur_time, &anno_time, &a)) { PrintRealTime(os, anno_time); PrintElapse(os, anno_time, last_time); - os << ' ' << WebEscape(a); + os << ' '; + if (span) { + const char* short_type = "SPAN"; + if (span->type() == SPAN_TYPE_SERVER) { + short_type = "Server"; + } else if (span->type() == SPAN_TYPE_CLIENT) { + short_type = "Client"; + } else if (span->type() == SPAN_TYPE_BTHREAD) { + short_type = "Bthread"; + } + os << '[' << short_type << " SPAN#" << Hex(span->span_id()) << "] "; + } + os << WebEscape(a); if (a.empty() || butil::back_char(a) != '\n') { os << '\n'; } @@ -204,12 +231,12 @@ static void PrintAnnotations( static bool PrintAnnotationsAndRealTimeSpan( std::ostream& os, int64_t cur_time, int64_t* last_time, - SpanInfoExtractor** extr, int num_extr) { + SpanInfoExtractor** extr, int num_extr, const RpczSpan* span) { if (cur_time == 0) { // the field was not set. return false; } - PrintAnnotations(os, cur_time, last_time, extr, num_extr); + PrintAnnotations(os, cur_time, last_time, extr, num_extr, span); PrintRealTime(os, cur_time); PrintElapse(os, cur_time, last_time); return true; @@ -239,9 +266,10 @@ static void PrintClientSpan( extr[num_extr++] = server_extr; } extr[num_extr++] = &client_extr; - // start_send_us is always set for client spans. - CHECK(PrintAnnotationsAndRealTimeSpan(os, span.start_send_real_us(), - last_time, extr, num_extr)); + if (!PrintAnnotationsAndRealTimeSpan(os, span.start_send_real_us(), + last_time, extr, num_extr, &span)) { + os << " start_send_real_us:not-set"; + } const Protocol* protocol = FindProtocol(span.protocol()); const char* protocol_name = (protocol ? 
protocol->name : "Unknown"); const butil::EndPoint remote_side(butil::int2ip(span.remote_ip()), span.remote_port()); @@ -271,12 +299,12 @@ static void PrintClientSpan( os << std::endl; if (PrintAnnotationsAndRealTimeSpan(os, span.sent_real_us(), - last_time, extr, num_extr)) { - os << " Requested(" << span.request_size() << ") [1]" << std::endl; + last_time, extr, num_extr, &span)) { + os << " [Client SPAN#" << Hex(span.span_id()) << "] Requested(" << span.request_size() << ") [1]" << std::endl; } if (PrintAnnotationsAndRealTimeSpan(os, span.received_real_us(), - last_time, extr, num_extr)) { - os << " Received response(" << span.response_size() << ")"; + last_time, extr, num_extr, &span)) { + os << " [Client SPAN#" << Hex(span.span_id()) << "] Received response(" << span.response_size() << ")"; if (span.base_cid() != 0 && span.ending_cid() != 0) { int64_t ver = span.ending_cid() - span.base_cid(); if (ver >= 1) { @@ -289,18 +317,18 @@ static void PrintClientSpan( } if (PrintAnnotationsAndRealTimeSpan(os, span.start_parse_real_us(), - last_time, extr, num_extr)) { - os << " Processing the response in a new bthread" << std::endl; + last_time, extr, num_extr, &span)) { + os << " [Client SPAN#" << Hex(span.span_id()) << "] Processing the response in a new bthread" << std::endl; } if (PrintAnnotationsAndRealTimeSpan( os, span.start_callback_real_us(), - last_time, extr, num_extr)) { - os << (span.async() ? " Enter user's done" : " Back to user's callsite") << std::endl; + last_time, extr, num_extr, &span)) { + os << " [Client SPAN#" << Hex(span.span_id()) << "] " << (span.async() ? 
" Enter user's done" : " Back to user's callsite") << std::endl; } PrintAnnotations(os, std::numeric_limits::max(), - last_time, extr, num_extr); + last_time, extr, num_extr, &span); } static void PrintClientSpan(std::ostream& os,const RpczSpan& span, @@ -318,7 +346,15 @@ static void PrintBthreadSpan(std::ostream& os, const RpczSpan& span, int64_t* la extr[num_extr++] = server_extr; } extr[num_extr++] = &client_extr; - PrintAnnotations(os, std::numeric_limits::max(), last_time, extr, num_extr); + + // Print span id for bthread span context identification + os << " [Bthread SPAN#" << Hex(span.span_id()); + if (span.parent_span_id() != 0) { + os << " parent#" << Hex(span.parent_span_id()); + } + os << "] "; + + PrintAnnotations(os, std::numeric_limits::max(), last_time, extr, num_extr, &span); } static void PrintServerSpan(std::ostream& os, const RpczSpan& span, @@ -348,16 +384,16 @@ static void PrintServerSpan(std::ostream& os, const RpczSpan& span, os << std::endl; if (PrintAnnotationsAndRealTimeSpan( os, span.start_parse_real_us(), - &last_time, extr, ARRAY_SIZE(extr))) { - os << " Processing the request in a new bthread" << std::endl; + &last_time, extr, ARRAY_SIZE(extr), &span)) { + os << " [Server SPAN#" << Hex(span.span_id()) << "] Processing the request in a new bthread" << std::endl; } bool entered_user_method = false; if (PrintAnnotationsAndRealTimeSpan( os, span.start_callback_real_us(), - &last_time, extr, ARRAY_SIZE(extr))) { + &last_time, extr, ARRAY_SIZE(extr), &span)) { entered_user_method = true; - os << " Enter " << WebEscape(span.full_method_name()) << std::endl; + os << " [Server SPAN#" << Hex(span.span_id()) << "] Enter " << WebEscape(span.full_method_name()) << std::endl; } const int nclient = span.client_spans_size(); @@ -372,22 +408,22 @@ static void PrintServerSpan(std::ostream& os, const RpczSpan& span, if (PrintAnnotationsAndRealTimeSpan( os, span.start_send_real_us(), - &last_time, extr, ARRAY_SIZE(extr))) { + &last_time, extr, 
ARRAY_SIZE(extr), &span)) { if (entered_user_method) { - os << " Leave " << WebEscape(span.full_method_name()) << std::endl; + os << " [Server SPAN#" << Hex(span.span_id()) << "] Leave " << WebEscape(span.full_method_name()) << std::endl; } else { - os << " Responding" << std::endl; + os << " [Server SPAN#" << Hex(span.span_id()) << "] Responding" << std::endl; } } if (PrintAnnotationsAndRealTimeSpan( os, span.sent_real_us(), - &last_time, extr, ARRAY_SIZE(extr))) { - os << " Responded(" << span.response_size() << ')' << std::endl; + &last_time, extr, ARRAY_SIZE(extr), &span)) { + os << " [Server SPAN#" << Hex(span.span_id()) << "] Responded(" << span.response_size() << ')' << std::endl; } PrintAnnotations(os, std::numeric_limits::max(), - &last_time, extr, ARRAY_SIZE(extr)); + &last_time, extr, ARRAY_SIZE(extr), &span); } class RpczSpanFilter : public SpanFilter { diff --git a/src/brpc/channel.cpp b/src/brpc/channel.cpp index dde4ca0f8c..a8caeaf953 100644 --- a/src/brpc/channel.cpp +++ b/src/brpc/channel.cpp @@ -38,6 +38,7 @@ #include "brpc/rdma/rdma_helper.h" #include "brpc/policy/esp_authenticator.h" #include "brpc/transport_factory.h" +#include "brpc/details/controller_private_accessor.h" namespace brpc { @@ -502,7 +503,7 @@ void Channel::CallMethod(const google::protobuf::MethodDescriptor* method, } cntl->set_used_by_rpc(); - if (cntl->_sender == NULL && IsTraceable(Span::tls_parent())) { + if (cntl->_sender == NULL && IsTraceable(Span::tls_parent().get())) { const int64_t start_send_us = butil::cpuwide_time_us(); std::string method_name; if (_get_method_name) { @@ -513,13 +514,16 @@ void Channel::CallMethod(const google::protobuf::MethodDescriptor* method, const static std::string NULL_METHOD_STR = "null-method"; method_name = NULL_METHOD_STR; } - Span* span = Span::CreateClientSpan( + std::shared_ptr span = Span::CreateClientSpan( method_name, start_send_real_us - start_send_us); - span->set_log_id(cntl->log_id()); - span->set_base_cid(correlation_id); - 
span->set_protocol(_options.protocol); - span->set_start_send_us(start_send_us); - cntl->_span = span; + if (span) { + ControllerPrivateAccessor accessor(cntl); + span->set_log_id(cntl->log_id()); + span->set_base_cid(correlation_id); + span->set_protocol(_options.protocol); + span->set_start_send_us(start_send_us); + accessor.set_span(span); + } } // Override some options if they haven't been set by Controller if (cntl->timeout_ms() == UNSET_MAGIC_NUM) { @@ -620,9 +624,7 @@ void Channel::CallMethod(const google::protobuf::MethodDescriptor* method, // be woken up by callback when RPC finishes (succeeds or still // fails after retry) Join(correlation_id); - if (cntl->_span) { - cntl->SubmitSpan(); - } + cntl->SubmitSpan(); cntl->OnRPCEnd(butil::gettimeofday_us()); } } diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index 133d1f0453..15c8c91887 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -183,8 +183,8 @@ static void CreateIgnoreAllRead() { s_ignore_all_read = new IgnoreAllRead; } // you don't have to set the fields to initial state after deletion since // they'll be set uniformly after this method is called. void Controller::ResetNonPods() { - if (_span) { - Span::Submit(_span, butil::cpuwide_time_us()); + if (auto span = _span.lock()) { + Span::Submit(span, butil::cpuwide_time_us()); } _error_text.clear(); _remote_side = butil::EndPoint(); @@ -240,7 +240,7 @@ void Controller::ResetNonPods() { void Controller::ResetPods() { // NOTE: Make the sequence of assignments same with the order that they're // defined in header. Better for cpu cache and faster for lookup. 
- _span = NULL; + _span.reset(); _flags = 0; #ifndef BAIDU_INTERNAL set_pb_bytes_to_base64(true); @@ -458,9 +458,9 @@ void Controller::SetFailed(const std::string& reason) { AppendServerIdentiy(); } _error_text.append(reason); - if (_span) { - _span->set_error_code(_error_code); - _span->Annotate(reason); + if (auto span = _span.lock()) { + span->set_error_code(_error_code); + span->Annotate(reason); } UpdateResponseHeader(this); } @@ -487,9 +487,9 @@ void Controller::SetFailed(int error_code, const char* reason_fmt, ...) { va_start(ap, reason_fmt); butil::string_vappendf(&_error_text, reason_fmt, ap); va_end(ap); - if (_span) { - _span->set_error_code(_error_code); - _span->AnnotateCStr(_error_text.c_str() + old_size, 0); + if (auto span = _span.lock()) { + span->set_error_code(_error_code); + span->AnnotateCStr(_error_text.c_str() + old_size, 0); } UpdateResponseHeader(this); } @@ -515,9 +515,9 @@ void Controller::CloseConnection(const char* reason_fmt, ...) { va_start(ap, reason_fmt); butil::string_vappendf(&_error_text, reason_fmt, ap); va_end(ap); - if (_span) { - _span->set_error_code(_error_code); - _span->AnnotateCStr(_error_text.c_str() + old_size, 0); + if (auto span = _span.lock()) { + span->set_error_code(_error_code); + span->AnnotateCStr(_error_text.c_str() + old_size, 0); } UpdateResponseHeader(this); } @@ -952,9 +952,9 @@ void Controller::EndRPC(const CompletionInfo& info) { } // RPC finished, now it's safe to release `LoadBalancerWithNaming' _lb.reset(); - if (_span) { - _span->set_ending_cid(info.id); - _span->set_async(_done); + if (auto span = _span.lock()) { + span->set_ending_cid(info.id); + span->set_async(_done); // Submit the span if we're in async RPC. For sync RPC, the span // is submitted after Join() to get a more accurate resuming timestamp. 
if (_done) { @@ -1028,12 +1028,16 @@ void Controller::DoneInBackupThread() { void Controller::SubmitSpan() { const int64_t now = butil::cpuwide_time_us(); - _span->set_start_callback_us(now); - if (_span->local_parent()) { - _span->local_parent()->AsParent(); + if (auto span = _span.lock()) { + span->set_start_callback_us(now); + if (auto parent_span = span->local_parent().lock()) { + if (parent_span->is_active()) { + parent_span->AsParent(); + } + } + Span::Submit(span, now); + _span.reset(); } - Span::Submit(_span, now); - _span = NULL; } void Controller::HandleSendFailed() { @@ -1131,8 +1135,7 @@ void Controller::IssueRPC(int64_t start_realtime_us) { CHECK_EQ(_remote_side, tmp_sock->remote_side()); } - Span* span = _span; - if (span) { + if (auto span = _span.lock()) { if (_current_call.nretry == 0) { span->set_remote_side(_remote_side); } else { @@ -1244,7 +1247,7 @@ void Controller::IssueRPC(int64_t start_realtime_us) { int rc; size_t packet_size = 0; if (user_packet_guard) { - if (span) { + if (auto span = _span.lock()) { packet_size = user_packet_guard->EstimatedByteSize(); } rc = _current_call.sending_sock->Write(user_packet_guard, &wopt); @@ -1252,7 +1255,7 @@ void Controller::IssueRPC(int64_t start_realtime_us) { packet_size = packet.size(); rc = _current_call.sending_sock->Write(&packet, &wopt); } - if (span) { + if (auto span = _span.lock()) { if (_current_call.nretry == 0) { span->set_sent_us(butil::cpuwide_time_us()); span->set_request_size(packet_size); @@ -1396,8 +1399,19 @@ const Controller* Controller::sub(int index) const { return NULL; } -uint64_t Controller::trace_id() const { return _span ? _span->trace_id() : 0; } -uint64_t Controller::span_id() const { return _span ? 
_span->span_id() : 0; } +uint64_t Controller::trace_id() const { + if (auto span = _span.lock()) { + return span->trace_id(); + } + return 0; +} + +uint64_t Controller::span_id() const { + if (auto span = _span.lock()) { + return span->span_id(); + } + return 0; +} void* Controller::session_local_data() { if (_session_local_data) { @@ -1724,4 +1738,24 @@ void Controller::DoPrintLogPrefix(std::ostream& os) const { } } + +ControllerPrivateAccessor& ControllerPrivateAccessor::set_span( + const std::shared_ptr& span) { + _cntl->_span = span; + return *this; +} + +ControllerPrivateAccessor& ControllerPrivateAccessor::set_span(Span* span) { + if (span) { + _cntl->_span = span->shared_from_this(); + } else { + _cntl->_span.reset(); + } + return *this; +} + +std::shared_ptr ControllerPrivateAccessor::span() const { + return _cntl->_span.lock(); +} + } // namespace brpc diff --git a/src/brpc/controller.h b/src/brpc/controller.h index 69d859ea8f..45f71b72f6 100644 --- a/src/brpc/controller.h +++ b/src/brpc/controller.h @@ -25,6 +25,7 @@ #include // std::function #include // Users often need gflags #include +#include #include "butil/intrusive_ptr.hpp" // butil::intrusive_ptr #include "bthread/errno.h" // Redefine errno #include "butil/endpoint.h" // butil::EndPoint @@ -803,7 +804,7 @@ friend void policy::ProcessThriftRequest(InputMessageBase*); private: // NOTE: align and group fields to make Controller as compact as possible. 
- Span* _span; + std::weak_ptr _span; uint32_t _flags; // all boolean fields inside Controller int32_t _error_code; std::string _error_text; diff --git a/src/brpc/details/controller_private_accessor.h b/src/brpc/details/controller_private_accessor.h index 1a9d7062af..0ad1aba640 100644 --- a/src/brpc/details/controller_private_accessor.h +++ b/src/brpc/details/controller_private_accessor.h @@ -30,9 +30,10 @@ class Message; } } - namespace brpc { +class Span; + class AuthContext; // A wrapper to access some private methods/fields of `Controller' @@ -90,17 +91,16 @@ class ControllerPrivateAccessor { return *this; } - ControllerPrivateAccessor &set_span(Span* span) { - _cntl->_span = span; - return *this; - } + // Overloaded set_span methods to support both shared_ptr and raw pointer + ControllerPrivateAccessor &set_span(const std::shared_ptr& span); + ControllerPrivateAccessor &set_span(Span* span); ControllerPrivateAccessor &set_request_protocol(ProtocolType protocol) { _cntl->_request_protocol = protocol; return *this; } - Span* span() const { return _cntl->_span; } + std::shared_ptr span() const; uint32_t pipelined_count() const { return _cntl->_pipelined_count; } void set_pipelined_count(uint32_t count) { _cntl->_pipelined_count = count; } diff --git a/src/brpc/global.cpp b/src/brpc/global.cpp index c561d927d7..1f67aee20b 100644 --- a/src/brpc/global.cpp +++ b/src/brpc/global.cpp @@ -54,6 +54,7 @@ // Span #include "brpc/span.h" #include "bthread/unstable.h" +#include "bthread/bthread.h" // Compress handlers #include "brpc/compress.h" @@ -343,8 +344,11 @@ static void GlobalInitializeOrDieImpl() { SetLogHandler(&BaiduStreamingLogHandler); #endif - // Set bthread create span function - bthread_set_create_span_func(CreateBthreadSpan); + if (bthread_set_span_funcs(CreateBthreadSpanAsVoid, + DestroyRpczParentSpan, + EndBthreadSpan) != 0) { + LOG(FATAL) << "Failed to register span callbacks to bthread"; + } // Setting the variable here does not work, the profiler 
probably check // the variable before main() for only once. diff --git a/src/brpc/policy/baidu_rpc_protocol.cpp b/src/brpc/policy/baidu_rpc_protocol.cpp index 0dba01624a..2c5a7e7224 100644 --- a/src/brpc/policy/baidu_rpc_protocol.cpp +++ b/src/brpc/policy/baidu_rpc_protocol.cpp @@ -272,9 +272,9 @@ struct BaiduProxyPBMessages : public RpcPBMessages { // Used by UT, can't be static. void SendRpcResponse(int64_t correlation_id, Controller* cntl, RpcPBMessages* messages, const Server* server, - MethodStatus* method_status, int64_t received_us) { + MethodStatus* method_status, int64_t received_us, + std::shared_ptr span) { ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); if (span) { span->set_start_send_us(butil::cpuwide_time_us()); } @@ -645,7 +645,7 @@ void ProcessRpcRequest(InputMessageBase* msg_base) { bthread_assign_data((void*)&server->thread_local_options()); } - Span* span = NULL; + std::shared_ptr span; if (IsTraceable(request_meta.has_trace_id())) { span = Span::CreateServerSpan( request_meta.trace_id(), request_meta.span_id(), @@ -827,9 +827,9 @@ void ProcessRpcRequest(InputMessageBase* msg_base) { // `socket' will be held until response has been sent google::protobuf::Closure* done = ::brpc::NewCallback< int64_t, Controller*, RpcPBMessages*, - const Server*, MethodStatus*, int64_t>( + const Server*, MethodStatus*, int64_t, std::shared_ptr>( &SendRpcResponse, meta.correlation_id(),cntl.get(), - messages, server, method_status, msg->received_us()); + messages, server, method_status, msg->received_us(), span); // optional, just release resource ASAP msg.reset(); @@ -858,10 +858,11 @@ void ProcessRpcRequest(InputMessageBase* msg_base) { // `cntl', `req' and `res' will be deleted inside `SendRpcResponse' // `socket' will be held until response has been sent + SendRpcResponse(meta.correlation_id(), cntl.release(), messages, server, method_status, - msg->received_us()); + msg->received_us(), span); } bool VerifyRpcRequest(const 
InputMessageBase* msg_base) { @@ -948,8 +949,7 @@ void ProcessRpcResponse(InputMessageBase* msg_base) { } cntl->set_rpc_received_us(msg->received_us()); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->meta.size() + msg->payload.size() + 12); @@ -1119,8 +1119,7 @@ void PackRpcRequest(butil::IOBuf* req_buf, } meta.set_content_type(cntl->request_content_type()); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { request_meta->set_trace_id(span->trace_id()); request_meta->set_span_id(span->span_id()); request_meta->set_parent_span_id(span->parent_span_id()); diff --git a/src/brpc/policy/couchbase_protocol.cpp b/src/brpc/policy/couchbase_protocol.cpp index a014581ed5..0ece53dbfb 100644 --- a/src/brpc/policy/couchbase_protocol.cpp +++ b/src/brpc/policy/couchbase_protocol.cpp @@ -160,8 +160,7 @@ void ProcessCouchbaseResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->meta.length()); diff --git a/src/brpc/policy/esp_protocol.cpp b/src/brpc/policy/esp_protocol.cpp index 5925796b88..ee8464b85e 100644 --- a/src/brpc/policy/esp_protocol.cpp +++ b/src/brpc/policy/esp_protocol.cpp @@ -101,8 +101,7 @@ void PackEspRequest(butil::IOBuf* packet_buf, } accessor.get_sending_socket()->set_correlation_id(correlation_id); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_request_size(request.length()); } @@ -131,8 +130,7 @@ void ProcessEspResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { 
span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->payload.length()); diff --git a/src/brpc/policy/http_rpc_protocol.cpp b/src/brpc/policy/http_rpc_protocol.cpp index d0150a63fd..b03a961b52 100644 --- a/src/brpc/policy/http_rpc_protocol.cpp +++ b/src/brpc/policy/http_rpc_protocol.cpp @@ -373,8 +373,7 @@ void ProcessHttpResponse(InputMessageBase* msg) { ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); // TODO: changing when imsg_guard->read_body_progressively() is true @@ -721,8 +720,7 @@ void SerializeHttpRequest(butil::IOBuf* /*not used*/, hreq.uri().set_path(path); } - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { hreq.SetHeader("x-bd-trace-id", butil::string_printf( "%llu", (unsigned long long)span->trace_id())); hreq.SetHeader("x-bd-span-id", butil::string_printf( @@ -838,7 +836,7 @@ HttpResponseSender::~HttpResponseSender() { return; } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); + auto span = accessor.span(); if (span) { span->set_start_send_us(butil::cpuwide_time_us()); } @@ -1493,7 +1491,7 @@ void ProcessHttpRequest(InputMessageBase *msg) { bthread_assign_data((void*)&server->thread_local_options()); } - Span* span = NULL; + std::shared_ptr span; const std::string& path = req_header.uri().path(); const std::string* trace_id_str = req_header.GetHeader("x-bd-trace-id"); if (IsTraceable(trace_id_str)) { diff --git a/src/brpc/policy/hulu_pbrpc_protocol.cpp b/src/brpc/policy/hulu_pbrpc_protocol.cpp index bd0c496027..f69804851f 100644 --- a/src/brpc/policy/hulu_pbrpc_protocol.cpp +++ b/src/brpc/policy/hulu_pbrpc_protocol.cpp @@ -230,7 +230,7 @@ static void SendHuluResponse(int64_t correlation_id, MethodStatus* method_status, int64_t received_us) { ControllerPrivateAccessor 
accessor(cntl); - Span* span = accessor.span(); + auto span = accessor.span(); if (span) { span->set_start_send_us(butil::cpuwide_time_us()); } @@ -414,7 +414,7 @@ void ProcessHuluRequest(InputMessageBase* msg_base) { bthread_assign_data((void*)&server->thread_local_options()); } - Span* span = NULL; + std::shared_ptr span; if (IsTraceable(meta.has_trace_id())) { span = Span::CreateServerSpan( meta.trace_id(), meta.span_id(), meta.parent_span_id(), @@ -612,8 +612,7 @@ void ProcessHuluResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->meta.size() + msg->payload.size() + 12); @@ -715,7 +714,7 @@ void PackHuluRequest(butil::IOBuf* req_buf, } // else don't set user_mesage_size when there's no attachment, otherwise // existing hulu-pbrpc server may complain about empty attachment. 
- Span* span = ControllerPrivateAccessor(cntl).span(); + auto span = ControllerPrivateAccessor(cntl).span(); if (span) { meta.set_trace_id(span->trace_id()); meta.set_span_id(span->span_id()); diff --git a/src/brpc/policy/memcache_binary_protocol.cpp b/src/brpc/policy/memcache_binary_protocol.cpp index d4c39dfd33..46432c4f7e 100644 --- a/src/brpc/policy/memcache_binary_protocol.cpp +++ b/src/brpc/policy/memcache_binary_protocol.cpp @@ -164,8 +164,7 @@ void ProcessMemcacheResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->meta.length()); diff --git a/src/brpc/policy/nova_pbrpc_protocol.cpp b/src/brpc/policy/nova_pbrpc_protocol.cpp index 249e35c7a1..a1d88f2562 100644 --- a/src/brpc/policy/nova_pbrpc_protocol.cpp +++ b/src/brpc/policy/nova_pbrpc_protocol.cpp @@ -121,8 +121,7 @@ void ProcessNovaResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->meta.size() + msg->payload.size()); diff --git a/src/brpc/policy/nshead_mcpack_protocol.cpp b/src/brpc/policy/nshead_mcpack_protocol.cpp index 052fd0f3b7..8ba49f936e 100644 --- a/src/brpc/policy/nshead_mcpack_protocol.cpp +++ b/src/brpc/policy/nshead_mcpack_protocol.cpp @@ -112,8 +112,7 @@ void ProcessNsheadMcpackResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->meta.size() + msg->payload.size()); diff --git a/src/brpc/policy/nshead_protocol.cpp 
b/src/brpc/policy/nshead_protocol.cpp index a26dc96857..82f696e3c2 100644 --- a/src/brpc/policy/nshead_protocol.cpp +++ b/src/brpc/policy/nshead_protocol.cpp @@ -69,7 +69,7 @@ void NsheadClosure::Run() { std::unique_ptr recycle_ctx(this); ControllerPrivateAccessor accessor(&_controller); - Span* span = accessor.span(); + auto span = accessor.span(); if (span) { span->set_start_send_us(butil::cpuwide_time_us()); } @@ -144,8 +144,7 @@ void NsheadClosure::Run() { void NsheadClosure::SetMethodName(const std::string& full_method_name) { ControllerPrivateAccessor accessor(&_controller); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->ResetServerSpanName(full_method_name); } } @@ -298,7 +297,7 @@ void ProcessNsheadRequest(InputMessageBase* msg_base) { bthread_assign_data((void*)&server->thread_local_options()); } - Span* span = NULL; + std::shared_ptr span; if (IsTraceable(false)) { span = Span::CreateServerSpan(0, 0, 0, msg->base_real_us()); accessor.set_span(span); @@ -369,8 +368,7 @@ void ProcessNsheadResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->payload.length()); @@ -439,8 +437,7 @@ void PackNsheadRequest( // pack the field. accessor.get_sending_socket()->set_correlation_id(correlation_id); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_request_size(request.length()); // TODO: Nowhere to set tracing ids. 
// request_meta->set_trace_id(span->trace_id()); diff --git a/src/brpc/policy/public_pbrpc_protocol.cpp b/src/brpc/policy/public_pbrpc_protocol.cpp index 38a749dc72..a4298a15da 100644 --- a/src/brpc/policy/public_pbrpc_protocol.cpp +++ b/src/brpc/policy/public_pbrpc_protocol.cpp @@ -174,8 +174,7 @@ void ProcessPublicPbrpcResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->meta.size() + msg->payload.size()); @@ -269,7 +268,7 @@ void PackPublicPbrpcRequest(butil::IOBuf* buf, nshead.body_len = GetProtobufByteSize(pbreq); buf->append(&nshead, sizeof(nshead)); - Span* span = ControllerPrivateAccessor(controller).span(); + auto span = ControllerPrivateAccessor(controller).span(); if (span) { // TODO: Nowhere to set tracing ids. // request_meta->set_trace_id(span->trace_id()); diff --git a/src/brpc/policy/redis_protocol.cpp b/src/brpc/policy/redis_protocol.cpp index 9e8e148ebf..7dc5b5b8f3 100644 --- a/src/brpc/policy/redis_protocol.cpp +++ b/src/brpc/policy/redis_protocol.cpp @@ -237,8 +237,7 @@ void ProcessRedisResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->response.ByteSize()); diff --git a/src/brpc/policy/rtmp_protocol.cpp b/src/brpc/policy/rtmp_protocol.cpp index 8b251eb2de..d706468650 100644 --- a/src/brpc/policy/rtmp_protocol.cpp +++ b/src/brpc/policy/rtmp_protocol.cpp @@ -3540,8 +3540,7 @@ void OnServerStreamCreated::Run(bool error, break; } } while (0); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(base_realtime); 
span->set_received_us(received_us); span->set_response_size(istream->popped_bytes()); diff --git a/src/brpc/policy/sofa_pbrpc_protocol.cpp b/src/brpc/policy/sofa_pbrpc_protocol.cpp index 2fb33ed578..01b21851d5 100644 --- a/src/brpc/policy/sofa_pbrpc_protocol.cpp +++ b/src/brpc/policy/sofa_pbrpc_protocol.cpp @@ -215,7 +215,7 @@ static void SendSofaResponse(int64_t correlation_id, MethodStatus* method_status, int64_t received_us) { ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); + auto span = accessor.span(); if (span) { span->set_start_send_us(butil::cpuwide_time_us()); } @@ -374,7 +374,7 @@ void ProcessSofaRequest(InputMessageBase* msg_base) { bthread_assign_data((void*)&server->thread_local_options()); } - Span* span = NULL; + std::shared_ptr span; if (IsTraceable(false)) { span = Span::CreateServerSpan( 0/*meta.trace_id()*/, 0/*meta.span_id()*/, @@ -517,8 +517,7 @@ void ProcessSofaResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->meta.size() + msg->payload.size() + 24); diff --git a/src/brpc/policy/thrift_protocol.cpp b/src/brpc/policy/thrift_protocol.cpp index 1e25066d9f..2b5739ea3e 100755 --- a/src/brpc/policy/thrift_protocol.cpp +++ b/src/brpc/policy/thrift_protocol.cpp @@ -243,7 +243,7 @@ void ThriftClosure::DoRun() { const Server* server = _controller.server(); ControllerPrivateAccessor accessor(&_controller); - Span* span = accessor.span(); + auto span = accessor.span(); if (span) { span->set_start_send_us(butil::cpuwide_time_us()); } @@ -515,7 +515,7 @@ void ProcessThriftRequest(InputMessageBase* msg_base) { bthread_assign_data((void*)&server->thread_local_options()); } - Span* span = NULL; + std::shared_ptr span; if (IsTraceable(false)) { span = Span::CreateServerSpan(0, 0, 0, msg->base_real_us()); 
accessor.set_span(span); @@ -584,8 +584,7 @@ void ProcessThriftResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->payload.length()); @@ -752,8 +751,7 @@ void PackThriftRequest( // pack the field. accessor.get_sending_socket()->set_correlation_id(correlation_id); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_request_size(request.length()); // TODO: Nowhere to set tracing ids. // request_meta->set_trace_id(span->trace_id()); diff --git a/src/brpc/policy/ubrpc2pb_protocol.cpp b/src/brpc/policy/ubrpc2pb_protocol.cpp index fe2c4619cb..2f5194c880 100644 --- a/src/brpc/policy/ubrpc2pb_protocol.cpp +++ b/src/brpc/policy/ubrpc2pb_protocol.cpp @@ -455,8 +455,7 @@ void ProcessUbrpcResponse(InputMessageBase* msg_base) { } ControllerPrivateAccessor accessor(cntl); - Span* span = accessor.span(); - if (span) { + if (auto span = accessor.span()) { span->set_base_real_us(msg->base_real_us()); span->set_received_us(msg->received_us()); span->set_response_size(msg->meta.size() + msg->payload.size()); diff --git a/src/brpc/span.cpp b/src/brpc/span.cpp index 8e9af46cc6..3a53f33a39 100644 --- a/src/brpc/span.cpp +++ b/src/brpc/span.cpp @@ -37,9 +37,92 @@ #define BRPC_SPAN_INFO_SEP "\1" - namespace brpc { +// Callback for creating a new bthread span when creating a new bthread. +// This is called by bthread layer when BTHREAD_INHERIT_SPAN flag is set. +// Returns a heap-allocated weak_ptr* as void*, or NULL if span creation fails. 
+void* CreateBthreadSpanAsVoid() { + const int64_t received_us = butil::cpuwide_time_us(); + const int64_t base_realtime = butil::gettimeofday_us() - received_us; + std::shared_ptr span = Span::CreateBthreadSpan("Bthread", base_realtime); + + if (!span) { + return NULL; + } + return new std::weak_ptr(span); +} + +void DestroyRpczParentSpan(void* ptr) { + if (ptr) { + delete static_cast*>(ptr); + } +} + +void EndBthreadSpan() { + std::shared_ptr span = GetTlsParentSpan(); + if (span) { + span->set_ending_tid(bthread_self()); + } + + ClearTlsParentSpan(); +} + +void SetTlsParentSpan(std::shared_ptr span) { + using namespace bthread; + LocalStorage ls = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_bls); + if (ls.rpcz_parent_span) { + *static_cast*>(ls.rpcz_parent_span) = span; + } else { + ls.rpcz_parent_span = new std::weak_ptr(span); + BAIDU_SET_VOLATILE_THREAD_LOCAL(tls_bls, ls); + } +} + +std::shared_ptr GetTlsParentSpan() { + using namespace bthread; + LocalStorage ls = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_bls); + if (!ls.rpcz_parent_span) { + return nullptr; + } + + auto* weak_ptr = static_cast*>(ls.rpcz_parent_span); + return weak_ptr->lock(); +} + +void ClearTlsParentSpan() { + using namespace bthread; + LocalStorage ls = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_bls); + if (ls.rpcz_parent_span) { + static_cast*>(ls.rpcz_parent_span)->reset(); + } +} + +bool HasTlsParentSpan() { + using namespace bthread; + LocalStorage ls = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_bls); + if (!ls.rpcz_parent_span) { + return false; + } + + auto* weak_ptr = static_cast*>(ls.rpcz_parent_span); + return !weak_ptr->expired(); +} + + +void SpanDeleter::operator()(Span* r) const { + if (r == NULL) { + return; + } + + // All children will be destroyed automatically along with the list. + // The list holds std::shared_ptr<> which will trigger deletion of + // children. 
+ r->_client_list.clear(); + r->_info.clear(); + butil::return_object(r); +} + const int64_t SPAN_DELETE_INTERVAL_US = 10000000L/*10s*/; DEFINE_string(rpcz_database_dir, "./rpc_data/rpcz", @@ -104,15 +187,28 @@ inline uint64_t GenerateTraceId() { return (g->current_random & 0xFFFFFFFFFFFF0000ULL) | g->seq++; } -Span* Span::CreateClientSpan(const std::string& full_method_name, - int64_t base_real_us) { - Span* span = butil::get_object(Forbidden()); - if (__builtin_expect(span == NULL, 0)) { - return NULL; +Span::Span(Forbidden) { + CHECK_EQ(0, pthread_spin_init(&_info_spinlock, 0)) + << "Failed to initialize _info_spinlock"; + CHECK_EQ(0, pthread_spin_init(&_client_list_spinlock, 0)) + << "Failed to initialize _client_list_spinlock"; +} + +Span::~Span() { + pthread_spin_destroy(&_client_list_spinlock); + pthread_spin_destroy(&_info_spinlock); +} + +std::shared_ptr Span::CreateClientSpan(const std::string& full_method_name, + int64_t base_real_us) { + Span* span_raw = butil::get_object(Forbidden()); + if (__builtin_expect(span_raw == NULL, 0)) { + return nullptr; } + std::shared_ptr span(span_raw, SpanDeleter()); span->_log_id = 0; span->_base_cid = INVALID_BTHREAD_ID; - span->_ending_cid = INVALID_BTHREAD_ID; + span->_ending_cid = INVALID_BTHREAD_ID; // Client Span uses ending_cid span->_type = SPAN_TYPE_CLIENT; span->_async = false; span->_protocol = PROTOCOL_UNKNOWN; @@ -125,40 +221,36 @@ Span* Span::CreateClientSpan(const std::string& full_method_name, span->_start_callback_real_us = 0; span->_start_send_real_us = 0; span->_sent_real_us = 0; - span->_next_client = NULL; - span->_client_list = NULL; - span->_tls_next = NULL; span->_full_method_name = full_method_name; span->_info.clear(); - Span* parent = static_cast(bthread::tls_bls.rpcz_parent_span); + std::shared_ptr parent = Span::tls_parent(); if (parent) { span->_trace_id = parent->trace_id(); span->_parent_span_id = parent->span_id(); span->_local_parent = parent; - span->_next_client = 
parent->_client_list; - parent->_client_list = span; + { + BAIDU_SCOPED_LOCK(parent->_client_list_spinlock); + parent->_client_list.push_back(span); + } } else { span->_trace_id = GenerateTraceId(); span->_parent_span_id = 0; - span->_local_parent = NULL; } span->_span_id = GenerateSpanId(); return span; } -Span* Span::CreateBthreadSpan(const std::string& full_method_name, - int64_t base_real_us) { - Span* parent = static_cast(bthread::tls_bls.rpcz_parent_span); - if (parent == NULL) { - return NULL; - } - Span* span = butil::get_object(Forbidden()); - if (__builtin_expect(span == NULL, 0)) { - return NULL; +std::shared_ptr Span::CreateBthreadSpan(const std::string& full_method_name, + int64_t base_real_us) { + std::shared_ptr parent = Span::tls_parent(); + Span* span_raw = butil::get_object(Forbidden()); + if (__builtin_expect(span_raw == NULL, 0)) { + return nullptr; } + std::shared_ptr span(span_raw, SpanDeleter()); span->_log_id = 0; span->_base_cid = INVALID_BTHREAD_ID; - span->_ending_cid = INVALID_BTHREAD_ID; + span->_ending_tid = INVALID_BTHREAD; // Bthread Span uses ending_tid span->_type = SPAN_TYPE_BTHREAD; span->_async = false; span->_protocol = PROTOCOL_UNKNOWN; @@ -171,17 +263,21 @@ Span* Span::CreateBthreadSpan(const std::string& full_method_name, span->_start_callback_real_us = 0; span->_start_send_real_us = 0; span->_sent_real_us = 0; - span->_next_client = NULL; - span->_client_list = NULL; - span->_tls_next = NULL; span->_full_method_name = full_method_name; span->_info.clear(); - span->_trace_id = parent->trace_id(); - span->_parent_span_id = parent->span_id(); - span->_local_parent = parent; - span->_next_client = parent->_client_list; - parent->_client_list = span; + if (parent) { + span->_trace_id = parent->trace_id(); + span->_parent_span_id = parent->span_id(); + span->_local_parent = parent; + { + BAIDU_SCOPED_LOCK(parent->_client_list_spinlock); + parent->_client_list.push_back(span); + } + } else { + span->_trace_id = GenerateTraceId(); 
+ span->_parent_span_id = 0; + } span->_span_id = GenerateSpanId(); return span; @@ -193,20 +289,21 @@ inline const std::string& unknown_span_name() { return s_unknown_method_name; } -Span* Span::CreateServerSpan( +std::shared_ptr Span::CreateServerSpan( const std::string& full_method_name, uint64_t trace_id, uint64_t span_id, uint64_t parent_span_id, int64_t base_real_us) { - Span* span = butil::get_object(Forbidden()); - if (__builtin_expect(span == NULL, 0)) { - return NULL; + Span* span_raw = butil::get_object(Forbidden()); + if (__builtin_expect(span_raw == NULL, 0)) { + return nullptr; } + std::shared_ptr span(span_raw, SpanDeleter()); span->_trace_id = (trace_id ? trace_id : GenerateTraceId()); span->_span_id = (span_id ? span_id : GenerateSpanId()); span->_parent_span_id = parent_span_id; span->_log_id = 0; span->_base_cid = INVALID_BTHREAD_ID; - span->_ending_cid = INVALID_BTHREAD_ID; + span->_ending_cid = INVALID_BTHREAD_ID; // Server Span uses ending_cid span->_type = SPAN_TYPE_SERVER; span->_async = false; span->_protocol = PROTOCOL_UNKNOWN; @@ -219,17 +316,13 @@ Span* Span::CreateServerSpan( span->_start_callback_real_us = 0; span->_start_send_real_us = 0; span->_sent_real_us = 0; - span->_next_client = NULL; - span->_client_list = NULL; - span->_tls_next = NULL; span->_full_method_name = (!full_method_name.empty() ? full_method_name : unknown_span_name()); span->_info.clear(); - span->_local_parent = NULL; return span; } -Span* Span::CreateServerSpan( +std::shared_ptr Span::CreateServerSpan( uint64_t trace_id, uint64_t span_id, uint64_t parent_span_id, int64_t base_real_us) { return CreateServerSpan(unknown_span_name(), trace_id, span_id, @@ -241,26 +334,22 @@ void Span::ResetServerSpanName(const std::string& full_method_name) { full_method_name : unknown_span_name()); } -void Span::destroy() { +void Span::submit(int64_t cpuwide_us) { + // Note: this method is not called for client-side spans. 
EndAsParent(); - traversal(this, [](Span* r) { - r->_info.clear(); - butil::return_object(r); - }); -} - -void Span::traversal(Span* r, const std::function& f) const { - if (r == NULL) { - return; - } - for (auto p = r->_client_list; p != NULL; p = p->_next_client) { - traversal(p, f); + SpanContainer* container = new(std::nothrow) SpanContainer(shared_from_this()); + // If memory allocation fails, the server span will not be submitted for persistence. + // The server span will be destroyed later when its shared_ptr refcount drops to zero + // Child spans (held in _client_list) will also be destroyed when + // their refcounts reach zero. + if (container) { + container->submit(cpuwide_us); } - f(r); } void Span::Annotate(const char* fmt, ...) { const int64_t anno_time = butil::cpuwide_time_us() + _base_real_us; + BAIDU_SCOPED_LOCK(_info_spinlock); butil::string_appendf(&_info, BRPC_SPAN_INFO_SEP "%lld ", (long long)anno_time); va_list ap; @@ -271,6 +360,7 @@ void Span::Annotate(const char* fmt, ...) 
{ void Span::Annotate(const char* fmt, va_list args) { const int64_t anno_time = butil::cpuwide_time_us() + _base_real_us; + BAIDU_SCOPED_LOCK(_info_spinlock); butil::string_appendf(&_info, BRPC_SPAN_INFO_SEP "%lld ", (long long)anno_time); butil::string_vappendf(&_info, fmt, args); @@ -278,6 +368,7 @@ void Span::Annotate(const char* fmt, va_list args) { void Span::Annotate(const std::string& info) { const int64_t anno_time = butil::cpuwide_time_us() + _base_real_us; + BAIDU_SCOPED_LOCK(_info_spinlock); butil::string_appendf(&_info, BRPC_SPAN_INFO_SEP "%lld ", (long long)anno_time); _info.append(info); @@ -285,6 +376,7 @@ void Span::Annotate(const std::string& info) { void Span::AnnotateCStr(const char* info, size_t length) { const int64_t anno_time = butil::cpuwide_time_us() + _base_real_us; + BAIDU_SCOPED_LOCK(_info_spinlock); butil::string_appendf(&_info, BRPC_SPAN_INFO_SEP "%lld ", (long long)anno_time); if (length <= 0) { @@ -295,9 +387,14 @@ void Span::AnnotateCStr(const char* info, size_t length) { } size_t Span::CountClientSpans() const { - size_t n = 0; - traversal(const_cast(this), [&](Span*) { ++n; }); - return n - 1; + size_t n = 1; + { + BAIDU_SCOPED_LOCK(_client_list_spinlock); + for (const auto& child : _client_list) { + n += child->CountClientSpans(); + } + } + return n; } int64_t Span::GetStartRealTimeUs() const { @@ -345,15 +442,26 @@ bool SpanInfoExtractor::PopAnnotation( } bool CanAnnotateSpan() { - return bthread::tls_bls.rpcz_parent_span; + return HasTlsParentSpan(); } void AnnotateSpan(const char* fmt, ...) { - Span* span = static_cast(bthread::tls_bls.rpcz_parent_span); - va_list ap; - va_start(ap, fmt); - span->Annotate(fmt, ap); - va_end(ap); + std::shared_ptr span = GetTlsParentSpan(); + if (span) { // TRACEPRINTF checks CanAnnotateSpan, but this is safer. + va_list ap; + va_start(ap, fmt); + span->Annotate(fmt, ap); + va_end(ap); + } +} + +void AnnotateSpanEx(std::shared_ptr span, const char* fmt, ...) 
{ + if (span) { + va_list ap; + va_start(ap, fmt); + span->Annotate(fmt, ap); + va_end(ap); + } } class SpanDB : public SharedObject { @@ -365,7 +473,7 @@ class SpanDB : public SharedObject { SpanDB() : id_db(NULL), time_db(NULL) { } static SpanDB* Open(); - leveldb::Status Index(const Span* span, std::string* value_buf); + leveldb::Status Index(std::shared_ptr span, std::string* value_buf); leveldb::Status RemoveSpansBefore(int64_t tm); private: @@ -405,10 +513,14 @@ static bvar::DisplaySamplingRatio s_display_sampling_ratio( "rpcz_sampling_ratio", &g_span_sl); struct SpanEarlier { - bool operator()(bvar::Collected* c1, bvar::Collected* c2) const { - const Span* span1 = static_cast(c1); - const Span* span2 = static_cast(c2); - return span1->GetStartRealTimeUs() < span2->GetStartRealTimeUs(); + bool operator()(const bvar::Collected* c1, const bvar::Collected* c2) const { + const SpanContainer* container1 = static_cast(c1); + const SpanContainer* container2 = static_cast(c2); + + const int64_t time1 = container1->span()->GetStartRealTimeUs(); + const int64_t time2 = container2->span()->GetStartRealTimeUs(); + + return time1 < time2; } }; class SpanPreprocessor : public bvar::CollectorPreprocessor { @@ -471,8 +583,13 @@ inline int GetSpanDB(butil::intrusive_ptr* db) { return -1; } -void Span::Submit(Span* span, int64_t cpuwide_time_us) { - if (span->local_parent() == NULL) { +void Span::Submit(std::shared_ptr span, int64_t cpuwide_time_us) { + // Only submit spans without a local parent (i.e., server spans). + // Server spans hold shared_ptr references to their child spans (via _client_list), + // ensuring child spans remain alive until the server span is submitted and dumped. + // Client spans are not submitted here because their lifetime is managed by their + // parent server span. 
+ if (span->local_parent().expired()) { span->submit(cpuwide_time_us); } } @@ -497,6 +614,7 @@ static void Span2Proto(const Span* span, RpczSpan* out) { out->set_start_send_real_us(span->start_send_real_us()); out->set_sent_real_us(span->sent_real_us()); out->set_full_method_name(span->full_method_name()); + // info() returns by value for thread safety (see span.h for details). out->set_info(span->info()); out->set_error_code(span->error_code()); } @@ -571,7 +689,7 @@ SpanDB* SpanDB::Open() { return db; } -leveldb::Status SpanDB::Index(const Span* span, std::string* value_buf) { +leveldb::Status SpanDB::Index(std::shared_ptr span, std::string* value_buf) { leveldb::WriteOptions options; options.sync = false; @@ -637,20 +755,46 @@ leveldb::Status SpanDB::Index(const Span* span, std::string* value_buf) { ToBigEndian(span->span_id(), key_data + 2); leveldb::Slice key((char*)key_data, sizeof(key_data)); RpczSpan value_proto; - Span2Proto(span, &value_proto); - // client spans should be reversed. - size_t client_span_count = span->CountClientSpans(); - for (size_t i = 0; i < client_span_count; ++i) { - value_proto.add_client_spans(); - } - size_t i = 0; - span->traversal(const_cast(span), [&](Span* p) { - if (span == p) { - return; + Span2Proto(span.get(), &value_proto); + + std::vector> all_child_spans; + + std::function)> collect_all_spans = + [&](std::shared_ptr current_span) { + if (!current_span) { + return; + } + + std::vector> children; + { + BAIDU_SCOPED_LOCK(current_span->_client_list_spinlock); + children.reserve(current_span->_client_list.size()); + for (const auto& child_span : current_span->_client_list) { + if (child_span) { + children.push_back(child_span); + } + } + } + + for (const auto& child : children) { + collect_all_spans(child); + } + + all_child_spans.push_back(current_span); + }; + + collect_all_spans(span); + + // Traverse in reverse order and insert child elements. 
+ // Only collect ended spans to avoid race conditions - active spans may still + // be modified by other threads, which could lead to inconsistent data when + // serializing to database. + for (auto it = all_child_spans.rbegin(); it != all_child_spans.rend(); ++it) { + if (*it && it->get() != span.get() && !(*it)->is_active()) { + RpczSpan* child_proto = value_proto.add_client_spans(); + Span2Proto((*it).get(), child_proto); } - Span2Proto(p, value_proto.mutable_client_spans(client_span_count - i - 1)); - ++i; - }); + } if (!value_proto.SerializeToString(value_buf)) { return leveldb::Status::InvalidArgument( leveldb::Slice("Fail to serialize RpczSpan")); @@ -691,7 +835,7 @@ leveldb::Status SpanDB::RemoveSpansBefore(int64_t tm) { break; } } else { - LOG(ERROR) << "Fail to parse from value"; + LOG(ERROR) << "Fail to parse value"; } rc = time_db->Delete(options, it->key()); if (!rc.ok()) { @@ -704,7 +848,7 @@ leveldb::Status SpanDB::RemoveSpansBefore(int64_t tm) { } // Write span into leveldb. -void Span::dump_and_destroy(size_t /*round*/) { +void Span::dump_to_db() { StartIndexingIfNeeded(); std::string value_buf; @@ -712,21 +856,18 @@ void Span::dump_and_destroy(size_t /*round*/) { butil::intrusive_ptr db; if (GetSpanDB(&db) != 0) { if (g_span_ending) { - destroy(); return; } SpanDB* db2 = SpanDB::Open(); if (db2 == NULL) { LOG(WARNING) << "Fail to open SpanDB"; - destroy(); return; } ResetSpanDB(db2); db.reset(db2); } - leveldb::Status st = db->Index(this, &value_buf); - destroy(); + leveldb::Status st = db->Index(shared_from_this(), &value_buf); if (!st.ok()) { LOG(WARNING) << st.ToString(); if (st.IsNotFound() || st.IsIOError() || st.IsCorruption()) { @@ -751,6 +892,42 @@ void Span::dump_and_destroy(size_t /*round*/) { } } +// ========== SpanContainer ============ + +// Destroy the span container without persisting to database. +// This is called in abnormal scenarios: +// 1. When the pending sample queue is full (to prevent memory explosion) +// 2. 
When grab_thread hasn't run for too long (system overload) +// In these cases, we discard the span quickly without expensive I/O. +void SpanContainer::destroy() { + delete this; +} + +// The round parameter is required by bvar::Collected interface but unused here. +// Other implementations (e.g., SampledRequest in rpc_dump.cpp) use it to detect +// new batches and trigger per-round operations like reloading gflags or switching +// output files. SpanContainer doesn't need batch-level operations since it writes +// directly to leveldb without buffering or configuration reloading. +void SpanContainer::dump_and_destroy(size_t round) { + if (_span) { + _span->dump_to_db(); + } + destroy(); +} + +void SpanContainer::submit(int64_t cpuwide_us) { + bvar::Collected::submit(cpuwide_us); +} + +bvar::CollectorSpeedLimit* SpanContainer::speed_limit() { + if (_span) { + return _span->speed_limit(); + } + return NULL; +} + +// ===================================== + int FindSpan(uint64_t trace_id, uint64_t span_id, RpczSpan* response) { butil::intrusive_ptr db; if (GetSpanDB(&db) != 0) { diff --git a/src/brpc/span.h b/src/brpc/span.h index 75d8e7fc05..70fdbf4d60 100644 --- a/src/brpc/span.h +++ b/src/brpc/span.h @@ -23,8 +23,11 @@ #include #include +#include #include #include +#include +#include #include "butil/macros.h" #include "butil/endpoint.h" #include "butil/string_splitter.h" @@ -37,28 +40,48 @@ namespace bthread { extern __thread bthread::LocalStorage tls_bls; } - namespace brpc { +class Span; + +void SetTlsParentSpan(std::shared_ptr span); +std::shared_ptr GetTlsParentSpan(); +void ClearTlsParentSpan(); +bool HasTlsParentSpan(); + +void* CreateBthreadSpanAsVoid(); +void DestroyRpczParentSpan(void* ptr); +void EndBthreadSpan(); + DECLARE_bool(enable_rpcz); +class Span; +class SpanContainer; + +// Deleter for Span. 
+struct SpanDeleter { + void operator()(Span* r) const; +}; + // Collect information required by /rpcz and tracing system whose idea is // described in http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36356.pdf -class Span : public bvar::Collected { +class Span : public std::enable_shared_from_this { friend class SpanDB; - struct Forbidden {}; +friend struct SpanDeleter; +friend class SpanContainer; public: + struct Forbidden {}; // Call CreateServerSpan/CreateClientSpan instead. - Span(Forbidden) {} - ~Span() {} + Span(Forbidden); + ~Span(); // Create a span to track a request inside server. - static Span* CreateServerSpan( + static std::shared_ptr CreateServerSpan( const std::string& full_method_name, uint64_t trace_id, uint64_t span_id, uint64_t parent_span_id, int64_t base_real_us); // Create a span without name to track a request inside server. - static Span* CreateServerSpan( + static std::shared_ptr CreateServerSpan( uint64_t trace_id, uint64_t span_id, uint64_t parent_span_id, int64_t base_real_us); @@ -66,18 +89,24 @@ friend class SpanDB; void ResetServerSpanName(const std::string& name); // Create a span to track a request inside channel. - static Span* CreateClientSpan(const std::string& full_method_name, - int64_t base_real_us); + static std::shared_ptr CreateClientSpan(const std::string& full_method_name, + int64_t base_real_us); // Create a span to track start bthread - static Span* CreateBthreadSpan(const std::string& full_method_name, - int64_t base_real_us); - - static void Submit(Span* span, int64_t cpuwide_time_us); - - // Set tls parent. + static std::shared_ptr CreateBthreadSpan(const std::string& full_method_name, + int64_t base_real_us); + + static void Submit(std::shared_ptr span, int64_t cpuwide_time_us); + + // Set this span as the TLS parent for subsequent child span creation. + // Typical flow: + // 1. Server span calls AsParent() before user callback to enable tracing + // 2. 
Client spans created in user code automatically link to this parent + // 3. When client RPC completes, it restores its own parent via AsParent() + // to maintain the trace chain (see Controller::SubmitSpan) + // 4. Server span calls EndAsParent() when submitting to clear TLS parent void AsParent() { - bthread::tls_bls.rpcz_parent_span = this; + SetTlsParentSpan(shared_from_this()); } // Add log with time. @@ -96,6 +125,7 @@ friend class SpanDB; void set_log_id(uint64_t cid) { _log_id = cid; } void set_base_cid(bthread_id_t id) { _base_cid = id; } void set_ending_cid(bthread_id_t id) { _ending_cid = id; } + void set_ending_tid(bthread_t tid) { _ending_tid = tid; } void set_remote_side(const butil::EndPoint& pt) { _remote_side = pt; } void set_protocol(ProtocolType p) { _protocol = p; } void set_error_code(int error_code) { _error_code = error_code; } @@ -115,9 +145,20 @@ friend class SpanDB; void set_sent_us(int64_t tm) { _sent_real_us = tm + _base_real_us; } - Span* local_parent() const { return _local_parent; } - static Span* tls_parent() { - return static_cast(bthread::tls_bls.rpcz_parent_span); + bool is_active() const { + if (_type == SPAN_TYPE_BTHREAD) { + return _ending_tid == INVALID_BTHREAD; + } + return _ending_cid == INVALID_BTHREAD_ID; + } + + std::weak_ptr local_parent() const { return _local_parent; } + static std::shared_ptr tls_parent() { + auto parent = GetTlsParentSpan(); + if (parent && parent->is_active()) { + return parent; + } + return nullptr; } uint64_t trace_id() const { return _trace_id; } @@ -126,6 +167,7 @@ friend class SpanDB; uint64_t log_id() const { return _log_id; } bthread_id_t base_cid() const { return _base_cid; } bthread_id_t ending_cid() const { return _ending_cid; } + bthread_t ending_tid() const { return _ending_tid; } const butil::EndPoint& remote_side() const { return _remote_side; } SpanType type() const { return _type; } ProtocolType protocol() const { return _protocol; } @@ -139,20 +181,38 @@ friend class SpanDB; int64_t 
sent_real_us() const { return _sent_real_us; } bool async() const { return _async; } const std::string& full_method_name() const { return _full_method_name; } - const std::string& info() const { return _info; } + + // Returns a copy instead of a reference for thread safety. + // + // Current usage: Only called by Span2Proto() which immediately passes the result + // to protobuf's set_info(). In this specific scenario, returning a reference would + // also be safe because set_info() copies the string before the reference could be + // invalidated by concurrent Annotate() calls. + // + // However, returning by value is more robust: it prevents potential data races if + // future code holds the reference longer, and has no performance penalty due to + // C++11 move semantics (the temporary is moved, not copied, into protobuf). + std::string info() const { + BAIDU_SCOPED_LOCK(_info_spinlock); + return _info; + } private: DISALLOW_COPY_AND_ASSIGN(Span); - void dump_and_destroy(size_t round_index); - void destroy(); - void traversal(Span*, const std::function&) const; + void dump_to_db(); + void submit(int64_t cpuwide_us); bvar::CollectorSpeedLimit* speed_limit(); bvar::CollectorPreprocessor* preprocessor(); + // Clear this span from TLS parent if it's currently set as the parent. + // Called when server span is being submitted to prevent subsequent spans + // from incorrectly linking to an ended span. Only clears if the current + // TLS parent is this span (avoids clearing if another span has taken over). 
void EndAsParent() { - if (this == static_cast(bthread::tls_bls.rpcz_parent_span)) { - bthread::tls_bls.rpcz_parent_span = NULL; + std::shared_ptr current_parent = GetTlsParentSpan(); + if (current_parent.get() == this) { + ClearTlsParentSpan(); } } @@ -162,6 +222,7 @@ friend class SpanDB; uint64_t _log_id; bthread_id_t _base_cid; bthread_id_t _ending_cid; + bthread_t _ending_tid; // Used for bthread span to store the ending bthread tid butil::EndPoint _remote_side; SpanType _type; bool _async; @@ -181,11 +242,38 @@ friend class SpanDB; // time2_us \s annotation2 // ... std::string _info; + // Protects _info from concurrent modifications. + // Multiple threads may call Annotate() simultaneously (e.g., retry logic, + // network layer, user code via TRACEPRINTF), causing data corruption in + // string concatenation without synchronization. + mutable pthread_spinlock_t _info_spinlock; + + std::weak_ptr _local_parent; + std::list> _client_list; + // Protects _client_list from concurrent modifications. + // In some scenarios, multiple bthreads may simultaneously create child spans + // (e.g.,raft leader parallel RPCs to followers) and push_back to parent's _client_list. + // Also protects against concurrent iteration (e.g., CountClientSpans, SpanDB::Index) + // while the list is being modified. 
+ mutable pthread_spinlock_t _client_list_spinlock; +}; + +class SpanContainer : public bvar::Collected { +public: + explicit SpanContainer(const std::shared_ptr& span) : _span(span) {} + ~SpanContainer() {} + + // Implementations of bvar::Collected + void dump_and_destroy(size_t round_index) override; + void destroy() override; + bvar::CollectorSpeedLimit* speed_limit() override; - Span* _local_parent; - Span* _next_client; - Span* _client_list; - Span* _tls_next; + void submit(int64_t cpuwide_us); + + const std::shared_ptr& span() const { return _span; } + +private: + std::shared_ptr _span; }; // Extract name and annotations from Span::info() @@ -198,11 +286,14 @@ class SpanInfoExtractor { butil::StringSplitter _sp; }; -// These two functions can be used for composing TRACEPRINT as well as hiding -// span implementations. -bool CanAnnotateSpan(); +// Add an annotation to the current span. +// If current bthread is not tracing, this function does nothing. void AnnotateSpan(const char* fmt, ...); +// Add an annotation to the given span. +// If the span is NULL, this function does nothing. 
+void AnnotateSpanEx(std::shared_ptr span, const char* fmt, ...); + class SpanFilter { public: @@ -240,12 +331,6 @@ inline bool IsTraceable(bool is_upstream_traced) { (FLAGS_enable_rpcz && bvar::is_collectable(&g_span_sl)); } -inline void* CreateBthreadSpan() { - const int64_t received_us = butil::cpuwide_time_us(); - const int64_t base_realtime = butil::gettimeofday_us() - received_us; - return Span::CreateBthreadSpan("Bthread", base_realtime); -} - } // namespace brpc diff --git a/src/brpc/traceprintf.h b/src/brpc/traceprintf.h index 513daf2b00..47a8dcf33a 100644 --- a/src/brpc/traceprintf.h +++ b/src/brpc/traceprintf.h @@ -19,6 +19,7 @@ #ifndef BRPC_TRACEPRINTF_H #define BRPC_TRACEPRINTF_H +#include #include "butil/macros.h" // To brpc developers: This is a header included by user, don't depend @@ -27,9 +28,15 @@ namespace brpc { +// Forward declaration +class Span; + bool CanAnnotateSpan(); void AnnotateSpan(const char* fmt, ...); +// Declarations for AnnotateSpanEx used by TRACEPRINTF_SPAN macro +void AnnotateSpanEx(std::shared_ptr span, const char* fmt, ...); + } // namespace brpc @@ -43,4 +50,14 @@ void AnnotateSpan(const char* fmt, ...); } \ } while (0) + +// Use this macro to print log to a specific span. +// If span_ptr is NULL, arguments to this macro is NOT evaluated. +#define TRACEPRINTF_SPAN(span_ptr, fmt, args...) 
\ + do { \ + if ((span_ptr)) { \ + ::brpc::AnnotateSpanEx((span_ptr), "[" __FILE__ ":" BAIDU_SYMBOLSTR(__LINE__) "] " fmt, ##args); \ + } \ + } while (0) + #endif // BRPC_TRACEPRINTF_H diff --git a/src/bthread/bthread.cpp b/src/bthread/bthread.cpp index ac49f269d9..27ded27acd 100644 --- a/src/bthread/bthread.cpp +++ b/src/bthread/bthread.cpp @@ -90,7 +90,6 @@ extern BAIDU_THREAD_LOCAL TaskGroup* tls_task_group; EXTERN_BAIDU_VOLATILE_THREAD_LOCAL(TaskGroup*, tls_task_group); extern void (*g_worker_startfn)(); extern void (*g_tagged_worker_startfn)(bthread_tag_t); -extern void* (*g_create_span_func)(); inline TaskControl* get_task_control() { return g_task_control; @@ -597,14 +596,6 @@ int bthread_set_tagged_worker_startfn(void (*start_fn)(bthread_tag_t)) { return 0; } -int bthread_set_create_span_func(void* (*func)()) { - if (func == NULL) { - return EINVAL; - } - bthread::g_create_span_func = func; - return 0; -} - void bthread_stop_world() { bthread::TaskControl* c = bthread::get_task_control(); if (c != NULL) { @@ -668,6 +659,21 @@ uint64_t bthread_cpu_clock_ns(void) { return 0; } +int bthread_set_span_funcs(bthread_create_span_fn create_fn, + bthread_destroy_span_fn destroy_fn, + bthread_end_span_fn end_fn) { + if ((create_fn && destroy_fn && end_fn) || + (!create_fn && !destroy_fn && !end_fn)) { + bthread::g_create_bthread_span = create_fn; + bthread::g_rpcz_parent_span_dtor = destroy_fn; + bthread::g_end_bthread_span = end_fn; + return 0; + } + + errno = EINVAL; + return -1; +} + } // extern "C" void bthread_attr_set_name(bthread_attr_t* attr, const char* name) { diff --git a/src/bthread/bthread.h b/src/bthread/bthread.h index 603cf04d0e..402fe70cfe 100644 --- a/src/bthread/bthread.h +++ b/src/bthread/bthread.h @@ -429,6 +429,34 @@ extern int bthread_once(bthread_once_t* once_control, void (*init_routine)()); */ extern uint64_t bthread_cpu_clock_ns(void); +// Span callback function types for tracing bthread lifecycle. 
+// These callbacks are typically set by upper-layer frameworks (e.g., brpc) +// to integrate distributed tracing with bthread execution. +typedef void* (*bthread_create_span_fn)(void); +typedef void (*bthread_destroy_span_fn)(void*); +typedef void (*bthread_end_span_fn)(void); + +// Set span-related callbacks for bthread tracing. +// This should be called during framework initialization (e.g., in GlobalInitializeOrDie). +// +// Parameters: +// create_fn - Called when creating a bthread with BTHREAD_INHERIT_SPAN flag. +// Should return a heap-allocated span context (e.g., weak_ptr*). +// Returns NULL if span creation is disabled or fails. +// destroy_fn - Called to destroy the span context when bthread exits or cleans up. +// Receives the pointer returned by create_fn. +// end_fn - Called when bthread ends to finalize the span (e.g., set end time). +// +// All three callbacks must be provided together, or all NULL to disable span tracking. +// This function should only be called once during initialization. +// +// Returns: +// 0 on success +// -1 if parameters are invalid (sets errno to EINVAL) +extern int bthread_set_span_funcs(bthread_create_span_fn create_fn, + bthread_destroy_span_fn destroy_fn, + bthread_end_span_fn end_fn); + __END_DECLS #endif // BTHREAD_BTHREAD_H diff --git a/src/bthread/key.cpp b/src/bthread/key.cpp index 00215d7f3b..74945833d9 100644 --- a/src/bthread/key.cpp +++ b/src/bthread/key.cpp @@ -22,6 +22,7 @@ #include #include +#include "bthread/bthread.h" // bthread_create_span_fn and related types #include "bthread/errno.h" // EAGAIN #include "bthread/task_group.h" // TaskGroup #include "butil/atomicops.h" diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp index 877a5d406e..579bb23120 100644 --- a/src/bthread/task_group.cpp +++ b/src/bthread/task_group.cpp @@ -48,6 +48,12 @@ namespace bthread { +// Global span function pointers for bthread lifecycle tracing. +// These are set by brpc layer via bthread_set_span_funcs(). 
+void* (*g_create_bthread_span)() = NULL; +void (*g_rpcz_parent_span_dtor)(void*) = NULL; +void (*g_end_bthread_span)() = NULL; + static const bthread_attr_t BTHREAD_ATTR_TASKGROUP = { BTHREAD_STACKTYPE_UNKNOWN, 0, NULL, BTHREAD_TAG_INVALID, {0} }; @@ -78,15 +84,6 @@ BAIDU_VOLATILE_THREAD_LOCAL(void*, tls_unique_user_ptr, NULL); const TaskStatistics EMPTY_STAT = { 0, 0, 0 }; -void* (*g_create_span_func)() = NULL; - -void* run_create_span_func() { - if (g_create_span_func) { - return g_create_span_func(); - } - return BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_bls).rpcz_parent_span; -} - AtomicInteger128::Value AtomicInteger128::load() const { #if __x86_64__ || __ARM_NEON // Supress compiler warning. @@ -393,6 +390,12 @@ void TaskGroup::task_runner(intptr_t skip_remained) { thread_return = e.value(); } + if (m->attr.flags & BTHREAD_INHERIT_SPAN) { + if (g_end_bthread_span) { + g_end_bthread_span(); + } + } + // TODO: Save thread_return (void)thread_return; @@ -417,6 +420,15 @@ void TaskGroup::task_runner(intptr_t skip_remained) { m->local_storage.keytable = NULL; // optional } + // Clean up span if it exists. This must be done after keytable cleanup + // because span cleanup may use bthread local storage. + tls_bls_ptr = BAIDU_GET_PTR_VOLATILE_THREAD_LOCAL(tls_bls); + if (tls_bls_ptr->rpcz_parent_span && g_rpcz_parent_span_dtor) { + g_rpcz_parent_span_dtor(tls_bls_ptr->rpcz_parent_span); + tls_bls_ptr->rpcz_parent_span = NULL; + m->local_storage.rpcz_parent_span = NULL; + } + // During running the function in TaskMeta and deleting the KeyTable in // return_KeyTable, the group is probably changed. 
g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group); @@ -495,7 +507,11 @@ int TaskGroup::start_foreground(TaskGroup** pg, m->attr = using_attr; m->local_storage = LOCAL_STORAGE_INIT; if (using_attr.flags & BTHREAD_INHERIT_SPAN) { - m->local_storage.rpcz_parent_span = run_create_span_func(); + if (g_create_bthread_span) { + m->local_storage.rpcz_parent_span = g_create_bthread_span(); + } else { + m->local_storage.rpcz_parent_span = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_bls).rpcz_parent_span; + } } m->cpuwide_start_ns = start_ns; m->stat = EMPTY_STAT; @@ -560,7 +576,11 @@ int TaskGroup::start_background(bthread_t* __restrict th, m->attr = using_attr; m->local_storage = LOCAL_STORAGE_INIT; if (using_attr.flags & BTHREAD_INHERIT_SPAN) { - m->local_storage.rpcz_parent_span = run_create_span_func(); + if (g_create_bthread_span) { + m->local_storage.rpcz_parent_span = g_create_bthread_span(); + } else { + m->local_storage.rpcz_parent_span = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_bls).rpcz_parent_span; + } } m->cpuwide_start_ns = start_ns; m->stat = EMPTY_STAT; diff --git a/src/bthread/task_meta.h b/src/bthread/task_meta.h index 1b77c0b601..a2490b4553 100644 --- a/src/bthread/task_meta.h +++ b/src/bthread/task_meta.h @@ -28,6 +28,7 @@ #include "bthread/types.h" // bthread_attr_t #include "bthread/stack.h" // ContextualStack #include "bthread/timer_thread.h" +#include "butil/thread_local.h" namespace bthread { @@ -43,13 +44,15 @@ struct ButexWaiter; struct LocalStorage { KeyTable* keytable; void* assigned_data; - void* rpcz_parent_span; + void* rpcz_parent_span; // Points to std::weak_ptr* (managed by brpc) }; #define BTHREAD_LOCAL_STORAGE_INITIALIZER { NULL, NULL, NULL } const static LocalStorage LOCAL_STORAGE_INIT = BTHREAD_LOCAL_STORAGE_INITIALIZER; +EXTERN_BAIDU_VOLATILE_THREAD_LOCAL(LocalStorage, tls_bls); + enum TaskStatus { TASK_STATUS_UNKNOWN, TASK_STATUS_CREATED, @@ -149,6 +152,24 @@ struct TaskMeta { } }; +// Global callback for creating a new bthread span when 
creating a new bthread. +// This is set by brpc layer. When a bthread is created with BTHREAD_INHERIT_SPAN, +// this callback is invoked to create a new span for the bthread. +// The returned void* points to a heap-allocated weak_ptr* managed by brpc layer. +// Returns NULL if span creation is disabled or fails. +extern void* (*g_create_bthread_span)(); + +// Global destructor callback for rpcz_parent_span. +// This is set by brpc layer to clean up the heap-allocated weak_ptr. +// bthread layer doesn't know the concrete type, it just calls this function +// with the void* pointer when cleaning up LocalStorage. +extern void (*g_rpcz_parent_span_dtor)(void*); + +// Global callback invoked when a bthread ends (used by higher layers to +// observe and react to bthread end events, e.g., to finish spans). This +// pointer is set by the upper layer during initialization. +extern void (*g_end_bthread_span)(); + } // namespace bthread #endif // BTHREAD_TASK_META_H diff --git a/src/bthread/unstable.h b/src/bthread/unstable.h index 4580202f87..186d9ce65b 100644 --- a/src/bthread/unstable.h +++ b/src/bthread/unstable.h @@ -92,9 +92,6 @@ extern int bthread_set_worker_startfn(void (*start_fn)()); // Add a startup function with tag extern int bthread_set_tagged_worker_startfn(void (*start_fn)(bthread_tag_t)); -// Add a create span function -extern int bthread_set_create_span_func(void* (*func)()); - // Stop all bthread and worker pthreads. // You should avoid calling this function which may cause bthread after main() // suspend indefinitely. diff --git a/src/bvar/collector.cpp b/src/bvar/collector.cpp index 34713a4a00..a01f45fdbc 100644 --- a/src/bvar/collector.cpp +++ b/src/bvar/collector.cpp @@ -410,6 +410,11 @@ void Collector::dump_thread() { } } +// Submit a sample for asynchronous dumping. The Collector holds only the Collected* +// pointer (e.g., SpanContainer*). 
Regardless of which branch is taken below, the +// sample will eventually be destroyed via either dump_and_destroy() or destroy(), +// both of which call 'delete this' to release the container and decrement the +// reference count of any managed resources (e.g., shared_ptr). void Collected::submit(int64_t cpuwide_us) { Collector* d = butil::get_leaky_singleton(); // Destroy the sample in-place if the grab_thread did not run for twice diff --git a/test/brpc_channel_unittest.cpp b/test/brpc_channel_unittest.cpp index ad6670443a..de33b44393 100644 --- a/test/brpc_channel_unittest.cpp +++ b/test/brpc_channel_unittest.cpp @@ -47,13 +47,15 @@ namespace brpc { DECLARE_int32(idle_timeout_second); DECLARE_int32(max_connection_pool_size); class Server; +class Span; class MethodStatus; namespace policy { void SendRpcResponse(int64_t correlation_id, Controller* cntl, RpcPBMessages* messages, const Server* server_raw, - MethodStatus *, int64_t); + MethodStatus *, int64_t, + std::shared_ptr span); } // policy } // brpc @@ -301,9 +303,10 @@ class ChannelTest : public ::testing::Test{ int64_t, brpc::Controller*, brpc::RpcPBMessages*, const brpc::Server*, - brpc::MethodStatus*, int64_t>(&brpc::policy::SendRpcResponse, - meta.correlation_id(), cntl, - messages, &ts->_dummy, NULL, -1); + brpc::MethodStatus*, int64_t, std::shared_ptr>( + &brpc::policy::SendRpcResponse, + meta.correlation_id(), cntl, + messages, &ts->_dummy, NULL, -1, nullptr); ts->_svc.CallMethod(method, cntl, req, res, done); } diff --git a/test/bthread_unittest.cpp b/test/bthread_unittest.cpp index dcb8d87323..bd31a3c430 100644 --- a/test/bthread_unittest.cpp +++ b/test/bthread_unittest.cpp @@ -17,6 +17,7 @@ #include #include +#include #include "butil/time.h" #include "butil/macros.h" #include "butil/logging.h" @@ -566,6 +567,13 @@ void* create_span_func() { return (void*)targets[idx]; } +void destroy_span_func(void* span) { + LOG(INFO) << "Destroy span " << (uint64_t)span; +} + +void end_span_func() { +} + 
TEST_F(BthreadTest, test_span) { uint64_t p1 = 0; uint64_t p2 = 0; @@ -587,7 +595,7 @@ TEST_F(BthreadTest, test_span) { LOG(INFO) << "Test bthread create span"; - bthread_set_create_span_func(create_span_func); + ASSERT_EQ(0, bthread_set_span_funcs(create_span_func, destroy_span_func, end_span_func)); bthread_t multi_th1; bthread_t multi_th2; @@ -602,6 +610,8 @@ TEST_F(BthreadTest, test_span) { ASSERT_NE(multi_p1, multi_p2); ASSERT_NE(std::find(targets, targets + 4, multi_p1), targets + 4); ASSERT_NE(std::find(targets, targets + 4, multi_p2), targets + 4); + + ASSERT_EQ(0, bthread_set_span_funcs(NULL, NULL, NULL)); } void* dummy_thread(void*) { From 66497b8dcde3a6743cba7f0fc61594a5c04d2f7a Mon Sep 17 00:00:00 2001 From: Weibing Wang Date: Thu, 2 Apr 2026 15:00:43 +0800 Subject: [PATCH 49/84] Update release schedule (#3259) --- community/release_schedule.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/community/release_schedule.md b/community/release_schedule.md index 4f95a6bbd6..1cf4ee2315 100644 --- a/community/release_schedule.md +++ b/community/release_schedule.md @@ -19,3 +19,9 @@ |1.14.0|2025-07-xx|王伟冰| |1.15.0|2025-10-xx|刘帅| |1.16.0|2026-01-xx|王晓峰| +|1.17.0|2026-05-xx|胡希国| +|1.18.0|2026-09-xx|陈光明| +|1.19.0|2027-01-xx|李磊| +|1.20.0|2027-05-xx|王伟冰| +|1.21.0|2027-09-xx|刘帅| +|1.22.0|2028-01-xx|王晓峰| From ddb99727fccc0d9b0ea4b88e665d98904a0cf74c Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Mon, 6 Apr 2026 04:02:15 +0100 Subject: [PATCH 50/84] Fix fuzz harness blockers (#3257) * Fix fuzz harness blockers Harden several code paths with increased error handling. The existing fuzzing harneses are running into various blockers stopping them from explore further code. This is an effort to harden the code so the fuzzers will run better without crashing. 
Signed-off-by: David Korczynski * Add fatal logging Signed-off-by: David Korczynski --------- Signed-off-by: David Korczynski --- src/brpc/policy/mongo_protocol.cpp | 7 +++++++ src/brpc/policy/streaming_rpc_protocol.cpp | 11 ++++++++++- src/brpc/redis_command.cpp | 12 ++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/brpc/policy/mongo_protocol.cpp b/src/brpc/policy/mongo_protocol.cpp index 5df304e1bc..ee416421d8 100644 --- a/src/brpc/policy/mongo_protocol.cpp +++ b/src/brpc/policy/mongo_protocol.cpp @@ -113,6 +113,13 @@ void SendMongoResponse::Run() { ParseResult ParseMongoMessage(butil::IOBuf* source, Socket* socket, bool /*read_eof*/, const void *arg) { const Server* server = static_cast<const Server*>(arg); + // arg may be NULL when the parser is invoked outside of a full Server + // context (e.g. during protocol probing or fuzz testing). Without this + // guard, server->options() dereferences a null pointer and crashes. + if (NULL == server) { + LOG(FATAL) << "Failed creating server"; + return MakeParseError(PARSE_ERROR_TRY_OTHERS); + } const MongoServiceAdaptor* adaptor = server->options().mongo_service_adaptor; if (NULL == adaptor) { // The server does not enable mongo adaptor. diff --git a/src/brpc/policy/streaming_rpc_protocol.cpp b/src/brpc/policy/streaming_rpc_protocol.cpp index 0921d005e7..b741acff5c 100644 --- a/src/brpc/policy/streaming_rpc_protocol.cpp +++ b/src/brpc/policy/streaming_rpc_protocol.cpp @@ -116,7 +116,16 @@ ParseResult ParseStreamingMessage(butil::IOBuf* source, break; } meta_buf.clear(); // to reduce memory resident - ((Stream*)ptr->conn())->OnReceived(fm, &payload, socket); + // ptr->conn() returns the connection-level context attached to the + // socket. It may be NULL when the socket was found by ID but has no + // Stream object associated (e.g. during protocol probing or fuzz + // testing). Calling OnReceived on a null pointer would crash.
+ Stream* stream_conn = (Stream*)ptr->conn(); + if (stream_conn == NULL) { + LOG(FATAL) << "No stream object found"; + break; + } + stream_conn->OnReceived(fm, &payload, socket); } while (0); // Hack input messenger diff --git a/src/brpc/redis_command.cpp b/src/brpc/redis_command.cpp index d5e76c39d8..4532b3c197 100644 --- a/src/brpc/redis_command.cpp +++ b/src/brpc/redis_command.cpp @@ -410,6 +410,12 @@ RedisCommandConsumeState RedisCommandParser::ConsumeImpl(butil::IOBuf& buf, } const size_t buf_size = buf.size(); const auto copy_str = static_cast(arena->allocate(buf_size + 1)); + // arena->allocate() may return NULL on allocation failure + if (copy_str == NULL) { + LOG(FATAL) << "Arena failed allocation"; + *err = PARSE_ERROR_ABSOLUTELY_WRONG; + return CONSUME_STATE_ERROR; + } buf.copy_to(copy_str, buf_size); if (*copy_str == ' ') { *err = PARSE_ERROR_ABSOLUTELY_WRONG; @@ -520,6 +526,12 @@ RedisCommandConsumeState RedisCommandParser::ConsumeImpl(butil::IOBuf& buf, } buf.pop_front(crlf_pos + 2/*CRLF*/); char* d = (char*)arena->allocate((len/8 + 1) * 8); + // Guard against allocation failure + if (d == NULL) { + LOG(FATAL) << "Arena failed allocation"; + *err = PARSE_ERROR_ABSOLUTELY_WRONG; + return CONSUME_STATE_ERROR; + } buf.cutn(d, len); d[len] = '\0'; _args[_index].set(d, len); From 767b2da512ad871c773085f1c55b3b93f7f16d34 Mon Sep 17 00:00:00 2001 From: darion-yaphet Date: Wed, 8 Apr 2026 16:19:10 +0800 Subject: [PATCH 51/84] feat(build): add progress output to config_brpc.sh (#3262) The configuration script previously ran silently with no visible output, making it hard to tell whether it was working or what it detected. Add colored progress messages for each stage: system info, dependency discovery, output file generation, and a final configuration summary. 
--- config_brpc.sh | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/config_brpc.sh b/config_brpc.sh index 7d03e224ad..4526d218a8 100755 --- a/config_brpc.sh +++ b/config_brpc.sh @@ -15,6 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +BOLD='\033[1m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +print_info() { + printf "${CYAN}[INFO]${NC} %s\n" "$1" +} +print_success() { + printf "${GREEN}[OK]${NC} %s\n" "$1" +} +print_step() { + printf "${BOLD}==> %s${NC}\n" "$1" +} + SYSTEM=$(uname -s) if [ "$SYSTEM" = "Darwin" ]; then if [ -z "$BASH" ] || [ "$BASH" = "/bin/sh" ] ; then @@ -83,6 +99,10 @@ while true; do esac done +print_step "Configuring brpc (${SYSTEM})" +print_info "Headers path: ${HDRS_IN}" +print_info "Libs path: ${LIBS_IN}" + if [ -z "$CC" ]; then if [ ! -z "$CXX" ]; then >&2 $ECHO "--cc and --cxx must be both set or unset" @@ -99,6 +119,8 @@ elif [ -z "$CXX" ]; then exit 1 fi +print_info "CC=$CC, CXX=$CXX" + GCC_VERSION=$(CXX=$CXX tools/print_gcc_version.sh) if [ $GCC_VERSION -gt 0 ] && [ $GCC_VERSION -lt 40800 ]; then >&2 $ECHO "GCC is too old, please install a newer version supporting C++11" @@ -175,11 +197,14 @@ if [ "$SYSTEM" = "Darwin" ]; then fi fi +print_step "Checking dependencies" + # User specified path of openssl, if not given it's empty OPENSSL_LIB=$(find_dir_of_lib ssl) # Inconvenient to check these headers in baidu-internal #PTHREAD_HDR=$(find_dir_of_header_or_die pthread.h) OPENSSL_HDR=$(find_dir_of_header_or_die openssl/ssl.h mesalink/openssl/ssl.h) +print_success "Found openssl: lib=${OPENSSL_LIB:-system}, hdr=${OPENSSL_HDR}" if [ $WITH_MESALINK != 0 ]; then MESALINK_HDR=$(find_dir_of_header_or_die mesalink/openssl/ssl.h) @@ -228,11 +253,14 @@ append_linking() { GFLAGS_LIB=$(find_dir_of_lib_or_die gflags) append_linking $GFLAGS_LIB gflags +print_success "Found gflags: $GFLAGS_LIB" 
PROTOBUF_LIB=$(find_dir_of_lib_or_die protobuf) append_linking $PROTOBUF_LIB protobuf +print_success "Found protobuf: $PROTOBUF_LIB" LEVELDB_LIB=$(find_dir_of_lib_or_die leveldb) +print_success "Found leveldb: $LEVELDB_LIB" # required by leveldb if [ -f $LEVELDB_LIB/libleveldb.a ]; then if [ -f $LEVELDB_LIB/libleveldb.$SO ]; then @@ -261,6 +289,7 @@ else fi PROTOC=$(find_bin_or_die protoc) +print_success "Found protoc: $PROTOC" GFLAGS_HDR=$(find_dir_of_header_or_die gflags/gflags.h) @@ -348,8 +377,10 @@ if [ "$PROTOBUF_VERSION" -ge 4022000 ]; then fi done CXXFLAGS="-std=c++17" + print_success "Found protobuf version $PROTOBUF_VERSION (>= v22, using C++17 with abseil)" else CXXFLAGS="-std=c++0x" + print_success "Found protobuf version $PROTOBUF_VERSION" fi CPPFLAGS= @@ -606,5 +637,22 @@ cat << EOF > src/butil/config.h #endif // BUTIL_CONFIG_H EOF +print_step "Generating output files" + # write to config.mk $ECHO "$OUTPUT_CONTENT" > config.mk +print_success "Generated config.mk" +print_success "Generated src/butil/config.h" + +printf "\n" +print_step "Configuration complete" +print_info "Compiler: $CC / $CXX" +print_info "C++ std: $CXXFLAGS" +print_info "System: $SYSTEM" +if [ $WITH_GLOG -ne 0 ]; then print_info "With glog: yes"; fi +if [ $WITH_THRIFT -ne 0 ]; then print_info "With thrift: yes"; fi +if [ $WITH_RDMA -ne 0 ]; then print_info "With RDMA: yes"; fi +if [ $WITH_MESALINK -ne 0 ]; then print_info "With MesaLink: yes"; fi +if [ $WITH_BTHREAD_TRACER -ne 0 ]; then print_info "With bthread tracer: yes"; fi +if [ $WITH_ASAN -ne 0 ]; then print_info "With ASAN: yes"; fi +printf "\n${GREEN}brpc is now configured. 
You can build it with 'make'.${NC}\n" From 866b3e40591c8103167949a62d58ad07db779135 Mon Sep 17 00:00:00 2001 From: darion-yaphet Date: Wed, 8 Apr 2026 20:01:58 +0800 Subject: [PATCH 52/84] docs(readme): update READMEs and add English doc placeholders (#3263) - Fix outdated Travis CI badge in README_cn.md to GitHub Actions - Unify all doc links in README.md to point to docs/en/ directory - Add references for bthread tracer, coroutine, circuit breaker, RDMA, Bazel support - Create 39 English placeholder docs pointing to Chinese versions - Create Chinese placeholder for couchbase_example --- README.md | 77 ++++++++++++++------------ README_cn.md | 9 ++- docs/cn/couchbase_example.md | 5 ++ docs/en/auto_concurrency_limiter.md | 7 +++ docs/en/baidu_std.md | 7 +++ docs/en/benchmark.md | 7 +++ docs/en/benchmark_http.md | 7 +++ docs/en/bthread.md | 7 +++ docs/en/bthread_id.md | 7 +++ docs/en/bthread_or_not.md | 7 +++ docs/en/bthread_tagged_task_group.md | 7 +++ docs/en/bthread_tracer.md | 7 +++ docs/en/case_apicontrol.md | 7 +++ docs/en/case_baidu_dsp.md | 7 +++ docs/en/case_elf.md | 7 +++ docs/en/case_ubrpc.md | 7 +++ docs/en/circuit_breaker.md | 7 +++ docs/en/connections.md | 7 +++ docs/en/consistent_hashing.md | 7 +++ docs/en/contention_profiler.md | 7 +++ docs/en/coroutine.md | 7 +++ docs/en/cpu_profiler.md | 7 +++ docs/en/endpoint.md | 7 +++ docs/en/execution_queue.md | 7 +++ docs/en/flags.md | 7 +++ docs/en/flatmap.md | 7 +++ docs/en/heap_profiler.md | 7 +++ docs/en/json2pb.md | 7 +++ docs/en/load_balancing.md | 7 +++ docs/en/mbvar_c++.md | 7 +++ docs/en/memory_management.md | 7 +++ docs/en/nshead_service.md | 7 +++ docs/en/parallel_http.md | 7 +++ docs/en/rpc_press.md | 7 +++ docs/en/rpc_replay.md | 7 +++ docs/en/rpc_view.md | 7 +++ docs/en/rpcz.md | 7 +++ docs/en/sanitizers.md | 7 +++ docs/en/thread_local.md | 7 +++ docs/en/timeout_concurrency_limiter.md | 7 +++ docs/en/timer_keeping.md | 7 +++ docs/en/ub_client.md | 7 +++ 42 files changed, 328 insertions(+), 36 
deletions(-) create mode 100644 docs/cn/couchbase_example.md create mode 100644 docs/en/auto_concurrency_limiter.md create mode 100644 docs/en/baidu_std.md create mode 100644 docs/en/benchmark.md create mode 100644 docs/en/benchmark_http.md create mode 100644 docs/en/bthread.md create mode 100644 docs/en/bthread_id.md create mode 100644 docs/en/bthread_or_not.md create mode 100644 docs/en/bthread_tagged_task_group.md create mode 100644 docs/en/bthread_tracer.md create mode 100644 docs/en/case_apicontrol.md create mode 100644 docs/en/case_baidu_dsp.md create mode 100644 docs/en/case_elf.md create mode 100644 docs/en/case_ubrpc.md create mode 100644 docs/en/circuit_breaker.md create mode 100644 docs/en/connections.md create mode 100644 docs/en/consistent_hashing.md create mode 100644 docs/en/contention_profiler.md create mode 100644 docs/en/coroutine.md create mode 100644 docs/en/cpu_profiler.md create mode 100644 docs/en/endpoint.md create mode 100644 docs/en/execution_queue.md create mode 100644 docs/en/flags.md create mode 100644 docs/en/flatmap.md create mode 100644 docs/en/heap_profiler.md create mode 100644 docs/en/json2pb.md create mode 100644 docs/en/load_balancing.md create mode 100644 docs/en/mbvar_c++.md create mode 100644 docs/en/memory_management.md create mode 100644 docs/en/nshead_service.md create mode 100644 docs/en/parallel_http.md create mode 100644 docs/en/rpc_press.md create mode 100644 docs/en/rpc_replay.md create mode 100644 docs/en/rpc_view.md create mode 100644 docs/en/rpcz.md create mode 100644 docs/en/sanitizers.md create mode 100644 docs/en/thread_local.md create mode 100644 docs/en/timeout_concurrency_limiter.md create mode 100644 docs/en/timer_keeping.md create mode 100644 docs/en/ub_client.md diff --git a/README.md b/README.md index fc778acf4b..1c4f78528b 100644 --- a/README.md +++ b/README.md @@ -18,34 +18,36 @@ You can use it to: * hadoop_rpc (may be opensourced) * [rdma](https://en.wikipedia.org/wiki/Remote_direct_memory_access) 
support * [thrift](docs/en/thrift.md) support, thread-safe, more friendly and performant than the official clients. - * all sorts of protocols used in Baidu: [baidu_std](docs/cn/baidu_std.md), [streaming_rpc](docs/en/streaming_rpc.md), hulu_pbrpc, [sofa_pbrpc](https://github.com/baidu/sofa-pbrpc), nova_pbrpc, public_pbrpc, ubrpc and nshead-based ones. + * all sorts of protocols used in Baidu: [baidu_std](docs/en/baidu_std.md), [streaming_rpc](docs/en/streaming_rpc.md), hulu_pbrpc, [sofa_pbrpc](https://github.com/baidu/sofa-pbrpc), nova_pbrpc, public_pbrpc, ubrpc and nshead-based ones. * Build [HA](https://en.wikipedia.org/wiki/High_availability) distributed services using an industrial-grade implementation of [RAFT consensus algorithm](https://raft.github.io) which is opensourced at [braft](https://github.com/brpc/braft) * Servers can handle requests [synchronously](docs/en/server.md) or [asynchronously](docs/en/server.md#asynchronous-service). * Clients can access servers [synchronously](docs/en/client.md#synchronus-call), [asynchronously](docs/en/client.md#asynchronous-call), [semi-synchronously](docs/en/client.md#semi-synchronous-call), or use [combo channels](docs/en/combo_channel.md) to simplify sharded or parallel accesses declaratively. -* Debug services [via http](docs/en/builtin_service.md), and run [cpu](docs/cn/cpu_profiler.md), [heap](docs/cn/heap_profiler.md) and [contention](docs/cn/contention_profiler.md) profilers. +* Debug services [via http](docs/en/builtin_service.md), and run [cpu](docs/en/cpu_profiler.md), [heap](docs/en/heap_profiler.md) and [contention](docs/en/contention_profiler.md) profilers. * Get [better latency and throughput](docs/en/overview.md#better-latency-and-throughput). 
-* [Extend bRPC](docs/en/new_protocol.md) with the protocols used in your organization quickly, or customize components, including [naming services](docs/cn/load_balancing.md#命名服务) (dns, zk, etcd), [load balancers](docs/cn/load_balancing.md#负载均衡) (rr, random, consistent hashing) +* [Extend bRPC](docs/en/new_protocol.md) with the protocols used in your organization quickly, or customize components, including [naming services](docs/en/load_balancing.md) (dns, zk, etcd), [load balancers](docs/en/load_balancing.md) (rr, random, consistent hashing) # Try it! * Read [overview](docs/en/overview.md) to know where bRPC can be used and its advantages. -* Read [getting started](docs/cn/getting_started.md) for building steps and play with [examples](https://github.com/apache/brpc/tree/master/example/). +* Read [getting started](docs/en/getting_started.md) for building steps and play with [examples](https://github.com/apache/brpc/tree/master/example/). * Docs: - * [Performance benchmark](docs/cn/benchmark.md) + * [Performance benchmark](docs/en/benchmark.md) * [bvar](docs/en/bvar.md) - * [bvar_c++](docs/cn/bvar_c++.md) - * [bthread](docs/cn/bthread.md) - * [bthread or not](docs/cn/bthread_or_not.md) - * [thread-local](docs/cn/thread_local.md) - * [Execution Queue](docs/cn/execution_queue.md) + * [bvar_c++](docs/en/bvar_c++.md) + * [bthread](docs/en/bthread.md) + * [bthread or not](docs/en/bthread_or_not.md) + * [thread-local](docs/en/thread_local.md) + * [Execution Queue](docs/en/execution_queue.md) + * [bthread tracer](docs/en/bthread_tracer.md) + * [bthread tagged task group](docs/en/bthread_tagged_task_group.md) * Client * [Basics](docs/en/client.md) * [Error code](docs/en/error_code.md) * [Combo channels](docs/en/combo_channel.md) * [Access http/h2](docs/en/http_client.md) * [Access gRPC](docs/en/http_derivatives.md#h2grpc) - * [Access thrift](docs/en/thrift.md#client-accesses-thrift-server) - * [Access UB](docs/cn/ub_client.md) + * [Access 
thrift](docs/en/thrift.md#client-accesses-thrift-server) + * [Access UB](docs/en/ub_client.md) * [Streaming RPC](docs/en/streaming_rpc.md) * [Access redis](docs/en/redis_client.md) * [Access memcached](docs/en/memcache_client.md) @@ -56,32 +58,37 @@ You can use it to: * [Serve http/h2](docs/en/http_service.md) * [Serve gRPC](docs/en/http_derivatives.md#h2grpc) * [Serve thrift](docs/en/thrift.md#server-processes-thrift-requests) - * [Serve Nshead](docs/cn/nshead_service.md) - * [Debug server issues](docs/cn/server_debugging.md) + * [Serve Nshead](docs/en/nshead_service.md) + * [Debug server issues](docs/en/server_debugging.md) * [Server push](docs/en/server_push.md) - * [Avalanche](docs/cn/avalanche.md) - * [Auto ConcurrencyLimiter](docs/cn/auto_concurrency_limiter.md) + * [Avalanche](docs/en/avalanche.md) + * [Auto ConcurrencyLimiter](docs/en/auto_concurrency_limiter.md) * [Media Server](https://github.com/brpc/media-server) - * [json2pb](docs/cn/json2pb.md) + * [json2pb](docs/en/json2pb.md) * [Builtin Services](docs/en/builtin_service.md) * [status](docs/en/status.md) * [vars](docs/en/vars.md) - * [connections](docs/cn/connections.md) - * [flags](docs/cn/flags.md) - * [rpcz](docs/cn/rpcz.md) - * [cpu_profiler](docs/cn/cpu_profiler.md) - * [heap_profiler](docs/cn/heap_profiler.md) - * [contention_profiler](docs/cn/contention_profiler.md) + * [connections](docs/en/connections.md) + * [flags](docs/en/flags.md) + * [rpcz](docs/en/rpcz.md) + * [cpu_profiler](docs/en/cpu_profiler.md) + * [heap_profiler](docs/en/heap_profiler.md) + * [contention_profiler](docs/en/contention_profiler.md) * Tools - * [rpc_press](docs/cn/rpc_press.md) - * [rpc_replay](docs/cn/rpc_replay.md) - * [rpc_view](docs/cn/rpc_view.md) - * [benchmark_http](docs/cn/benchmark_http.md) - * [parallel_http](docs/cn/parallel_http.md) + * [rpc_press](docs/en/rpc_press.md) + * [rpc_replay](docs/en/rpc_replay.md) + * [rpc_view](docs/en/rpc_view.md) + * [benchmark_http](docs/en/benchmark_http.md) + * 
[parallel_http](docs/en/parallel_http.md) * Others * [IOBuf](docs/en/iobuf.md) * [Streaming Log](docs/en/streaming_log.md) - * [FlatMap](docs/cn/flatmap.md) + * [FlatMap](docs/en/flatmap.md) + * [Coroutine](docs/en/coroutine.md) + * [Circuit Breaker](docs/en/circuit_breaker.md) + * [RDMA](docs/en/rdma.md) + * [Bazel Support](docs/en/bazel_support.md) + * [Wireshark baidu_std dissector plugin](docs/en/wireshark_baidu_std.md) * [bRPC introduction](docs/cn/brpc_intro.pptx)(training material) * [A tutorial on building large-scale services](docs/en/tutorial_on_building_services.pptx)(training material) * [bRPC internal](docs/en/brpc_internal.pptx)(training material) @@ -90,12 +97,12 @@ You can use it to: * [Atomic instructions](docs/en/atomic_instructions.md) * [IO](docs/en/io.md) * [Threading Overview](docs/en/threading_overview.md) - * [Load Balancing](docs/cn/load_balancing.md) - * [Locality-aware](docs/cn/lalb.md) - * [Consistent Hashing](docs/cn/consistent_hashing.md) - * [Memory Management](docs/cn/memory_management.md) - * [Timer keeping](docs/cn/timer_keeping.md) - * [bthread_id](docs/cn/bthread_id.md) + * [Load Balancing](docs/en/load_balancing.md) + * [Locality-aware](docs/en/lalb.md) + * [Consistent Hashing](docs/en/consistent_hashing.md) + * [Memory Management](docs/en/memory_management.md) + * [Timer keeping](docs/en/timer_keeping.md) + * [bthread_id](docs/en/bthread_id.md) * Use cases * [User cases](community/cases.md) diff --git a/README_cn.md b/README_cn.md index bed6e8437f..6413f83fde 100644 --- a/README_cn.md +++ b/README_cn.md @@ -1,6 +1,7 @@ [English version](README.md) -[![Build Status](https://api.travis-ci.com/apache/brpc.svg?branch=master)](https://travis-ci.com/github/apache/brpc) +[![Linux Build Status](https://github.com/apache/brpc/actions/workflows/ci-linux.yml/badge.svg)](https://github.com/apache/brpc/actions/workflows/ci-linux.yml) +[![MacOs Build 
Status](https://github.com/apache/brpc/actions/workflows/ci-macos.yml/badge.svg)](https://github.com/apache/brpc/actions/workflows/ci-macos.yml) ![brpc logo (light)](docs/images/logo.png#gh-light-mode-only) ![brpc logo (dark)](docs/images/logo-white.png#gh-dark-mode-only) @@ -39,6 +40,7 @@ * [thread-local](docs/cn/thread_local.md) * [Execution Queue](docs/cn/execution_queue.md) * [bthread tracer](docs/cn/bthread_tracer.md) + * [bthread tagged task group](docs/cn/bthread_tagged_task_group.md) * Client * [基础功能](docs/cn/client.md) * [错误码](docs/cn/error_code.md) @@ -83,6 +85,11 @@ * [IOBuf](docs/cn/iobuf.md) * [Streaming Log](docs/cn/streaming_log.md) * [FlatMap](docs/cn/flatmap.md) + * [协程](docs/cn/coroutine.md) + * [熔断](docs/cn/circuit_breaker.md) + * [RDMA](docs/cn/rdma.md) + * [Bazel构建支持](docs/cn/bazel_support.md) + * [Wireshark baidu_std协议解析插件](docs/cn/wireshark_baidu_std.md) * [bRPC外功修炼宝典](docs/cn/brpc_intro.pptx)(培训材料) * [搭建大型服务入门](docs/en/tutorial_on_building_services.pptx)(培训材料) * [bRPC内功修炼宝典](docs/en/brpc_internal.pptx)(培训材料) diff --git a/docs/cn/couchbase_example.md b/docs/cn/couchbase_example.md new file mode 100644 index 0000000000..f7ed6d7733 --- /dev/null +++ b/docs/cn/couchbase_example.md @@ -0,0 +1,5 @@ +# Couchbase 示例 + +本文档尚未翻译为中文。 + +请参阅[英文版](../en/couchbase_example.md)获取完整内容。 diff --git a/docs/en/auto_concurrency_limiter.md b/docs/en/auto_concurrency_limiter.md new file mode 100644 index 0000000000..deaaa5885d --- /dev/null +++ b/docs/en/auto_concurrency_limiter.md @@ -0,0 +1,7 @@ +# auto concurrency limiter + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/auto_concurrency_limiter.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. 
diff --git a/docs/en/baidu_std.md b/docs/en/baidu_std.md new file mode 100644 index 0000000000..c3fcc1f396 --- /dev/null +++ b/docs/en/baidu_std.md @@ -0,0 +1,7 @@ +# baidu std + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/baidu_std.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/benchmark.md b/docs/en/benchmark.md new file mode 100644 index 0000000000..1c5cbe7ffa --- /dev/null +++ b/docs/en/benchmark.md @@ -0,0 +1,7 @@ +# benchmark + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/benchmark.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/benchmark_http.md b/docs/en/benchmark_http.md new file mode 100644 index 0000000000..905bb7e74d --- /dev/null +++ b/docs/en/benchmark_http.md @@ -0,0 +1,7 @@ +# benchmark http + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/benchmark_http.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/bthread.md b/docs/en/bthread.md new file mode 100644 index 0000000000..7c627f1b5b --- /dev/null +++ b/docs/en/bthread.md @@ -0,0 +1,7 @@ +# bthread + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/bthread.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/bthread_id.md b/docs/en/bthread_id.md new file mode 100644 index 0000000000..ff67747ff0 --- /dev/null +++ b/docs/en/bthread_id.md @@ -0,0 +1,7 @@ +# bthread id + +This document has not yet been translated into English.
+ +Please refer to the [Chinese version](../cn/bthread_id.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/bthread_or_not.md b/docs/en/bthread_or_not.md new file mode 100644 index 0000000000..2450c3c04a --- /dev/null +++ b/docs/en/bthread_or_not.md @@ -0,0 +1,7 @@ +# bthread or not + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/bthread_or_not.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/bthread_tagged_task_group.md b/docs/en/bthread_tagged_task_group.md new file mode 100644 index 0000000000..b509b31e33 --- /dev/null +++ b/docs/en/bthread_tagged_task_group.md @@ -0,0 +1,7 @@ +# bthread tagged task group + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/bthread_tagged_task_group.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/bthread_tracer.md b/docs/en/bthread_tracer.md new file mode 100644 index 0000000000..6df0d8d742 --- /dev/null +++ b/docs/en/bthread_tracer.md @@ -0,0 +1,7 @@ +# bthread tracer + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/bthread_tracer.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/case_apicontrol.md b/docs/en/case_apicontrol.md new file mode 100644 index 0000000000..3a9fcf928e --- /dev/null +++ b/docs/en/case_apicontrol.md @@ -0,0 +1,7 @@ +# case apicontrol + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/case_apicontrol.md) for the full content. + +Contributions to translate this document are welcome.
See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/case_baidu_dsp.md b/docs/en/case_baidu_dsp.md new file mode 100644 index 0000000000..1b47fd3df8 --- /dev/null +++ b/docs/en/case_baidu_dsp.md @@ -0,0 +1,7 @@ +# case baidu dsp + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/case_baidu_dsp.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/case_elf.md b/docs/en/case_elf.md new file mode 100644 index 0000000000..5f710a7383 --- /dev/null +++ b/docs/en/case_elf.md @@ -0,0 +1,7 @@ +# case elf + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/case_elf.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/case_ubrpc.md b/docs/en/case_ubrpc.md new file mode 100644 index 0000000000..bd1705d1ef --- /dev/null +++ b/docs/en/case_ubrpc.md @@ -0,0 +1,7 @@ +# case ubrpc + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/case_ubrpc.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/circuit_breaker.md b/docs/en/circuit_breaker.md new file mode 100644 index 0000000000..a541f1128a --- /dev/null +++ b/docs/en/circuit_breaker.md @@ -0,0 +1,7 @@ +# circuit breaker + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/circuit_breaker.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines.
diff --git a/docs/en/connections.md b/docs/en/connections.md new file mode 100644 index 0000000000..24ab0c6e77 --- /dev/null +++ b/docs/en/connections.md @@ -0,0 +1,7 @@ +# connections + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/connections.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/consistent_hashing.md b/docs/en/consistent_hashing.md new file mode 100644 index 0000000000..3fff6ced33 --- /dev/null +++ b/docs/en/consistent_hashing.md @@ -0,0 +1,7 @@ +# consistent hashing + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/consistent_hashing.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/contention_profiler.md b/docs/en/contention_profiler.md new file mode 100644 index 0000000000..a393b2b190 --- /dev/null +++ b/docs/en/contention_profiler.md @@ -0,0 +1,7 @@ +# contention profiler + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/contention_profiler.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/coroutine.md b/docs/en/coroutine.md new file mode 100644 index 0000000000..a53414c8c0 --- /dev/null +++ b/docs/en/coroutine.md @@ -0,0 +1,7 @@ +# coroutine + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/coroutine.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. 
diff --git a/docs/en/cpu_profiler.md b/docs/en/cpu_profiler.md new file mode 100644 index 0000000000..2be417a87b --- /dev/null +++ b/docs/en/cpu_profiler.md @@ -0,0 +1,7 @@ +# cpu profiler + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/cpu_profiler.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/endpoint.md b/docs/en/endpoint.md new file mode 100644 index 0000000000..ad10963d42 --- /dev/null +++ b/docs/en/endpoint.md @@ -0,0 +1,7 @@ +# endpoint + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/endpoint.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/execution_queue.md b/docs/en/execution_queue.md new file mode 100644 index 0000000000..194251f15a --- /dev/null +++ b/docs/en/execution_queue.md @@ -0,0 +1,7 @@ +# execution queue + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/execution_queue.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/flags.md b/docs/en/flags.md new file mode 100644 index 0000000000..74ea5b446c --- /dev/null +++ b/docs/en/flags.md @@ -0,0 +1,7 @@ +# flags + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/flags.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/flatmap.md b/docs/en/flatmap.md new file mode 100644 index 0000000000..256fb2bd40 --- /dev/null +++ b/docs/en/flatmap.md @@ -0,0 +1,7 @@ +# flatmap + +This document has not yet been translated into English. 
+ +Please refer to the [Chinese version](../cn/flatmap.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/heap_profiler.md b/docs/en/heap_profiler.md new file mode 100644 index 0000000000..76eeaeb59c --- /dev/null +++ b/docs/en/heap_profiler.md @@ -0,0 +1,7 @@ +# heap profiler + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/heap_profiler.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/json2pb.md b/docs/en/json2pb.md new file mode 100644 index 0000000000..fe29483a02 --- /dev/null +++ b/docs/en/json2pb.md @@ -0,0 +1,7 @@ +# json2pb + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/json2pb.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/load_balancing.md b/docs/en/load_balancing.md new file mode 100644 index 0000000000..4c975da9e5 --- /dev/null +++ b/docs/en/load_balancing.md @@ -0,0 +1,7 @@ +# load balancing + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/load_balancing.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/mbvar_c++.md b/docs/en/mbvar_c++.md new file mode 100644 index 0000000000..f31d86c6d7 --- /dev/null +++ b/docs/en/mbvar_c++.md @@ -0,0 +1,7 @@ +# mbvar c++ + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/mbvar_c++.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines.
diff --git a/docs/en/memory_management.md b/docs/en/memory_management.md new file mode 100644 index 0000000000..a0a7c4d114 --- /dev/null +++ b/docs/en/memory_management.md @@ -0,0 +1,7 @@ +# memory management + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/memory_management.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/nshead_service.md b/docs/en/nshead_service.md new file mode 100644 index 0000000000..54651c2a33 --- /dev/null +++ b/docs/en/nshead_service.md @@ -0,0 +1,7 @@ +# nshead service + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/nshead_service.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/parallel_http.md b/docs/en/parallel_http.md new file mode 100644 index 0000000000..1448d6ba7f --- /dev/null +++ b/docs/en/parallel_http.md @@ -0,0 +1,7 @@ +# parallel http + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/parallel_http.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/rpc_press.md b/docs/en/rpc_press.md new file mode 100644 index 0000000000..69e0391bff --- /dev/null +++ b/docs/en/rpc_press.md @@ -0,0 +1,7 @@ +# rpc press + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/rpc_press.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. 
diff --git a/docs/en/rpc_replay.md b/docs/en/rpc_replay.md new file mode 100644 index 0000000000..e592738916 --- /dev/null +++ b/docs/en/rpc_replay.md @@ -0,0 +1,7 @@ +# rpc replay + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/rpc_replay.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/rpc_view.md b/docs/en/rpc_view.md new file mode 100644 index 0000000000..31c59317a0 --- /dev/null +++ b/docs/en/rpc_view.md @@ -0,0 +1,7 @@ +# rpc view + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/rpc_view.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/rpcz.md b/docs/en/rpcz.md new file mode 100644 index 0000000000..89e6abc532 --- /dev/null +++ b/docs/en/rpcz.md @@ -0,0 +1,7 @@ +# rpcz + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/rpcz.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/sanitizers.md b/docs/en/sanitizers.md new file mode 100644 index 0000000000..f7ef588b73 --- /dev/null +++ b/docs/en/sanitizers.md @@ -0,0 +1,7 @@ +# sanitizers + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/sanitizers.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/thread_local.md b/docs/en/thread_local.md new file mode 100644 index 0000000000..e817ff696d --- /dev/null +++ b/docs/en/thread_local.md @@ -0,0 +1,7 @@ +# thread local + +This document has not yet been translated into English. 
+ +Please refer to the [Chinese version](../cn/thread_local.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/timeout_concurrency_limiter.md b/docs/en/timeout_concurrency_limiter.md new file mode 100644 index 0000000000..e592df9da0 --- /dev/null +++ b/docs/en/timeout_concurrency_limiter.md @@ -0,0 +1,7 @@ +# timeout concurrency limiter + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/timeout_concurrency_limiter.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/timer_keeping.md b/docs/en/timer_keeping.md new file mode 100644 index 0000000000..f9bb8c79c6 --- /dev/null +++ b/docs/en/timer_keeping.md @@ -0,0 +1,7 @@ +# timer keeping + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/timer_keeping.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. diff --git a/docs/en/ub_client.md b/docs/en/ub_client.md new file mode 100644 index 0000000000..670a586d3b --- /dev/null +++ b/docs/en/ub_client.md @@ -0,0 +1,7 @@ +# ub client + +This document has not yet been translated into English. + +Please refer to the [Chinese version](../cn/ub_client.md) for the full content. + +Contributions to translate this document are welcome. See [TRANSLATING](TRANSLATING) for guidelines. 
From 86db9aeeee84331490fefbcc9a2e62534896fac4 Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Fri, 10 Apr 2026 13:38:38 +0800 Subject: [PATCH 53/84] Fix use after free issue of EventDispatcher LatencyRecorder (#3267) --- src/brpc/event_dispatcher.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/brpc/event_dispatcher.cpp b/src/brpc/event_dispatcher.cpp index a5265b8ccd..d4316beff2 100644 --- a/src/brpc/event_dispatcher.cpp +++ b/src/brpc/event_dispatcher.cpp @@ -54,13 +54,11 @@ static void StopAndJoinGlobalDispatchers() { g_edisp[i * FLAGS_event_dispatcher_num + j].Join(); } } - delete g_edisp_read_lantency; - delete g_edisp_write_lantency; } void InitializeGlobalDispatchers() { - g_edisp_read_lantency = new bvar::LatencyRecorder("event_dispatcher_read_latency"); - g_edisp_write_lantency = new bvar::LatencyRecorder("event_dispatcher_write_latency"); + g_edisp_read_lantency = new bvar::LatencyRecorder("event_dispatcher_read"); + g_edisp_write_lantency = new bvar::LatencyRecorder("event_dispatcher_write"); g_edisp = new EventDispatcher[FLAGS_task_group_ntags * FLAGS_event_dispatcher_num]; for (int i = 0; i < FLAGS_task_group_ntags; ++i) { From e2f882dae582267a6a9b6bc369cbaf5ddd962cc8 Mon Sep 17 00:00:00 2001 From: zchuango Date: Mon, 13 Apr 2026 16:00:41 +0800 Subject: [PATCH 54/84] add ubring transport --- CMakeLists.txt | 22 +- src/brpc/input_messenger.cpp | 2 +- src/brpc/input_messenger.h | 1 + src/brpc/rdma_transport.cpp | 2 +- src/brpc/socket.h | 5 +- src/brpc/socket_mode.h | 3 +- src/brpc/transport_factory.cpp | 11 + src/brpc/ub/common/common.h | 163 +++ src/brpc/ub/common/thread_lock.h | 105 ++ src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl | 40 + src/brpc/ub/rack_mem/ubs_mem.h | 197 ++++ src/brpc/ub/rack_mem/ubs_mem_def.h | 150 +++ src/brpc/ub/rack_mem/ubshmem_stub.cpp | 91 ++ src/brpc/ub/shm/shm_def.h | 46 + src/brpc/ub/shm/shm_ipc.cpp | 171 +++ src/brpc/ub/shm/shm_ipc.h | 21 + src/brpc/ub/shm/shm_mgr.cpp | 241 +++++ 
src/brpc/ub/shm/shm_mgr.h | 34 + src/brpc/ub/shm/shm_ubs.cpp | 544 ++++++++++ src/brpc/ub/shm/shm_ubs.h | 34 + src/brpc/ub/timer/timer_mgr.cpp | 367 +++++++ src/brpc/ub/timer/timer_mgr.h | 41 + src/brpc/ub/ub_endpoint.cpp | 878 ++++++++++++++++ src/brpc/ub/ub_endpoint.h | 221 ++++ src/brpc/ub/ub_helper.cpp | 120 +++ src/brpc/ub/ub_helper.h | 49 + src/brpc/ub/ub_ring.cpp | 1031 +++++++++++++++++++ src/brpc/ub/ub_ring.h | 185 ++++ src/brpc/ub/ub_ring_manager.cpp | 246 +++++ src/brpc/ub/ub_ring_manager.h | 75 ++ src/brpc/ub/ubr_msg.h | 40 + src/brpc/ub/ubr_trx.h | 149 +++ src/brpc/ub_transport.cpp | 224 ++++ src/brpc/ub_transport.h | 51 + src/butil/iobuf.cpp | 58 ++ src/butil/iobuf.h | 3 + 36 files changed, 5616 insertions(+), 5 deletions(-) create mode 100644 src/brpc/ub/common/common.h create mode 100644 src/brpc/ub/common/thread_lock.h create mode 100644 src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl create mode 100644 src/brpc/ub/rack_mem/ubs_mem.h create mode 100644 src/brpc/ub/rack_mem/ubs_mem_def.h create mode 100644 src/brpc/ub/rack_mem/ubshmem_stub.cpp create mode 100644 src/brpc/ub/shm/shm_def.h create mode 100644 src/brpc/ub/shm/shm_ipc.cpp create mode 100644 src/brpc/ub/shm/shm_ipc.h create mode 100644 src/brpc/ub/shm/shm_mgr.cpp create mode 100644 src/brpc/ub/shm/shm_mgr.h create mode 100644 src/brpc/ub/shm/shm_ubs.cpp create mode 100644 src/brpc/ub/shm/shm_ubs.h create mode 100644 src/brpc/ub/timer/timer_mgr.cpp create mode 100644 src/brpc/ub/timer/timer_mgr.h create mode 100644 src/brpc/ub/ub_endpoint.cpp create mode 100644 src/brpc/ub/ub_endpoint.h create mode 100644 src/brpc/ub/ub_helper.cpp create mode 100644 src/brpc/ub/ub_helper.h create mode 100644 src/brpc/ub/ub_ring.cpp create mode 100644 src/brpc/ub/ub_ring.h create mode 100644 src/brpc/ub/ub_ring_manager.cpp create mode 100644 src/brpc/ub/ub_ring_manager.h create mode 100644 src/brpc/ub/ubr_msg.h create mode 100644 src/brpc/ub/ubr_trx.h create mode 100644 src/brpc/ub_transport.cpp create mode 
100644 src/brpc/ub_transport.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 77703a4661..d84ba69343 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(WITH_THRIFT "With thrift framed protocol supported" OFF) option(WITH_BTHREAD_TRACER "With bthread tracer supported" OFF) option(WITH_SNAPPY "With snappy" OFF) option(WITH_RDMA "With RDMA" OFF) +option(WITH_UBRING "With UB" OFF) option(WITH_DEBUG_BTHREAD_SCHE_SAFETY "With debugging bthread sche safety" OFF) option(WITH_DEBUG_LOCK "With debugging lock" OFF) option(WITH_ASAN "With AddressSanitizer" OFF) @@ -104,6 +105,11 @@ if(WITH_RDMA) set(WITH_RDMA_VAL "1") endif() +set(WITH_UBRING_VAL "0") +if(WITH_UBRING) + set(WITH_UBRING_VAL "1") +endif() + set(WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL "0") if(WITH_DEBUG_BTHREAD_SCHE_SAFETY) set(WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL "1") @@ -136,7 +142,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -Wno-deprecated-declarations -Wno-inconsistent-missing-override") endif() -set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=${WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL} -DBRPC_DEBUG_LOCK=${WITH_DEBUG_LOCK_VAL}") +set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DBRPC_WITH_UBRING=${WITH_UBRING_VAL} -DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=${WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL} -DBRPC_DEBUG_LOCK=${WITH_DEBUG_LOCK_VAL}") if (WITH_ASAN) set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -fsanitize=address") set(CMAKE_C_FLAGS "${CMAKE_CPP_FLAGS} -fsanitize=address") @@ -270,6 +276,15 @@ if(WITH_RDMA) endif() endif() +if(WITH_UBRING) + message("brpc compile with ub") + # find_path(RDMA_INCLUDE_PATH NAMES infiniband/verbs.h) + # find_library(UB_LIB NAMES ibverbs) + # if((NOT RDMA_INCLUDE_PATH) OR (NOT UB_LIB)) + # message(FATAL_ERROR "Fail to find ibverbs") + # 
endif() +endif() + find_library(PROTOC_LIB NAMES protoc) if(NOT PROTOC_LIB) message(FATAL_ERROR "Fail to find protoc lib") @@ -322,6 +337,10 @@ if(WITH_RDMA) list(APPEND DYNAMIC_LIB ${RDMA_LIB}) endif() +if(WITH_UBRING) + list(APPEND DYNAMIC_LIB ${UB_LIB}) +endif() + set(BRPC_PRIVATE_LIBS "-lgflags -lprotobuf -lleveldb -lprotoc -lssl -lcrypto -ldl -lz") if(WITH_GLOG) @@ -564,6 +583,7 @@ set(SOURCES ${MCPACK2PB_SOURCES} ${BRPC_SOURCES} ${THRIFT_SOURCES} + ${BRPC_C_SOURCES} ) add_subdirectory(src) diff --git a/src/brpc/input_messenger.cpp b/src/brpc/input_messenger.cpp index c249cca22c..fa05423640 100644 --- a/src/brpc/input_messenger.cpp +++ b/src/brpc/input_messenger.cpp @@ -312,7 +312,7 @@ int InputMessenger::ProcessNewMessage( // not in the bthread where the polling bthread is located, because the // method for processing messages may call synchronization primitives, // causing the polling bthread to be scheduled out. - if (m->_socket_mode == SOCKET_MODE_RDMA) { + if (m->_socket_mode == SOCKET_MODE_RDMA || m->_socket_mode == SOCKET_MODE_UBRING) { m->_transport->QueueMessage(last_msg, &num_bthread_created, true); } if (num_bthread_created) { diff --git a/src/brpc/input_messenger.h b/src/brpc/input_messenger.h index 8482c3f3fc..2982996239 100644 --- a/src/brpc/input_messenger.h +++ b/src/brpc/input_messenger.h @@ -93,6 +93,7 @@ class InputMessenger : public SocketUser { friend class Socket; friend class TcpTransport; friend class rdma::RdmaEndpoint; +friend class ub::UBShmEndpoint; public: explicit InputMessenger(size_t capacity = 128); ~InputMessenger(); diff --git a/src/brpc/rdma_transport.cpp b/src/brpc/rdma_transport.cpp index 88d89a7b06..4cd93994e2 100644 --- a/src/brpc/rdma_transport.cpp +++ b/src/brpc/rdma_transport.cpp @@ -50,7 +50,7 @@ void RdmaTransport::Init(Socket *socket, const SocketOptions &options) { if (options.need_on_edge_trigger && _on_edge_trigger == NULL) { _on_edge_trigger = rdma::RdmaEndpoint::OnNewDataFromTcp; } - _tcp_transport = 
std::make_shared(); + _tcp_transport = std::unique_ptr<TcpTransport>(new TcpTransport()); _tcp_transport->Init(socket, options); } diff --git a/src/brpc/socket.h b/src/brpc/socket.h index 816fccdf27..0ff2b8a5d0 100644 --- a/src/brpc/socket.h +++ b/src/brpc/socket.h @@ -57,7 +57,10 @@ namespace rdma { class RdmaEndpoint; class RdmaConnect; } - +namespace ub { + class UBShmEndpoint; + class UBConnect; +} class Socket; class AuthContext; class EventDispatcher; diff --git a/src/brpc/socket_mode.h b/src/brpc/socket_mode.h index b5d42be4aa..b4ac7dfbca 100644 --- a/src/brpc/socket_mode.h +++ b/src/brpc/socket_mode.h @@ -20,7 +20,8 @@ namespace brpc { enum SocketMode { SOCKET_MODE_TCP = 0, - SOCKET_MODE_RDMA = 1 + SOCKET_MODE_RDMA = 1, + SOCKET_MODE_UBRING = 2 }; } // namespace brpc #endif //BRPC_SOCKET_MODE_H \ No newline at end of file diff --git a/src/brpc/transport_factory.cpp b/src/brpc/transport_factory.cpp index b689e2edd2..0dfb55e6b8 100644 --- a/src/brpc/transport_factory.cpp +++ b/src/brpc/transport_factory.cpp @@ -18,6 +18,7 @@ #include "brpc/transport_factory.h" #include "brpc/tcp_transport.h" #include "brpc/rdma_transport.h" +#include "brpc/ub_transport.h" namespace brpc { int TransportFactory::ContextInitOrDie(SocketMode mode, bool serverOrNot, const void* _options) { @@ -28,6 +29,11 @@ int TransportFactory::ContextInitOrDie(SocketMode mode, bool serverOrNot, const else if (mode == SOCKET_MODE_RDMA) { return RdmaTransport::ContextInitOrDie(serverOrNot, _options); } +#endif +#if BRPC_WITH_UBRING + else if (mode == SOCKET_MODE_UBRING) { + return UBShmTransport::ContextInitOrDie(serverOrNot, _options); + } #endif else { LOG(ERROR) << "unknown transport type " << mode; @@ -43,6 +49,11 @@ std::unique_ptr TransportFactory::CreateTransport(SocketMode mode) { else if (mode == SOCKET_MODE_RDMA) { return std::unique_ptr(new RdmaTransport()); } +#endif +#if BRPC_WITH_UBRING + else if (mode == SOCKET_MODE_UBRING) { + return std::unique_ptr(new UBShmTransport()); + } #endif else { LOG(ERROR) << 
"socket_mode set error"; diff --git a/src/brpc/ub/common/common.h b/src/brpc/ub/common/common.h new file mode 100644 index 0000000000..bbb87ff45e --- /dev/null +++ b/src/brpc/ub/common/common.h @@ -0,0 +1,163 @@ +// +// Created by z00926396 on 2026/4/11. +// + +#ifndef BRPC_COMMON_H +#define BRPC_COMMON_H +#include +#include +#include +#include +#include "butil/logging.h" + +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) + +#ifndef UNREFERENCE_PARAM +#define UNREFERENCE_PARAM(x) ((void)(x)) +#endif + +#ifdef UT +#define STATIC +#define INLINE +#define HLC_STATISTICS_PATH ROOT_PATH "/hlc/run" +#else +#define STATIC static +#define INLINE inline +#define HLC_STATISTICS_PATH "/opt/hlc/run" +#endif + +#ifdef __cplusplus +#include +using AtomicInt = std::atomic; +using AtomicBool = std::atomic; +using AtomicUintFast64 = std::atomic; +using AtomicUintFast8 = std::atomic; +#define ATOMIC_INIT(var, value) var.store(value) +#define ATOMIC_STORE(var, value) var.store(value) +#define ATOMIC_LOAD(var) var.load() +#define ATOMIC_ADD(var, value) var.fetch_add(value) +#define ATOMIC_SUB(var, value) var.fetch_sub(value) +#define ATOMIC_COMPARE_EXCHANGE_STRONG(var, expected, desired) var.compare_exchange_strong((expected), (desired)) +#else +#include +typedef atomic_int AtomicInt; +typedef atomic_bool AtomicBool; +typedef atomic_uint_fast64_t AtomicUintFast64; +typedef atomic_uint_fast8_t AtomicUintFast8; +#define ATOMIC_INIT(var, value) atomic_init(&(var), value) +#define ATOMIC_STORE(var, value) atomic_store(&(var), value) +#define ATOMIC_LOAD(var) atomic_load(&(var)) +#define ATOMIC_ADD(var, value) atomic_fetch_add(&(var), value) +#define ATOMIC_SUB(var, value) atomic_fetch_sub(&(var), value) +#define ATOMIC_COMPARE_EXCHANGE_STRONG(var, expected, desired) \ + atomic_compare_exchange_strong(&(var), &(expected), (desired)) +#endif +#define ISB() __asm__ __volatile__("isb" ::: "memory") +#define DSB() __asm__ __volatile__("dsb sy" ::: 
"memory") + +#ifndef errno_t +typedef int errno_t; +#endif +#ifndef EOK +#define EOK 0 +#endif +#define MAX_NODE_NUM 8 +#define IPV4_FIRST_BYTE_OFFSET 24 +#define COPY_ALIGNED_DATA_BYTES 64 +static inline int Copy64Byte(int8_t *dst, int8_t *src) +{ +#ifdef LS64 + asm volatile ( + "mov x12, %0\n" + "mov x13, %1\n" + "ldr x4, [x12]\n" + "ldr x5, [x12, #8]\n" + "ldr x6, [x12, #16]\n" + "ldr x7, [x12, #24]\n" + "ldr x8, [x12, #32]\n" + "ldr x9, [x12, #40]\n" + "ldr x10, [x12, #48]\n" + "ldr x11, [x12, #56]\n" + "ST64B x4, [x13]\n" + : + : "r" (src), "r" (dst) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13" + ); + return EOK; +#else + memcpy(dst, src, COPY_ALIGNED_DATA_BYTES); + return EOK; +#endif +} + +#define SEC_TO_NSEC 1000000000 +#define MSEC_TO_NSEC 1000000 +#define USEC_TO_NSEC 1000 +#define MSEC_TO_SEC 1000 +#define MAX_IP_PORT_STR_LEN 23 +#define DECIMAL_BASE 10 +static inline uint64_t GetCurNanoSeconds(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + uint64_t timestamp = (uint64_t)ts.tv_sec * SEC_TO_NSEC + (uint64_t)ts.tv_nsec; + return timestamp; +} + +#define FREE_PTR(ptr) \ + do { \ + if ((ptr) != NULL) { \ + free(ptr); \ + (ptr) = NULL; \ + } \ + } while (0) + +typedef enum { + HLC_OK = 0, + HLC_ERR = -1, + HLC_RETRY = -2, + HLC_REENTRY = -3, + HLC_ERR_TIMEOUT = -4, + // SHM Module + SHM_ERR = -100, + SHM_ERR_INPUT_INVALID = -101, + SHM_ERR_EXIST = -102, + SHM_ERR_RESOURCE_ATTACHED = -103, + SHM_ERR_NOT_FOUND = -104, + SHM_ERR_UBSM_NET_ERR = -105, + + // MPA模块 + MPA_UDP_ERR = -200, + MPA_UDP_NO_TRX = -201, + MPA_UDP_STATUS_NOT_JOINED = -202, + MPA_MUXER_NOT_READY = -203, + MPA_PORT_FULL = -204, + MPA_PORT_OUTRANGE = -205, + MPA_PORT_TAKEN = -206, + MPA_UDP_STATUS_NOT_CONNECTED = -207, + MPA_UDP_STATUS_ALREADY_CONNECTED = -208, + MPA_UDP_OLD_RDLIST = -209, + MPA_UDP_RDLIST_FULL = -210, + // ubr模块 + UBR_NOT_CONNECTED = -300, + UBR_ERR_ADDR_IN_USE = -301, +} RETURN_CODE; + +#define ALIGN_BYTES 0x40 +#define 
CHECKED_ALIGN_BITS (ALIGN_BYTES - 1) +static inline size_t Aligned64Offset(uint8_t *addr) +{ + return ((ALIGN_BYTES - (((size_t)(addr)) & CHECKED_ALIGN_BITS)) & CHECKED_ALIGN_BITS); +} + +static inline RETURN_CODE HasTimedOut(const uint64_t startTime, const uint32_t timeout) +{ + uint64_t endTime = startTime + (uint64_t)timeout * SEC_TO_NSEC; + if (GetCurNanoSeconds() > endTime) { + LOG(ERROR) << "task time out " << timeout << " seconds."; + return HLC_ERR; + } + return HLC_OK; +} +#endif //BRPC_COMMON_H \ No newline at end of file diff --git a/src/brpc/ub/common/thread_lock.h b/src/brpc/ub/common/thread_lock.h new file mode 100644 index 0000000000..f8aaf4b20d --- /dev/null +++ b/src/brpc/ub/common/thread_lock.h @@ -0,0 +1,105 @@ +// +// Created by z00926396 on 2026/4/11. +// + +#ifndef BRPC_THREAD_LOCK_H +#define BRPC_THREAD_LOCK_H +#include +#include +#include +#include +#include +#include "brpc/ub/common/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void UnlockMutex(pthread_mutex_t **mtx) +{ + if (LIKELY(mtx != NULL && *mtx != NULL)) { + pthread_mutex_unlock(*mtx); + } else { + LOG(ERROR) << "Invalid input for mtx."; + } +} + +#define LOCK_GUARD(mtxPtr) \ + pthread_mutex_t *__attribute__((cleanup(UnlockMutex))) _mtxPtr = ({ \ + pthread_mutex_lock(&(mtxPtr)); \ + &(mtxPtr); \ + }) + +static inline void UnlockSpinLock(pthread_spinlock_t **spinLock) +{ + if (LIKELY(spinLock != NULL && *spinLock != NULL)) { + pthread_spin_unlock(*spinLock); + } else { + LOG(ERROR) << "Invalid input for spinLock."; + } +} + +#define SPIN_LOCK_GUARD(spinLockPtr) \ + pthread_spinlock_t *__attribute__((cleanup(UnlockSpinLock))) _spinLockPtr = ({ \ + pthread_spin_lock(&(spinLockPtr)); \ + &(spinLockPtr); \ + }) + +static inline void UnlockRWLock(pthread_rwlock_t **rwLock) +{ + if (LIKELY(rwLock != NULL && *rwLock != NULL)) { + pthread_rwlock_unlock(*rwLock); + } else { + LOG(ERROR) << "Invalid input for rwLock."; + } +} + +#define R_LOCK_GUARD(readLockPtr) \ + 
pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _readLockPtr = ({ \ + pthread_rwlock_rdlock(&(readLockPtr)); \ + &(readLockPtr); \ + }) + +#define W_LOCK_GUARD(writeLockPtr) \ + pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _writeLockPtr = ({ \ + pthread_rwlock_wrlock(&(writeLockPtr)); \ + &(writeLockPtr); \ + }) + +static inline void PostSemWithClose(sem_t **sem) +{ + if (LIKELY(sem != NULL && *sem != NULL)) { + sem_post(*sem); + sem_close(*sem); + *sem = NULL; + sem = NULL; + } else { + LOG(ERROR) << "Invalid input for semaphore."; + } +} + +static inline void PostSem(sem_t **sem) +{ + if (LIKELY(sem != NULL && *sem != NULL)) { + sem_post(*sem); + } else { + LOG(ERROR) << "Invalid input for semaphore."; + } +} + +#define SEMAPHORE_WAIT_GUARD_WITH_CLOSE(semPtr) \ + sem_t *__attribute__((cleanup(PostSemWithClose))) _semPtr = ({ \ + sem_wait(semPtr); \ + semPtr; \ + }) + +#define SEMAPHORE_WAIT_GUARD(semPtr) \ + sem_t *__attribute__((cleanup(PostSem))) _semPtr = ({ \ + sem_wait(semPtr); \ + semPtr; \ + }) + +#ifdef __cplusplus +} +#endif +#endif //BRPC_THREAD_LOCK_H \ No newline at end of file diff --git a/src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl b/src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl new file mode 100644 index 0000000000..fc1d3d9835 --- /dev/null +++ b/src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl @@ -0,0 +1,40 @@ +#ifndef HLC_MK_UBSM +#error Do not include this file unless you know what you are doing. 
+#endif + +#ifndef HLC_MK_UBSM_OPTIONAL +#define HLC_MK_UBSM_OPTIONAL HLC_MK_UBSM +#endif + +HLC_MK_UBSM(int, ubsmem_init_attributes, (ubsmem_options_t *ubsm_shmem_opts)); + +HLC_MK_UBSM(int, ubsmem_initialize, (const ubsmem_options_t *ubsm_shmem_opts)); + +HLC_MK_UBSM(int, ubsmem_finalize, (void)); + +HLC_MK_UBSM(int, ubsmem_set_logger_level, (int level)); + +HLC_MK_UBSM(int, ubsmem_set_extern_logger, (void (*func)(int level, const char *msg))); + +HLC_MK_UBSM(int, ubsmem_lookup_regions, (ubsmem_regions_t* regions)); + +HLC_MK_UBSM(int, ubsmem_create_region, (const char *region_name, size_t size, const ubsmem_region_attributes_t *reg_attr)); + +HLC_MK_UBSM(int, ubsmem_destroy_region, (const char *region_name)); + +HLC_MK_UBSM(int, ubsmem_shmem_allocate,(const char *region_name, const char *name, size_t size, mode_t mode, + uint64_t flags)); + +HLC_MK_UBSM(int, ubsmem_shmem_deallocate, (const char *name)); + +HLC_MK_UBSM(int, ubsmem_shmem_map, (void *addr, size_t length, int prot, int flags, const char *name, off_t offset, + void **local_ptr)); + +HLC_MK_UBSM(int, ubsmem_shmem_unmap, (void *local_ptr, size_t length)); + +HLC_MK_UBSM(int, ubsmem_shmem_faults_register, (shmem_faults_func registerFunc)); + +HLC_MK_UBSM(int, ubsmem_local_nid_query, (uint32_t *nid)); + +#undef HLC_MK_UBSM_OPTIONAL +#undef HLC_MK_UBSM \ No newline at end of file diff --git a/src/brpc/ub/rack_mem/ubs_mem.h b/src/brpc/ub/rack_mem/ubs_mem.h new file mode 100644 index 0000000000..a1a986ca18 --- /dev/null +++ b/src/brpc/ub/rack_mem/ubs_mem.h @@ -0,0 +1,197 @@ +// +// Created by z00926396 on 2026/4/11. 
+// + +#ifndef BRPC_UBS_MEM_H +#define BRPC_UBS_MEM_H +#include "ubs_mem_def.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Initialize the UBSMSHMEM attributes + * + * @param ubsm_shmem_opts - [out] shmem attributes + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_init_attributes(ubsmem_options_t *ubsm_shmem_opts); + +/** + * Initialize the UBSMSHMEM library. + * Required to be the first called when a process uses the UBSMSHMEM library. + * @param ubsm_shmem_opts - options structure containing initialization choices + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_initialize(const ubsmem_options_t *ubsm_shmem_opts); + +/** + * Finalize the UBSMSHMEM library. + * Once finalized, the process can continue work,but it is disconnected from the UBSMSHMEM library functions. + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_finalize(void); + +/** + * @brief Set log level + * @return - 0 on success and other on failure + * @param level - level to be set, debug(0), info(1), warning(2), error(3), closed(4) + */ +SHMEM_API int ubsmem_set_logger_level(int level); + +/** + * @brief Set external log function, user can set customized logger function, + * in the customized logger function, user can use unified logger utility, + * then the log message can be written into the same log file as caller's, + * if it is not set, log message will be printed to stdout. + * @param func - [in] external logger function + * @return 0 on success and other on failure + */ +SHMEM_API int ubsmem_set_extern_logger(void (*func)(int level, const char *msg)); + +/** + * Look up regions in UBSMSHMEM associated with the local node. + * @param regions - [out] The descriptor to the regions. + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_lookup_regions(ubsmem_regions_t* regions); + +/** + * Create a large region of UBSMSHMEM. 
+ * Regions are primarily used as large containers within which additional memory may be allocated and managed by + * the program. + * @param region_name - name of the region + * @param size - size (in bytes) requested for the region, 930 no use, default 0. + * Note that implementations may round up the size to implementation-dependent sizes, + * and may impose system-wide (or user-dependent) limits on individual and total size allocated to a given user. + * @param reg_attr - details of UBSMSHMEM region attributes + * @param region_desc - [out] Region_Descriptor for the created region + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_create_region(const char *region_name, size_t size, const ubsmem_region_attributes_t *reg_attr); + +/** + * Look up a region in UBSMSHMEM by name in the name service. + * @param region_name - name of the region. + * @param region_desc - [out] The descriptor to the region. + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_lookup_region(const char *region_name, ubsmem_region_desc_t *region_desc); + +/** + * Destroy a region, and all contents within the region. Note that this + * method call will trigger a delayed free operation to permit other + * instances currently using the region to finish. + * @param region_name - name of the region. + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_destroy_region(const char *region_name); + +/** + * Allocate some named space within a region. Allocates an area of UBSMSHMEM within a region + * @param region_name - name of the region. + * @param name - name of the share memory object + * @param size - size of the space to allocate in bytes. + * @param mode - mode associated with this space. + * @param flags - Special marking for this object, MXMEM_FLAG_WITH_LOCK etc. 
+ * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_shmem_allocate(const char *region_name, const char *name, size_t size, mode_t mode, + uint64_t flags); + +/** + * Deallocate allocated space in memory + * @param name - name of the share memory object + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_shmem_deallocate(const char *name); + +/** + * Map item in UBSMSHMEM to the local virtual address space, and return its pointer. + * @param addr - The starting address for the new mapping is specified in addr, If addr is NULL, then + * the kernel chooses the (page-aligned) address at which to create the mapping + * @param length - The length argument specifies the length of the mapping (which must be greater than 0) + * @param prot - same as mmap, describes the desired memory protection of the mapping (and must not conflict with + * the open mode of the file). + * @param flags - same as mmap + * @param name - name of the share memory object which to be mapped, same as mmap's fd + * @param offset - same as mmap, offset must be a multiple of the page size + * @param local_ptr - [out] within the process virtual address space that can be used to directly access the + * data item in UBSMSHMEM + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_shmem_map(void *addr, size_t length, int prot, int flags, const char *name, off_t offset, + void **local_ptr); + +/** + * Unmap a data item in UBSMSHMEM from the local virtual address space. + * @param local_ptr - pointer within the process virtual address space to be unmapped + * @param length - the size to be unmapped + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_shmem_unmap(void *local_ptr, size_t length); + +/** + * Change permissions associated with a data item descriptor. 
+ * @param name - descriptor associated with some data item + * @param perm - new permissions for the data item + * @return - 0 on success and other on failure,other return described in UBSM_SHMEM_RETURN. + */ +SHMEM_API int ubsmem_shmem_set_ownership(const char *name, void *start, size_t length, int prot); + +/** + * shmem lock - Set the lock, status, and data consistency of the shmem item + * @param name - descriptor associated with share memory object + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_shmem_write_lock(const char *name); +SHMEM_API int ubsmem_shmem_read_lock(const char *name); +SHMEM_API int ubsmem_shmem_unlock(const char *name); + +SHMEM_API int ubsmem_shmem_list_lookup(const char *prefix, ubsmem_shmem_desc_t *shm_list, uint32_t *shm_cnt); +SHMEM_API int ubsmem_shmem_lookup(const char *name, ubsmem_shmem_info_t *shm_info); +SHMEM_API int ubsmem_shmem_attach(const char *name); +SHMEM_API int ubsmem_shmem_detach(const char *name); + +/** + * Alloc an area from the resource pool and use it only within the scope of the current process. + * @param region_name - name of the region. + * @param size - size of the space to allocate in bytes. + * Note that implementations may round up the size to implementation-dependent sizes. + * @param mem_distance - Describe the performance distance between memory resources and local nodes. + * Note that described in perf_desc_distance + * @param is_numa - is numa or fd malloc, true: numa, false: fd + * @param local_ptr - [out] pointer within the process virtual address space that can be used to directly access. + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_lease_malloc(const char *region_name, size_t size, ubsmem_distance_t mem_distance, bool is_numa, + void **local_ptr); + +/** + * Release the pointer. + * @param local_ptr - The pointer returned by the malloc function. 
+ * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_lease_free(void *local_ptr); + +SHMEM_API int ubsmem_lookup_cluster_statistic(ubsmem_cluster_info_t *info); + +/** + * Subscribes to shared memory UB Event. + * @param registerFunc - Shared Memory UB Event Response Handling Function. + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_shmem_faults_register(shmem_faults_func registerFunc); + +/** + * Query the supernode ID of this node within the supernode domain. + * @param nid - The supernode ID of this node within the supernode domain. + * @return - 0 on success and other on failure + */ +SHMEM_API int ubsmem_local_nid_query(uint32_t *nid); + +#ifdef __cplusplus +} // end of extern "C" +#endif +#endif //BRPC_UBS_MEM_H \ No newline at end of file diff --git a/src/brpc/ub/rack_mem/ubs_mem_def.h b/src/brpc/ub/rack_mem/ubs_mem_def.h new file mode 100644 index 0000000000..cc63cee6f5 --- /dev/null +++ b/src/brpc/ub/rack_mem/ubs_mem_def.h @@ -0,0 +1,150 @@ +// +// Created by z00926396 on 2026/4/11. 
+// + +#ifndef BRPC_UBS_MEM_DEF_H +#define BRPC_UBS_MEM_DEF_H +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef SHMEM_API +#define SHMEM_API __attribute__((visibility("default"))) +#endif + +// 先修改为48,与旧版本对齐 +#define MAX_HOST_NUM 16 +#define MAX_NUMA_NUM 32 +#define MAX_NUMA_RESV_LEN 16 + +#define MAX_HOST_NAME_DESC_LENGTH 64 +#define MAX_SHM_NAME_LENGTH 48 +#define MAX_REGION_NAME_DESC_LENGTH 48 +#define MAX_REGION_NODE_NUM 16 +#define MAX_REGIONS_NUM 6 +#define MAX_OBMM_SHMDEV_PATH_LEN 64 + +#define MAX_MEMID_NUM 2048 +#define MAX_SHM_CNT 300 + +#define UBSM_FLAG_CACHE 0x0UL +#define UBSM_FLAG_WITH_LOCK 0x1UL +#define UBSM_FLAG_NONCACHE 0x2UL // open O_SYNC +#define UBSM_FLAG_WR_DELAY_COMP 0x4UL // obmm import with wr_delay_comp +#define UBSM_FLAG_ONLY_IMPORT_NONCACHE 0x8UL // only import open O_SYNC +#define UBSM_FLAG_MEM_ANONYMOUS 0x10UL // auto cleanup when all references in domain drop to zero + +typedef enum { + UBSM_OK = 0, + // common error + UBSM_ERR_PARAM_INVALID = 6010, + UBSM_ERR_NOPERM = 6011, // no permision + UBSM_ERR_MEMORY = 6012, // memcpy or other mem func failed + UBSM_ERR_UNIMPL = 6013, // not implement + UBSM_CHECK_RESOURCE_ERROR = 6014, // resource check failed. + UBSM_ERR_MEMLIB = 6015, // mem lib failed + UBSM_ERR_NO_NEEDED = 6016, // default region no need to create + + // resource error + UBSM_ERR_NOT_FOUND = 6020, + UBSM_ERR_ALREADY_EXIST = 6021, + UBSM_ERR_MALLOC_FAIL = 6022, + UBSM_ERR_RECORD = 6023, + UBSM_ERR_IN_USING = 6024, // shm is in use (usrNum > 0) + + // net error + UBSM_ERR_NET = 6040, + + // under api + UBSM_ERR_UBSE = 6050, + UBSM_ERR_OBMM = 6051, + + // cc lock error + UBSM_ERR_LOCK_NOT_SUPPORTED = 6060, + UBSM_ERR_LOCK_ALREADY_LOCKED = 6061, + UBSM_ERR_DLOCK = 6062, + + UBSM_ERR_BUFF = 6099, +} ubsmshmem_ret_t; +/** + * Memory distance, describes the physical memory resource distance relative to the current PE. 
+ */ +typedef enum { + /** direct connect node is provided, same as PerfLevel::L0 */ + DISTANCE_DIRECT_NODE = 0, + /** one hop connect node is provided, same as PerfLevel::L1, not support 930 */ + DISTANCE_HOP_NODE = 1, +} ubsmem_distance_t; + +typedef struct { + // todo +} ubsmem_options_t; + +typedef struct { + char host_name[MAX_HOST_NAME_DESC_LENGTH]; // include '\0' + bool affinity; +} ubsmem_region_node_desc_t; + +typedef struct { + int host_num; + ubsmem_region_node_desc_t hosts[MAX_REGION_NODE_NUM]; +} ubsmem_region_attributes_t; + +typedef struct { + int num; + ubsmem_region_attributes_t region[MAX_REGIONS_NUM]; +} ubsmem_regions_t; + +typedef struct { + char region_name[MAX_REGION_NAME_DESC_LENGTH]; + size_t size; + ubsmem_region_attributes_t region_attr; +} ubsmem_region_desc_t; + +typedef struct { + uint32_t slot_id; // 节点唯一标识, 采用slotid, 与lcne保持一致 + uint32_t socket_id; // socket id + uint32_t numa_id; // 节点中的numa id + uint32_t mem_lend_ratio; // 池化内存借出比例上限 + uint64_t mem_total; // 内存总量, 单位字节 + uint64_t mem_free; // 内存空闲量, 单位字节 + uint64_t mem_borrow; // 借用的内存,单位字节 + uint64_t mem_lend; // 借出的内存,单位字节 + uint8_t resv[MAX_NUMA_RESV_LEN]; +} ubsmem_numa_mem_t; + +typedef struct { + char host_name[MAX_HOST_NAME_DESC_LENGTH]; + int numa_num; + ubsmem_numa_mem_t numa[MAX_NUMA_NUM]; +} ubsmem_host_info_t; + +typedef struct { + int host_num; // 集群可用节点数量 + ubsmem_host_info_t host[MAX_HOST_NUM]; +} ubsmem_cluster_info_t; + +typedef struct { + char name[MAX_SHM_NAME_LENGTH + 1]; + size_t size; +} ubsmem_shmem_desc_t; + +typedef struct { + char name[MAX_SHM_NAME_LENGTH + 1]; + size_t size; + uint32_t mem_num; + uint64_t mem_unit_size; + uint64_t mem_id_list[MAX_MEMID_NUM]; +} ubsmem_shmem_info_t; + +typedef int32_t (*shmem_faults_func)(const char *shm_name); + +#ifdef __cplusplus +} +#endif +#endif //BRPC_UBS_MEM_DEF_H \ No newline at end of file diff --git a/src/brpc/ub/rack_mem/ubshmem_stub.cpp b/src/brpc/ub/rack_mem/ubshmem_stub.cpp new file mode 100644 index 
0000000000..ad238a3b0f --- /dev/null +++ b/src/brpc/ub/rack_mem/ubshmem_stub.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ubs_mem.h" + +int ubsmem_init_attributes(ubsmem_options_t *ubsm_shmem_opts) +{ + return UBSM_OK; +} + +int ubsmem_initialize(const ubsmem_options_t *ubsm_shmem_opts) +{ + return UBSM_OK; +} + +int ubsmem_finalize(void) +{ + return UBSM_OK; +} + +int ubsmem_set_logger_level(int level) +{ + return UBSM_OK; +} + +int ubsmem_set_extern_logger(void (*func)(int level, const char *msg)) +{ + return UBSM_OK; +} + +int ubsmem_lookup_regions(ubsmem_regions_t* regions) +{ + regions->num = 1; + regions->region[0].host_num = 1; + regions->region[0].hosts[0].affinity = true; + regions->region[0].hosts[0].host_name[0] = 'h'; + regions->region[0].hosts[0].host_name[1] = '1'; + regions->region[0].hosts[0].host_name[2] = '\0'; // 2号位置使用\0 + return UBSM_OK; +} + +int ubsmem_create_region(const char *region_name, size_t size, const ubsmem_region_attributes_t *reg_attr) +{ + return UBSM_OK; +} + + +int ubsmem_destroy_region(const char *region_name) +{ + return UBSM_OK; +} + +int ubsmem_shmem_allocate(const char *region_name, const char *name, size_t size, mode_t mode, uint64_t flags) +{ + return UBSM_OK; +} + +int ubsmem_shmem_deallocate(const char *name) +{ + return UBSM_OK; +} + +int ubsmem_shmem_map(void *addr, size_t length, int prot, int flags, const char *name, off_t offset, + void **local_ptr) +{ + return UBSM_OK; +} + +int ubsmem_shmem_unmap(void *local_ptr, size_t length) +{ + return UBSM_OK; +} + +int ubsmem_shmem_faults_register(shmem_faults_func registerFunc) +{ + return UBSM_OK; +} + +int ubsmem_local_nid_query(uint32_t *nid) +{ + *nid = 1; // stub + return UBSM_OK; +} \ No newline at end of file diff --git a/src/brpc/ub/shm/shm_def.h b/src/brpc/ub/shm/shm_def.h new file mode 100644 index 0000000000..c76ca3b962 --- /dev/null +++ b/src/brpc/ub/shm/shm_def.h 
@@ -0,0 +1,46 @@
+#ifndef BRPC_SHM_DEF_H
+#define BRPC_SHM_DEF_H
+#include
+#include
+#include
+
+#define PROT_READ 0x1 /* Page can be read. */
+#define PROT_WRITE 0x2 /* Page can be written. */
+#define PROT_EXEC 0x4 /* Page can be executed. */
+#define PROT_NONE 0x0 /* Page can not be accessed. */
+#define PROT_GROWSDOWN 0x01000000 /* Extend change to start of growsdown vma (mprotect only). */
+#define PROT_GROWSUP 0x02000000 /* Extend change to start of growsup vma (mprotect only). */
+/* Sharing types (must choose one and only one of these). */
+#define MAP_SHARED 0x01 /* Share changes. */
+#define MAP_PRIVATE 0x02 /* Changes are private. */
+#define SHM_MAX_NAME_BUFF_LEN 48 // byte, buffer size, ubsm_sdk need name to be below 48byte
+#define SHM_MAX_NAME_LEN (SHM_MAX_NAME_BUFF_LEN - 1) // byte, string length
+#define SHM_ALLOC_UNIT_SIZE (4 * 1024 * 1024) // 4MB
+
+namespace brpc {
+    namespace ub {
+    typedef enum { SHM_TYPE_UB, SHM_TYPE_IPC, SHM_TYPE_UBS, SHM_TYPE_UNSUPPORT } SHM_TYPE;
+
+    typedef struct {
+        uint8_t *addr;
+        size_t len;
+        uint64_t memid;
+        char name[SHM_MAX_NAME_BUFF_LEN];
+        uint32_t fd;
+    } SHM;
+
+    typedef struct ShmListNode {
+        SHM shm;
+        struct ShmListNode *next;
+        struct ShmListNode *prev;
+    } ShmListNode;
+
+    typedef struct {
+        ShmListNode* head;
+        ShmListNode* tail;
+        size_t size;
+        pthread_mutex_t shmLock;
+    } ShmList;
+    }
+}
+#endif //BRPC_SHM_DEF_H
\ No newline at end of file
diff --git a/src/brpc/ub/shm/shm_ipc.cpp b/src/brpc/ub/shm/shm_ipc.cpp
new file mode 100644
index 0000000000..cc1597d1c7
--- /dev/null
+++ b/src/brpc/ub/shm/shm_ipc.cpp
@@ -0,0 +1,171 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "brpc/ub/common/common.h"
+#include "brpc/ub/shm/shm_def.h"
+#include "brpc/ub/shm/shm_ipc.h"
+
+namespace brpc {
+namespace ub {
+/**
+ * Create a new POSIX shared-memory object named shm->name, size it to shm->len
+ * and map it read/write into this process.
+ * @param shm - [in/out] name and len are read; addr receives the mapping on success
+ *              and is reset to NULL on every mapping failure.
+ * @return - HLC_OK on success, SHM_ERR_EXIST if the object already exists, SHM_ERR otherwise.
+ */
+RETURN_CODE IpcShmLocalMalloc(SHM *shm)
+{
+    int fd = shm_open(shm->name, O_CREAT | O_EXCL | O_RDWR, SHM_IPC_MODE);
+    if (fd < 0) {
+        if (errno == EEXIST) {
+            LOG(ERROR) << "IPC Create shm=" << shm->name << " failed, shm exists.";
+            return SHM_ERR_EXIST;
+        }
+
+        LOG(ERROR) << "IPC Open shm=" << shm->name << " failed, ret(" << errno << ").";
+        return SHM_ERR;
+    }
+
+    int ret = ftruncate(fd, (off_t)shm->len);
+    if (ret < 0) {
+        LOG(ERROR) << "IPC Set shm=" << shm->name << " length=" << shm->len << " failed, ret(" << errno << ").";
+        close(fd);
+        shm_unlink(shm->name);
+        return SHM_ERR;
+    }
+
+    void *mapped = mmap(NULL, shm->len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (mapped == MAP_FAILED) {
+        LOG(ERROR) << "IPC map shm=" << shm->name << " length=" << shm->len << " failed, ret(" << errno << ").";
+        // Never leave MAP_FAILED in addr: the free/unmap helpers only guard against NULL.
+        shm->addr = NULL;
+        close(fd);
+        shm_unlink(shm->name);
+        return SHM_ERR;
+    }
+    shm->addr = (uint8_t*)mapped;
+
+    // The mapping stays valid after the descriptor is closed.
+    close(fd);
+    LOG(DEBUG) << "IPC Create shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Unmap shm->addr (shm->len bytes) without removing the shared-memory object.
+ * @return - HLC_OK on success, SHM_ERR_INPUT_INVALID if addr is NULL, SHM_ERR if munmap fails.
+ */
+RETURN_CODE IpcShmMunmap(SHM *shm)
+{
+    if (shm->addr == NULL) {
+        LOG(ERROR) << "Input shm param is invalid, addr is NULL.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    // munmap() reports success as 0; compare against that rather than a project enum.
+    if (munmap(shm->addr, shm->len) != 0) {
+        LOG(ERROR) << "IPC unmap shm=" << shm->name << " failed, errno=" << errno;
+        return SHM_ERR;
+    }
+
+    LOG(DEBUG) << "IPC unmap shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Remove the shared-memory object named shm->name (no unmapping is done here)
+ * and clear shm->addr on success.
+ * @return - HLC_OK on success, SHM_ERR_INPUT_INVALID if addr is NULL,
+ *           SHM_ERR_RESOURCE_ATTACHED when shm_unlink reports EBUSY, SHM_ERR otherwise.
+ */
+RETURN_CODE IpcShmFree(SHM *shm)
+{
+    if (shm->addr == NULL) {
+        LOG(ERROR) << "Input shm param is invalid, addr is NULL.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    // free
+    if (shm_unlink(shm->name) != 0) {
+        const int err = errno;  // capture before logging can disturb it
+        if (err == EBUSY) {
+            LOG_EVERY_SECOND(ERROR) << "IPC free shm=" << shm->name << " failed, errno=" << err;
+            return SHM_ERR_RESOURCE_ATTACHED;
+        }
+        LOG_EVERY_SECOND(ERROR) << "IPC free shm=" << shm->name << " failed, errno=" << err;
+        return SHM_ERR;
+    }
+    shm->addr = NULL;
+    LOG(DEBUG) << "IPC free shm=" << shm->name << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Unmap shm->addr and then remove the shared-memory object. An unmap failure is
+ * only logged; the unlink is still attempted.
+ * @return - HLC_OK on success, SHM_ERR_INPUT_INVALID if addr is NULL,
+ *           SHM_ERR_RESOURCE_ATTACHED when shm_unlink reports EBUSY, SHM_ERR otherwise.
+ */
+RETURN_CODE IpcShmLocalFree(SHM *shm)
+{
+    if (shm->addr == NULL) {
+        LOG(ERROR) << "Input shm param is invalid, addr is NULL.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    if (munmap(shm->addr, shm->len) != 0) {
+        // Log errno: the raw return value is always -1 and carries no information.
+        LOG(WARNING) << "IPC unmap shm=" << shm->name << " failed, ret=" << errno;
+    }
+
+    if (shm_unlink(shm->name) != 0) {
+        const int err = errno;
+        if (err == EBUSY) {
+            LOG_EVERY_SECOND(ERROR) << "IPC delete shm=" << shm->name << " failed, ret=" << err;
+            return SHM_ERR_RESOURCE_ATTACHED;
+        }
+        LOG_EVERY_SECOND(ERROR) << "IPC delete shm=" << shm->name << " failed, ret=" << err;
+        return SHM_ERR;
+    }
+    shm->addr = NULL;
+    LOG(DEBUG) << "IPC free local shm=" << shm->name << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Open an existing shared-memory object named shm->name and map shm->len bytes of it.
+ * @param shm - [in/out] addr receives the mapping on success and is NULL on failure.
+ * @return - HLC_OK on success, SHM_ERR otherwise.
+ */
+RETURN_CODE IpcShmRemoteMalloc(SHM *shm)
+{
+    int fd = shm_open(shm->name, O_RDWR, SHM_IPC_MODE);
+    if (fd < 0) {
+        LOG(ERROR) << "IPC open shm=" << shm->name << " failed, ret=" << errno;
+        return SHM_ERR;
+    }
+
+    // Request read access explicitly: a write-only mapping is not portable, and the
+    // UBS backend maps the same kind of segment with PROT_READ | PROT_WRITE.
+    void *mapped = mmap(NULL, shm->len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (mapped == MAP_FAILED) {
+        LOG(ERROR) << "IPC map shm=" << shm->name << " failed, ret=" << errno;
+        shm->addr = NULL;
+        close(fd);
+        return SHM_ERR;
+    }
+    shm->addr = (uint8_t*)mapped;
+
+    close(fd);
+    LOG(DEBUG) << "IPC malloc remote shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Open an existing shared-memory object named shm->name and map shm->len bytes
+ * with the caller-supplied protection bits.
+ * @param prot - protection flags handed to mmap unchanged.
+ * @return - HLC_OK on success, SHM_ERR otherwise (addr is NULL on failure).
+ */
+RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot)
+{
+    int fd = shm_open(shm->name, O_RDWR, SHM_IPC_MODE);
+    if (fd < 0) {
+        LOG(ERROR) << "IPC open shm=" << shm->name << " failed, ret=" << errno;
+        return SHM_ERR;
+    }
+
+    void *mapped = mmap(NULL, shm->len, prot, MAP_SHARED, fd, 0);
+    if (mapped == MAP_FAILED) {
+        LOG(ERROR) << "IPC map shm=" << shm->name << " failed, ret=" << errno;
+        shm->addr = NULL;
+        close(fd);
+        return SHM_ERR;
+    }
+    shm->addr = (uint8_t*)mapped;
+
+    close(fd);
+    LOG(DEBUG) << "IPC mmap remote shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+RETURN_CODE IpcShmRemoteFree(SHM *shm)
+{
+    if (shm->addr == NULL) {
+        LOG(ERROR) << "Input shm param is invalid, addr is NULL.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    int ret = munmap(shm->addr, shm->len);
+
if (ret != HLC_OK) {
+        LOG(ERROR) << "IPC unmap shm=" << shm->name << " failed, ret=" << ret;
+        return SHM_ERR;
+    }
+
+    LOG(DEBUG) << "IPC free remote shm=" << shm->name << " success.";
+    return HLC_OK;
+}
+}
+}
\ No newline at end of file
diff --git a/src/brpc/ub/shm/shm_ipc.h b/src/brpc/ub/shm/shm_ipc.h
new file mode 100644
index 0000000000..e71ed454c0
--- /dev/null
+++ b/src/brpc/ub/shm/shm_ipc.h
@@ -0,0 +1,21 @@
+#ifndef BRPC_SHM_IPC_H
+#define BRPC_SHM_IPC_H
+
+
+#include "shm_def.h"
+
+#define SHM_IPC_MODE 0666
+
+namespace brpc {
+    namespace ub {
+        RETURN_CODE IpcShmLocalMalloc(SHM *shm);
+        RETURN_CODE IpcShmMunmap(SHM *shm);
+        RETURN_CODE IpcShmFree(SHM *shm);
+        RETURN_CODE IpcShmLocalFree(SHM *shm);
+        RETURN_CODE IpcShmRemoteMalloc(SHM *shm);
+        RETURN_CODE IpcShmRemoteFree(SHM *shm);
+        RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot);
+    }
+}
+
+#endif //BRPC_SHM_IPC_H
\ No newline at end of file
diff --git a/src/brpc/ub/shm/shm_mgr.cpp b/src/brpc/ub/shm/shm_mgr.cpp
new file mode 100644
index 0000000000..a3061bcc53
--- /dev/null
+++ b/src/brpc/ub/shm/shm_mgr.cpp
@@ -0,0 +1,241 @@
+#include
+#include
+#include
+#include
+#include
+#include "brpc/ub/common/common.h"
+#include "brpc/ub/shm/shm_ipc.h"
+#include "brpc/ub/shm/shm_ubs.h"
+#include "brpc/ub/shm/shm_mgr.h"
+
+namespace brpc {
+namespace ub {
+DEFINE_int32(ub_shm_type, 1, "shm type: 1-ipc; 2-ub_ring");
+static SHM_TYPE g_shmType;
+
+/**
+ * Validate the fields every shm entry point relies on.
+ * The name must be a NUL-terminated string of 1..SHM_MAX_NAME_LEN characters inside
+ * the fixed name buffer, and len must be a non-zero multiple of SHM_ALLOC_UNIT_SIZE.
+ * @return - true when shm is usable, false (after logging the reason) otherwise.
+ */
+static bool CheckInputShmParam(SHM *shm)
+{
+    if (shm == NULL) {
+        LOG(ERROR) << "Input Param shm is NULL.";
+        return false;
+    }
+
+    // Bound the scan to the buffer: strlen() would read past it if the name were
+    // not terminated, and the name must not be streamed in that case either.
+    const void *terminator = memchr(shm->name, '\0', SHM_MAX_NAME_BUFF_LEN);
+    if (terminator == NULL) {
+        LOG(ERROR) << "Shm name is not terminated within " << SHM_MAX_NAME_BUFF_LEN << " bytes.";
+        return false;
+    }
+    size_t nameLen = (size_t)((const char *)terminator - shm->name);
+    if (nameLen == 0) {
+        // Report the name length (the value being validated), not the segment length.
+        LOG(ERROR) << "Shm name=" << shm->name << ", length=" << nameLen << ", which is not between 1 and " << SHM_MAX_NAME_LEN;
+        return false;
+    }
+
+    if (shm->len == 0) {
+        LOG(ERROR) << "Shm length=" << shm->len << " is invalid.";
+        return false;
+    }
+
+    if (shm->len < SHM_ALLOC_UNIT_SIZE || (shm->len & (SHM_ALLOC_UNIT_SIZE - 1)) != 0) {
+        LOG(ERROR) << "Shm length=" << shm->len << " need to be (1..n) * 4MB.";
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Select the shm backend from FLAGS_ub_shm_type and initialise it if required.
+ * @return - HLC_OK on success, HLC_ERR if the flag is out of range or UBS init fails.
+ */
+RETURN_CODE ShmMgrInit(void)
+{
+    // Explicit signed range check; the accepted range is the same as before
+    // (negative values were previously rejected only via unsigned wrap-around).
+    if (UNLIKELY(FLAGS_ub_shm_type < 0 || FLAGS_ub_shm_type >= (int32_t)SHM_TYPE_UNSUPPORT)) {
+        LOG(ERROR) << "Shm type config=" << FLAGS_ub_shm_type << " is not supported.";
+        return HLC_ERR;
+    }
+
+    g_shmType = (SHM_TYPE)FLAGS_ub_shm_type;
+    if (g_shmType == SHM_TYPE_UBS) {
+        if (UbsShmInit() != HLC_OK) {
+            LOG(ERROR) << "Init beiming ubs shm failed.";
+            return HLC_ERR;
+        }
+    }
+    LOG(DEBUG) << "shm mgr init success, shm type=" << g_shmType;
+    return HLC_OK;
+}
+
+/** Tear down the selected backend (only the UBS backend has teardown work). */
+void ShmMgrFini(void)
+{
+    if (g_shmType == SHM_TYPE_UBS) {
+        if (UbsShmFini() != HLC_OK) {
+            LOG(ERROR) << "Fini beiming ubs shm failed.";
+            return;
+        }
+    }
+    LOG(DEBUG) << "shm mgr fini success, shm type=" << g_shmType;
+}
+
+/** Override the backend type without running any backend initialisation. */
+void SetShmType(SHM_TYPE type)
+{
+    g_shmType = type;
+}
+
+/**
+ * Create and map a local segment through the selected backend.
+ * @return - backend result, SHM_ERR_INPUT_INVALID for a bad shm, SHM_ERR for an unknown backend.
+ */
+RETURN_CODE ShmLocalMalloc(SHM *shm)
+{
+    if (UNLIKELY(!CheckInputShmParam(shm))) {
+        LOG(ERROR) << "Input param shm is invalid.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    switch (g_shmType) {
+        case SHM_TYPE_IPC:
+            return IpcShmLocalMalloc(shm);
+        case SHM_TYPE_UBS:
+            return UbsShmLocalMalloc(shm);
+        default:
+            LOG(ERROR) << "Unsupported shm type.";
+            return SHM_ERR;
+    }
+}
+
+/** Same as ShmLocalMalloc, then zero-fills the whole mapping. */
+RETURN_CODE ShmLocalCalloc(SHM *shm)
+{
+    RETURN_CODE rc = ShmLocalMalloc(shm);
+    if (UNLIKELY(rc != HLC_OK)) {
+        LOG(ERROR) << "Failed to alloc local shm.";
+        return rc;
+    }
+    memset(shm->addr, 0, shm->len);
+    return HLC_OK;
+}
+
+/** Unmap and delete a local segment through the selected backend. */
+RETURN_CODE ShmLocalFree(SHM *shm)
+{
+    if (UNLIKELY(!CheckInputShmParam(shm))) {
+        LOG(ERROR) << "Input param shm is invalid.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    switch (g_shmType) {
+        case SHM_TYPE_IPC:
+            return IpcShmLocalFree(shm);
+        case SHM_TYPE_UBS:
+            return UbsShmLocalFree(shm);
+        default:
+            LOG(ERROR) << "Unsupported shm type.";
+            return SHM_ERR;
+    }
+}
+
+/** Map a segment that already exists (created elsewhere) through the selected backend. */
+RETURN_CODE ShmRemoteMalloc(SHM *shm)
+{
+    if (UNLIKELY(!CheckInputShmParam(shm))) {
+        LOG(ERROR) << "Input param shm is invalid.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    switch (g_shmType) {
+        case SHM_TYPE_IPC:
+            return IpcShmRemoteMalloc(shm);
+        case SHM_TYPE_UBS:
+            return UbsShmRemoteMalloc(shm);
+        default:
+            LOG(ERROR) << "Unsupported shm type.";
+            return SHM_ERR;
+    }
+}
+
+/** Unmap a segment obtained with ShmRemoteMalloc. */
+RETURN_CODE ShmRemoteFree(SHM *shm)
+{
+    if (UNLIKELY(!CheckInputShmParam(shm))) {
+        LOG(ERROR) << "Input param shm is invalid.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    switch (g_shmType) {
+        case SHM_TYPE_IPC:
+            return IpcShmRemoteFree(shm);
+        case SHM_TYPE_UBS:
+            return UbsShmRemoteFree(shm);
+        default:
+            LOG(ERROR) << "Unsupported shm type.";
+            return SHM_ERR;
+    }
+}
+
+/** Map an existing segment with caller-chosen protection bits. */
+RETURN_CODE ShmLocalMmap(SHM *shm, int prot)
+{
+    if (UNLIKELY(!CheckInputShmParam(shm))) {
+        LOG(ERROR) << "Input param shm is invalid.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    switch (g_shmType) {
+        case SHM_TYPE_IPC:
+            return IpcShmLocalMmap(shm, prot);
+        case SHM_TYPE_UBS:
+            return UbsShmLocalMmap(shm, prot);
+        default:
+            LOG(ERROR) << "Unsupported shm type.";
+            return SHM_ERR;
+    }
+}
+
+/** Unmap a segment without deleting it. */
+RETURN_CODE ShmMunmap(SHM *shm)
+{
+    if (UNLIKELY(!CheckInputShmParam(shm))) {
+        LOG(ERROR) << "Input param shm is invalid.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    switch (g_shmType) {
+        case SHM_TYPE_IPC:
+            return IpcShmMunmap(shm);
+        case SHM_TYPE_UBS:
+            return UbsShmMunmap(shm);
+        default:
+            LOG(ERROR) << "Unsupported shm type.";
+            return SHM_ERR;
+    }
+}
+
+/** Delete a segment by name without unmapping it. */
+RETURN_CODE ShmFree(SHM *shm)
+{
+    if (UNLIKELY(!CheckInputShmParam(shm))) {
+        LOG(ERROR) << "Input param shm is invalid.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    switch (g_shmType) {
+        case SHM_TYPE_IPC:
+            return IpcShmFree(shm);
+        case SHM_TYPE_UBS:
+            return UbsShmFree(shm);
+        default:
+            LOG(ERROR) << "Unsupported shm type.";
+            return SHM_ERR;
+    }
+}
+}
+}
\ No newline at end of file
diff --git a/src/brpc/ub/shm/shm_mgr.h b/src/brpc/ub/shm/shm_mgr.h
new file mode 100644
index 0000000000..2268bd980a
--- /dev/null
+++ b/src/brpc/ub/shm/shm_mgr.h
@@ -0,0 +1,34 @@
+#ifndef BRPC_SHM_MGR_H
+#define BRPC_SHM_MGR_H
+
+#include
+#include "brpc/ub/common/common.h"
+#include "brpc/ub/shm/shm_def.h"
+
+namespace brpc {
+namespace ub {
+void SetShmType(SHM_TYPE type);
+
+RETURN_CODE ShmMgrInit(void);
+
+void ShmMgrFini(void);
+
+RETURN_CODE ShmLocalMalloc(SHM *shm);
+
+RETURN_CODE ShmLocalCalloc(SHM *shm);
+
+RETURN_CODE ShmLocalFree(SHM *shm);
+
+RETURN_CODE ShmRemoteMalloc(SHM *shm);
+
+RETURN_CODE ShmRemoteFree(SHM *shm);
+
+RETURN_CODE ShmLocalMmap(SHM *shm, int prot);
+
+RETURN_CODE ShmMunmap(SHM *shm);
+
+RETURN_CODE ShmFree(SHM *shm);
+}
+}
+
+#endif //BRPC_SHM_MGR_H
\ No newline at end of file
diff --git a/src/brpc/ub/shm/shm_ubs.cpp b/src/brpc/ub/shm/shm_ubs.cpp
new file mode 100644
index 0000000000..1ae4d8c295
--- /dev/null
+++ b/src/brpc/ub/shm/shm_ubs.cpp
@@ -0,0 +1,544 @@
+#define _GNU_SOURCE
+#include
+#include
+#include
+#include
+#include
+#include
+#include "brpc/ub/timer/timer_mgr.h"
+#include "brpc/ub/common/thread_lock.h"
+#include "brpc/ub/common/common.h"
+#include "brpc/ub/shm/shm_def.h"
+#include "brpc/ub/ub_ring_manager.h"
+#include "brpc/ub/rack_mem/ubs_mem.h"
+#include "brpc/ub/rack_mem/ubs_mem_def.h"
+#ifdef UT
+#include "ubs_mem.h"
+#endif
+#include "shm_ubs.h"
+
+namespace brpc {
+namespace ub {
+#define HLC_MK_UBSM(ret, fn, args) ret (*fn) args = NULL
+#include "brpc/ub/rack_mem/declare_shm_ubs.h.tmpl"
+#define SHM_RIGHT_MODE 0666
+#define HLC_REGION_NAME_PREFIX "HlcONE2ALLRegion"
+DEFINE_uint32(node_location, 1, "Location of the ub machine.");
+DEFINE_bool(shm_wr_delay_comp, true, "Indicates whether to enable the write relay."
+    "0: relay; 1: non-relay.");
+DEFINE_int32(ub_flying_io_timeout, 1, "Waiting time for stopping data "
+    "sending and receiving when the link is disconnected.");
+char g_regionName[MAX_REGION_NAME_DESC_LENGTH] = {0};
+int g_shmTimerFd = 0;
+ShmList *g_shmList = NULL;
+static RETURN_CODE UbsShmInterfacesLoad(void);
+char hostname[MAX_HOST_NAME_DESC_LENGTH];
+
+/**
+ * Resolve every ubs_mem SDK entry point listed in declare_shm_ubs.h.tmpl into the
+ * matching file-level function pointer (skipped entirely when UT is defined).
+ * @return - HLC_OK on success, HLC_ERR if the library or a required symbol cannot be loaded.
+ */
+RETURN_CODE UbsShmInterfacesLoad(void)
+{
+#ifndef UT
+    const char *ubsmSdkLocation = "/usr/local/ubs_mem/lib/libubsm_sdk.so";
+    void* dlhandler = dlmopen(LM_ID_NEWLM, ubsmSdkLocation, RTLD_NOW | RTLD_LOCAL | RTLD_NODELETE | RTLD_DEEPBIND);
+    if (dlhandler == NULL) {
+        LOG(ERROR) << "Dlmopen libubsm_sdk.so in " << ubsmSdkLocation << " failed, error:" << dlerror();
+        return HLC_ERR;
+    }
+
+#define HLC_MK_UBSM_OPTIONAL(ret, fn, args)                \
+    do {                                                   \
+        fn = (decltype(fn))dlsym(dlhandler, #fn);          \
+    } while (0)
+
+// Required symbol: keep an already-resolved pointer, otherwise look it up and fail
+// the whole load (releasing the handle) when it is missing.
+#define HLC_MK_UBSM(ret, fn, args)                                                       \
+    do {                                                                                 \
+        if ((fn) != NULL) {                                                              \
+            break;                                                                       \
+        }                                                                                \
+        HLC_MK_UBSM_OPTIONAL(ret, fn, args);                                             \
+        if ((fn) == NULL) {                                                              \
+            LOG(ERROR) << "Fail load ubs_mem func " << #fn <<" error:" << dlerror();     \
+            dlclose(dlhandler);                                                          \
+            return HLC_ERR;                                                              \
+        }                                                                                \
+    } while (0)
+#include "brpc/ub/rack_mem/declare_shm_ubs.h.tmpl"
+
+    // The handle is no longer needed once the pointers are resolved; the library
+    // was opened with RTLD_NODELETE.
+    dlclose(dlhandler);
+    dlhandler = NULL;
+#endif
+    return HLC_OK;
+}
+
+/**
+ * Build the process-wide region name into g_regionName and create the region
+ * regionName on the hosts reported by the SDK for region 0.
+ * @param regionName - name passed to ubsmem_create_region.
+ * @return - HLC_OK when the region was created or already exists, HLC_ERR otherwise.
+ */
+static RETURN_CODE CreateUbsShmRegion(const char *regionName)
+{
+    int ret = snprintf(g_regionName, MAX_REGION_NAME_DESC_LENGTH, "%s_%u",
+                       HLC_REGION_NAME_PREFIX, FLAGS_node_location);
+    // A return value >= the buffer size means the name was truncated.
+    if (ret < 0 || ret >= MAX_REGION_NAME_DESC_LENGTH) {
+        LOG(ERROR) << "Snprintf_s region name failed, ret=" << ret;
+        return HLC_ERR;
+    }
+
+    ubsmem_regions_t regions = {0}; // 16 * (48 + 1) bytes, about 0.8k
+    ret = ubsmem_lookup_regions(&regions);
+    if (ret != UBSM_OK || regions.region[0].host_num <= 0) {
+        LOG(ERROR) << "Ubs lookup share region failed, ret=" << ret << ", region.num=" << regions.region[0].host_num;
+        return HLC_ERR;
+    }
+    // host_num comes from the SDK; never index past the fixed-size hosts[] arrays.
+    if (regions.region[0].host_num > MAX_REGION_NODE_NUM) {
+        LOG(ERROR) << "Ubs lookup share region returned too many hosts, host_num=" << regions.region[0].host_num;
+        return HLC_ERR;
+    }
+
+    ubsmem_region_attributes_t regionAttr = {0};
+    regionAttr.host_num = regions.region[0].host_num;
+    for (int i = 0; i < regionAttr.host_num; i++) {
+        // Bounded copy with forced termination instead of strcpy().
+        memcpy(regionAttr.hosts[i].host_name, regions.region[0].hosts[i].host_name, MAX_HOST_NAME_DESC_LENGTH);
+        regionAttr.hosts[i].host_name[MAX_HOST_NAME_DESC_LENGTH - 1] = '\0';
+        // Only the entry matching this machine's hostname is marked as affine.
+        regionAttr.hosts[i].affinity = (strcmp(regionAttr.hosts[i].host_name, hostname) == 0);
+    }
+
+    ret = ubsmem_create_region(regionName, 0, &regionAttr);
+    if (ret == UBSM_ERR_ALREADY_EXIST) {
+        LOG(WARNING) << "Ubs region exists, region_name=" << regionName;
+        return HLC_OK;
+    } else if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubsmem create region failed, ret=" << ret;
+        return HLC_ERR;
+    }
+
+    return HLC_OK;
+}
+
+/** Add UBSM_FLAG_WR_DELAY_COMP to flag when FLAGS_shm_wr_delay_comp is set. */
+static uint64_t AquireFlagIfWrDelayComp(const uint64_t flag)
+{
+    if (!FLAGS_shm_wr_delay_comp) {
+        return flag;
+    }
+    return flag | UBSM_FLAG_WR_DELAY_COMP;
+}
+
+/**
+ * Allocate a UBS segment named shm->name in g_regionName and map it read/write.
+ * A pre-existing segment with the same name is deallocated and recreated once.
+ * @return - HLC_OK on success; SHM_ERR_EXIST if the stale segment cannot be removed;
+ *           SHM_ERR_NOT_FOUND if mapping reports the segment missing; SHM_ERR otherwise.
+ */
+RETURN_CODE UbsShmLocalMalloc(SHM *shm)
+{
+    const uint64_t allocFlags =
+        AquireFlagIfWrDelayComp(UBSM_FLAG_ONLY_IMPORT_NONCACHE | UBSM_FLAG_MEM_ANONYMOUS);
+    int ret = ubsmem_shmem_allocate(g_regionName, shm->name, shm->len, SHM_RIGHT_MODE, allocFlags);
+    if (ret == UBSM_ERR_ALREADY_EXIST) {
+        if (ubsmem_shmem_deallocate(shm->name) != UBSM_OK) {
+            LOG(ERROR) << "Ubs create shm name=" << shm->name << " failed, shm exists, ret=" << ret;
+            return SHM_ERR_EXIST;
+        }
+        LOG(INFO) << "Ubs delete shm name=" << shm->name << " success, try to recreate.";
+        ret = ubsmem_shmem_allocate(g_regionName, shm->name, shm->len, SHM_RIGHT_MODE, allocFlags);
+        if (ret != UBSM_OK) {
+            LOG(ERROR) << "Ubs recreate shm name=" << shm->name << " failed, ret=" << ret;
+            return SHM_ERR;
+        }
+    } else if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubs create shm name=" << shm->name << " failed, ret=" << ret;
+        return SHM_ERR;
+    }
+
+    ret = ubsmem_shmem_map(NULL, shm->len, PROT_READ | PROT_WRITE, MAP_SHARED, shm->name, 0, (void**)&(shm->addr));
+    if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubs map shm=" << shm->name << " failed, ret=" << ret;
+        if (ret == UBSM_ERR_NOT_FOUND) {
+            return SHM_ERR_NOT_FOUND;
+        }
+        ubsmem_shmem_deallocate(shm->name);
+        return SHM_ERR;
+    }
+
+    // Obtain the memid through MXE
+
shm->memid = 1; // stubbed for now
+    LOG(DEBUG) << "Ubs malloc local shm=" << shm->name << " length=" << shm->len << " memid=" << shm->memid << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Unmap shm->addr without deallocating the segment. On a UBS network error the
+ * segment is queued on g_shmList.
+ * @return - HLC_OK, SHM_ERR_INPUT_INVALID (addr NULL), SHM_ERR_UBSM_NET_ERR or SHM_ERR.
+ */
+RETURN_CODE UbsShmMunmap(SHM *shm)
+{
+    // unmap
+    if (shm->addr == NULL) {
+        LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    int ret = ubsmem_shmem_unmap(shm->addr, shm->len);
+    if (ret != UBSM_OK) {
+        if (ret == UBSM_ERR_NET) {
+            LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret;
+            AddShmToList(g_shmList, shm);
+            return SHM_ERR_UBSM_NET_ERR;
+        }
+        LOG(ERROR) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret;
+        return SHM_ERR;
+    }
+
+    LOG(DEBUG) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Deallocate the segment named shm->name (no unmapping) and clear shm->addr on success.
+ * @return - HLC_OK, SHM_ERR_INPUT_INVALID (addr NULL), SHM_ERR_RESOURCE_ATTACHED when the
+ *           SDK reports the segment in use, SHM_ERR_NOT_FOUND, or SHM_ERR.
+ */
+RETURN_CODE UbsShmFree(SHM *shm)
+{
+    if (shm->addr == NULL) {
+        LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    // free
+    int ret = ubsmem_shmem_deallocate(shm->name);
+    if (ret != UBSM_OK) {
+        if (ret == UBSM_ERR_IN_USING) {
+            LOG(DEBUG) << "Ubs free shm=" << shm->name << " failed, resource attached=" << ret;
+            return SHM_ERR_RESOURCE_ATTACHED;
+        } else if (ret == UBSM_ERR_NOT_FOUND) {
+            LOG(DEBUG) << "Ubs free shm=" << shm->name << " failed, resource not found=" << ret;
+            return SHM_ERR_NOT_FOUND;
+        }
+        LOG(ERROR) << "Ubs free shm="<< shm->name << " failed, ret=" << ret;
+        return SHM_ERR;
+    }
+    shm->addr = NULL;
+    LOG(DEBUG) << "Ubs free shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Unmap and then deallocate a local segment. A network error during unmap queues the
+ * segment on g_shmList and returns early; any other unmap error is only logged and
+ * deallocation is still attempted.
+ */
+RETURN_CODE UbsShmLocalFree(SHM *shm)
+{
+    // unmap
+    if (shm->addr == NULL) {
+        LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    int ret = ubsmem_shmem_unmap(shm->addr, shm->len);
+    if (ret != UBSM_OK) {
+        if (ret == UBSM_ERR_NET) {
+            LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret;
+            AddShmToList(g_shmList, shm);
+            return SHM_ERR_UBSM_NET_ERR;
+        }
+        LOG(WARNING) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret;
+    }
+
+    // free
+    ret = ubsmem_shmem_deallocate(shm->name);
+    if (ret != UBSM_OK) {
+        if (ret == UBSM_ERR_IN_USING) {
+            LOG_EVERY_SECOND(INFO) << "Ubs delete shm=" << shm->name << " failed, resource attached=" << ret;
+            return SHM_ERR_RESOURCE_ATTACHED;
+        }
+        LOG(ERROR) << "Ubs delete shm=" << shm->name << " failed, ret=" << ret;
+        return SHM_ERR;
+    }
+    shm->addr = NULL;
+    LOG(DEBUG) << "Ubs free local shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+/** Map an existing segment named shm->name read/write into shm->addr. */
+RETURN_CODE UbsShmRemoteMalloc(SHM *shm)
+{
+    int ret = ubsmem_shmem_map(NULL, shm->len, PROT_READ | PROT_WRITE, MAP_SHARED, shm->name, 0, (void**)&(shm->addr));
+    if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubs map Shm=" << shm->name << " failed, ret=" << ret;
+        return SHM_ERR;
+    }
+
+    LOG(INFO) << "Ubs malloc remote shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+/** Map an existing segment named shm->name with caller-chosen protection bits. */
+RETURN_CODE UbsShmLocalMmap(SHM *shm, int prot)
+{
+    int ret = ubsmem_shmem_map(NULL, shm->len, prot, MAP_SHARED, shm->name, 0, (void**)&(shm->addr));
+    if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubs map Shm=" << shm->name << " failed, ret=" << ret;
+        return SHM_ERR;
+    }
+
+    LOG(DEBUG) << "Ubs mmap remote shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Unmap a segment obtained with UbsShmRemoteMalloc. On a UBS network error the
+ * segment is queued on g_shmList.
+ */
+RETURN_CODE UbsShmRemoteFree(SHM *shm)
+{
+    // unmap
+    if (shm->addr == NULL) {
+        LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL.";
+        return SHM_ERR_INPUT_INVALID;
+    }
+
+    int ret = ubsmem_shmem_unmap(shm->addr, shm->len);
+    if (ret != UBSM_OK) {
+        if (ret == UBSM_ERR_NET) {
+            LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret;
+            AddShmToList(g_shmList, shm);
+            return SHM_ERR_UBSM_NET_ERR;
+        }
+        LOG(ERROR) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret;
+        return SHM_ERR;
+    }
+
+    LOG(DEBUG) << "Ubs free Remote shm=" << shm->name << " length=" << shm->len << " success.";
+    return HLC_OK;
+}
+
+/**
+ * Logger callback handed to the SDK: forwards msg to LOG at the matching severity
+ * (everything below warning is logged as INFO).
+ */
+void UbsMemLoggerPrint(int level, const char *msg)
+{
+    // Streaming a null char pointer is undefined behaviour; drop such messages.
+    if (msg == NULL) {
+        return;
+    }
+    if (level == UBSM_LOG_ERROR_LEVEL) {
+        LOG(ERROR) << msg;
+    } else if (level == UBSM_LOG_WARN_LEVEL) {
+        LOG(WARNING) << msg;
+    } else {
+        LOG(INFO) << msg;
+    }
+}
+
+/**
+ * Bring up the UBS backend: load the SDK entry points, install the logger, initialise
+ * the SDK, query the local node id, register the fault callback, create the shared
+ * region and start the deferred-release timer.
+ * @return - HLC_OK on success, HLC_ERR on the first failing step. Once the SDK has been
+ *           initialised, any later failure finalizes it again before returning.
+ */
+RETURN_CODE UbsShmInit(void)
+{
+    // Load the function pointers from libubsm_sdk.so
+    RETURN_CODE retCode = UbsShmInterfacesLoad();
+    if (retCode != HLC_OK) {
+        LOG(ERROR) << "Load ubs shm functions failed, ret=" << retCode;
+        return HLC_ERR;
+    }
+
+    if (gethostname(hostname, MAX_HOST_NAME_DESC_LENGTH) != 0) {
+        LOG(ERROR) << "hlc config gethostname failed, errno=" << errno;
+        return HLC_ERR;
+    }
+    // gethostname() does not guarantee termination when the name is truncated.
+    hostname[MAX_HOST_NAME_DESC_LENGTH - 1] = '\0';
+
+    int ret = ubsmem_set_extern_logger(UbsMemLoggerPrint);
+    if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubs set logger failed, ret=" << ret;
+        return HLC_ERR;
+    }
+
+    ret = ubsmem_set_logger_level(UBSM_LOG_INFO_LEVEL);
+    if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubs set logger level failed, ret=" << ret;
+        return HLC_ERR;
+    }
+
+    ubsmem_options_t options = {};
+    ret = ubsmem_init_attributes(&options);
+    if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubs shm init attributes failed, ret=" << ret;
+        return HLC_ERR;
+    }
+
+    ret = ubsmem_initialize(&options);
+    if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubs shm initialize failed, ret=" << ret;
+        return HLC_ERR;
+    }
+
+    // From here on the SDK is initialised: undo that on every failure path.
+    if (UNLIKELY(ubsmem_local_nid_query(&FLAGS_node_location) != UBSM_OK)) {
+        LOG(ERROR) << "Get local nid failed.";
+        (void)ubsmem_finalize();
+        return HLC_ERR;
+    }
+
+    if (UNLIKELY(ubsmem_shmem_faults_register(brpc::ub::UBRingManager::UbEventCallback) != UBSM_OK)) {
+        LOG(ERROR) << "Failed to register the ub event callback function.";
+        (void)ubsmem_finalize();
+        return HLC_ERR;
+    }
+
+    if (CreateUbsShmRegion(g_regionName) != HLC_OK) {
+        LOG(ERROR) << "Create Ubs region failed.";
+        (void)ubsmem_finalize();
+        return HLC_ERR;
+    }
+
+    if (InitShmTimer(&g_shmList) != HLC_OK) {
+        LOG(ERROR) << "Ubs shm list init failed.";
+        (void)ubsmem_finalize();
+        return HLC_ERR;
+    }
+
+    LOG(INFO) << "Ubs shm init success.";
+    return
HLC_OK;
+}
+
+/**
+ * Shut the UBS backend down: stop the deferred-release timer and free the pending
+ * list, then finalize the SDK. Both steps are always attempted.
+ * @return - HLC_OK only if both steps succeed, HLC_ERR otherwise.
+ */
+RETURN_CODE UbsShmFini(void)
+{
+    RETURN_CODE rc = HLC_OK;
+
+    // Stop the timer before finalizing the SDK: its callback calls SDK functions.
+    if (UNLIKELY(DestroyShmTimer(g_shmList) != HLC_OK)) {
+        LOG(ERROR) << "Ubs shm list finalize failed.";
+        rc = HLC_ERR;
+    }
+    // DestroyShmTimer released the list; do not keep a dangling global pointer.
+    g_shmList = NULL;
+
+    int ret = ubsmem_finalize();
+    if (ret != UBSM_OK) {
+        LOG(ERROR) << "Ubs shm finalize fail, ret=" << ret;
+        rc = HLC_ERR;
+    }
+
+    if (rc == HLC_OK) {
+        LOG(INFO) << "Ubs shm finalize success.";
+    }
+    return rc;
+}
+
+/** Pop and free the head node of shmList. The caller must hold shmList->shmLock. */
+static void DeleteShmToList(ShmList* shmList)
+{
+    if (shmList == NULL || shmList->head == NULL) {
+        return;
+    }
+
+    ShmListNode *curNode = shmList->head;
+    shmList->head = curNode->next;
+    if (shmList->head != NULL) {
+        shmList->head->prev = NULL;
+    } else {
+        shmList->tail = NULL;
+    }
+    LOG(DEBUG) << "Delete shm to list, name=" << curNode->shm.name << " size=" << shmList->size;
+    FREE_PTR(curNode);
+    shmList->size--;
+}
+
+/**
+ * Timer callback: retry unmap + deallocate for every queued segment, front to back.
+ * Processing stops at the first entry whose unmap still fails, leaving it queued for
+ * the next tick. An entry is removed once unmap succeeds, whatever deallocate returns.
+ * @param args - the ShmList registered with the timer.
+ * @return - always NULL.
+ */
+void *UbsShmCallback(void* args)
+{
+    ShmList *shmList = (ShmList*)args;
+    if (UNLIKELY(shmList == NULL)) {
+        LOG(ERROR) << "Shm list is null.";
+        return NULL;
+    }
+
+    LOCK_GUARD(shmList->shmLock);
+    while (shmList->head != NULL) {
+        SHM shm = shmList->head->shm;
+        if (shm.addr == NULL) {
+            LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL.";
+            // Nothing can ever be unmapped for this entry; drop it so that it does
+            // not block every entry queued behind it on all future ticks.
+            DeleteShmToList(shmList);
+            continue;
+        }
+
+        int ret = ubsmem_shmem_unmap(shm.addr, shm.len);
+        if (ret != UBSM_OK) {
+            if (ret == UBSM_ERR_NET) {
+                return NULL;
+            }
+            LOG(ERROR) << "Ubs unmap shm=" << shm.name << " length=" << shm.len << " failed, ret=" << ret;
+            return NULL;
+        }
+        LOG(DEBUG) << "Ubs unmap shm=" << shm.name << " length=" << shm.len << " success.";
+
+        ret = ubsmem_shmem_deallocate(shm.name);
+        if (ret != UBSM_OK) {
+            DeleteShmToList(shmList);
+            LOG(ERROR) << "Ubs delete shm=" << shm.name << " failed, ret=" << ret;
+            return NULL;
+        }
+        DeleteShmToList(shmList);
+        LOG(DEBUG) << "Ubs free local shm=" << shm.name << " length=" << shm.len << " success.";
+    }
+
+    return NULL;
+}
+
+/**
+ * Start the periodic timer that drives UbsShmCallback for shmList; the period is
+ * FLAGS_ub_flying_io_timeout seconds. The timer fd is stored in g_shmTimerFd.
+ * @return - HLC_OK on success, HLC_ERR if the flag is negative or the timer cannot start.
+ */
+RETURN_CODE UbsShmAddTimer(ShmList *shmList)
+{
+    // A negative flag would wrap to a huge unsigned interval.
+    if (UNLIKELY(FLAGS_ub_flying_io_timeout < 0)) {
+        LOG(ERROR) << "Invalid ub_flying_io_timeout=" << FLAGS_ub_flying_io_timeout;
+        return HLC_ERR;
+    }
+    uint32_t timerInterval = FLAGS_ub_flying_io_timeout;
+    struct itimerspec timeSpec = {
+        .it_interval = {.tv_sec = timerInterval, .tv_nsec = 0},
+        .it_value = {.tv_sec = 0, .tv_nsec = 1}
+    };
+    int timerFd = TimerStart(&timeSpec, UbsShmCallback, (void*)shmList);
+    if (UNLIKELY(timerFd == -1)) {
+        LOG(ERROR) << "Start shm timer failed.";
+        return HLC_ERR;
+    }
+    g_shmTimerFd = timerFd;
+
+    return HLC_OK;
+}
+
+/**
+ * Allocate an empty ShmList, initialise its mutex and start its timer.
+ * @param shmList - [out] receives the new list; released again on failure.
+ * @return - HLC_OK on success, HLC_ERR otherwise.
+ */
+RETURN_CODE InitShmTimer(ShmList **shmList)
+{
+    if (shmList == NULL) {
+        LOG(ERROR) << "Shm list is null.";
+        return HLC_ERR;
+    }
+
+    *shmList = (ShmList *)malloc(sizeof(ShmList));
+    if (*shmList == NULL) {
+        LOG(ERROR) << "Malloc shm list failed.";
+        return HLC_ERR;
+    }
+    (*shmList)->head = NULL;
+    (*shmList)->tail = NULL;
+    (*shmList)->size = 0;
+
+    if (pthread_mutex_init(&(*shmList)->shmLock, NULL) != 0) {
+        LOG(ERROR) << "Init shm list mutex failed.";
+        FREE_PTR(*shmList);
+        return HLC_ERR;
+    }
+
+    if (UbsShmAddTimer(*shmList) == HLC_ERR) {
+        LOG(ERROR) << "Ubs add timer failed.";
+        // The mutex was initialised above; release it before freeing its storage.
+        pthread_mutex_destroy(&(*shmList)->shmLock);
+        FREE_PTR(*shmList);
+        return HLC_ERR;
+    }
+    return HLC_OK;
+}
+
+/**
+ * Stop the timer identified by g_shmTimerFd and free shmList together with every node
+ * still queued on it (queued segments are not unmapped or deallocated here).
+ * @return - HLC_OK on success, HLC_ERR if shmList is NULL.
+ */
+RETURN_CODE DestroyShmTimer(ShmList *shmList)
+{
+    DeleteTimerSafe((uint32_t)g_shmTimerFd);
+    if (shmList == NULL) {
+        LOG(WARNING) << "Shm list is null.";
+        return HLC_ERR;
+    }
+    ShmListNode* current = shmList->head;
+    ShmListNode* next;
+
+    while (current != NULL) {
+        next = current->next;
+        free(current);
+        current = next;
+    }
+    pthread_mutex_destroy(&shmList->shmLock);
+    FREE_PTR(shmList);
+    return HLC_OK;
+}
+
+/**
+ * Return true if an entry with the same name and length as shm is queued.
+ * The caller must hold shmList->shmLock.
+ */
+static bool ShmListContainsLocked(const ShmList *shmList, const SHM *shm)
+{
+    for (const ShmListNode *curNode = shmList->head; curNode != NULL; curNode = curNode->next) {
+        if (strcmp(curNode->shm.name, shm->name) == 0 && curNode->shm.len == shm->len) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/**
+ * Check whether an entry with the same name and length as shm is queued on shmList.
+ * @return - HLC_OK if present, HLC_ERR if absent or if an argument is NULL.
+ */
+RETURN_CODE IsExistInShmList(ShmList *shmList, const SHM *shm)
+{
+    // Validate before touching shmList->shmLock.
+    if (UNLIKELY(shmList == NULL || shm == NULL)) {
+        LOG(ERROR) << "Shm list is null.";
+        return HLC_ERR;
+    }
+
+    LOCK_GUARD(shmList->shmLock);
+    return ShmListContainsLocked(shmList, shm) ? HLC_OK : HLC_ERR;
+}
+
+/**
+ * Append a copy of *shm to the tail of shmList unless an entry with the same name
+ * and length is already queued.
+ * @return - HLC_OK when queued; HLC_ERR on NULL arguments, duplicate entry or
+ *           allocation failure.
+ */
+RETURN_CODE AddShmToList(ShmList *shmList, SHM *shm)
+{
+    if (shmList == NULL || shm == NULL) {
+        LOG(ERROR) << "Shm list or shm is null.";
+        return HLC_ERR;
+    }
+
+    // Build the node before taking the lock to keep the critical section short.
+    ShmListNode *newShmNode = (ShmListNode *)malloc(sizeof(ShmListNode));
+    if (newShmNode == NULL) {
+        LOG(ERROR) << "Malloc shm node failed.";
+        return HLC_ERR;
+    }
+    memcpy(&newShmNode->shm, shm, sizeof(SHM));
+    newShmNode->next = NULL;
+
+    // The duplicate check and the insertion share one critical section, so two
+    // threads cannot both pass the check and queue the same segment twice.
+    LOCK_GUARD(shmList->shmLock);
+    if (ShmListContainsLocked(shmList, shm)) {
+        LOG(ERROR) << "Shm name=" << shm->name << " is exist in shm list.";
+        free(newShmNode);
+        return HLC_ERR;
+    }
+
+    newShmNode->prev = shmList->tail;
+    if (shmList->tail) {
+        shmList->tail->next = newShmNode;
+    } else {
+        shmList->head = newShmNode;
+    }
+    shmList->tail = newShmNode;
+    shmList->size++;
+    LOG(DEBUG) << "Add shm to list success, shm name=" << shm->name << " size=" << shmList->size;
+    return HLC_OK;
+}
+}
+}
\ No newline at end of file
diff --git a/src/brpc/ub/shm/shm_ubs.h b/src/brpc/ub/shm/shm_ubs.h
new file mode 100644
index 0000000000..a1a9c8b289
--- /dev/null
+++ b/src/brpc/ub/shm/shm_ubs.h
@@ -0,0 +1,34 @@
+#ifndef BRPC_SHM_UBS_H
+#define BRPC_SHM_UBS_H
+namespace brpc {
+namespace ub {
+DECLARE_int32(ub_flying_io_timeout);
+
+typedef enum TagUbsLogLevel {
+    UBSM_LOG_DEBUG_LEVEL = 0,
+    UBSM_LOG_INFO_LEVEL = 1,
+    UBSM_LOG_WARN_LEVEL = 2,
+    UBSM_LOG_ERROR_LEVEL = 3,
+    UBSM_LOG_CLOSED_LEVEL = 4
+} UbsLogLevel;
+
+RETURN_CODE UbsShmLocalMalloc(SHM *shm);
+RETURN_CODE UbsShmMunmap(SHM *shm);
+RETURN_CODE UbsShmFree(SHM *shm);
+RETURN_CODE UbsShmLocalFree(SHM *shm);
+RETURN_CODE UbsShmRemoteMalloc(SHM *shm);
+RETURN_CODE UbsShmRemoteFree(SHM *shm);
+RETURN_CODE UbsShmInit(void);
+RETURN_CODE UbsShmFini(void);
+RETURN_CODE UbsShmLocalMmap(SHM *shm, int prot);
+void UbsMemLoggerPrint(int level, const char *msg);
+
+void *UbsShmCallback(void* args);
+RETURN_CODE UbsShmAddTimer(ShmList *shmList);
+RETURN_CODE InitShmTimer(ShmList **shmList);
+RETURN_CODE DestroyShmTimer(ShmList *shmList);
+RETURN_CODE AddShmToList(ShmList *shmList, SHM *shm);
+RETURN_CODE IsExistInShmList(ShmList *shmList, const SHM *shm);
+}
+}
+#endif //BRPC_SHM_UBS_H
\ No newline at end of file
diff --git a/src/brpc/ub/timer/timer_mgr.cpp
b/src/brpc/ub/timer/timer_mgr.cpp new file mode 100644 index 0000000000..683ce9bb62 --- /dev/null +++ b/src/brpc/ub/timer/timer_mgr.cpp @@ -0,0 +1,367 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include "brpc/ub/timer/timer_mgr.h" + +namespace brpc { +namespace ub { +int32_t g_epollFd = -1; +std::atomic g_totalTimerNum; +TimerFdCtx *g_timerFdCtxMap = NULL; +uint32_t maxSystemFd; +static pthread_t g_epollExecuteThread; +static int32_t g_timerModuleInitialized; + +static RETURN_CODE DeleteTimerInner(uint32_t fd) +{ + if (g_timerFdCtxMap == NULL) { + LOG(WARNING) << "The timer is not initialized."; + return HLC_OK; + } + + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { + LOG(WARNING) << "The timer is not using, timerFd=" << fd; + return HLC_OK; + } + + if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { + LOG(ERROR) << "Failed to delete the timer fd=" << fd << " with errno=" << errno; + } + + CloseTimerFd(fd); + atomic_fetch_sub(&g_totalTimerNum, 1); + return HLC_OK; +} + +static RETURN_CODE StartTimeEpoll(void) +{ + g_epollFd = epoll_create1(0); + if (UNLIKELY(g_epollFd == -1)) { + LOG(ERROR) << "Failed to create epoll. 
errno=" << errno; + return HLC_ERR; + } + + int ret = pthread_create(&g_epollExecuteThread, NULL, TimerEpoll, NULL); + if (UNLIKELY(ret != 0)) { + LOG(ERROR) << "Failed to create thread err=" << ret; + return HLC_ERR; + } + return HLC_OK; +} + +static RETURN_CODE TimerSpinLocksInit(void) +{ + if (g_timerFdCtxMap == NULL) { + LOG(ERROR) << "Timer module is not fully initialized."; + return HLC_ERR; + } + + for (uint32_t fd = 0; fd < maxSystemFd; fd++) { + int ret = pthread_spin_init(&g_timerFdCtxMap[fd].spinLock, PTHREAD_PROCESS_PRIVATE); + if (ret != EOK) { + LOG(ERROR) << "Failed to initialize spin lock for fd=" << fd; + for (uint32_t cleanupFd = 0; cleanupFd < fd; cleanupFd++) { + pthread_spin_destroy(&g_timerFdCtxMap[cleanupFd].spinLock); + } + return HLC_ERR; + } + } + return HLC_OK; +} + +static RETURN_CODE ExecuteCallback(int32_t timerFd) +{ + pthread_attr_t attr; + pthread_attr_init(&attr); + error_t err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + if (err != 0) { + LOG(ERROR) << "Failed to set thread detach status when executing callback"; + } + + pthread_t cbThread; + err = pthread_create(&cbThread, &attr, UnifiedCallback, (void *)(&g_timerFdCtxMap[timerFd])); + if (err != 0) { + pthread_attr_destroy(&attr); + LOG(ERROR) << "Failed to create thread while executing callback due to errno=" << err; + return HLC_ERR; + } + pthread_attr_destroy(&attr); + return HLC_OK; +} + +static RETURN_CODE TimerCtxMapCompletion(void) +{ + memset(g_timerFdCtxMap, 0, + sizeof(TimerFdCtx) * maxSystemFd); + + RETURN_CODE ret = TimerSpinLocksInit(); + if (ret != HLC_OK) { + LOG(ERROR) << "Failed to init spin locks for timer module."; + return HLC_ERR; + } + return HLC_OK; +} + +RETURN_CODE TimerInit(void) +{ + if (g_timerModuleInitialized > 0) { + return HLC_OK; + } + + g_totalTimerNum.store(0); + + struct rlimit rlim; + if (getrlimit(RLIMIT_NOFILE, &rlim) != HLC_OK) { + LOG(ERROR) << "Failed to get fd"; + return HLC_ERR; + } + maxSystemFd = 
(uint32_t)rlim.rlim_cur; + + if (g_timerFdCtxMap == NULL) { + g_timerFdCtxMap = (TimerFdCtx *)malloc(sizeof(TimerFdCtx) * maxSystemFd); + if (UNLIKELY(!g_timerFdCtxMap)) { + LOG(ERROR) << "Fail to malloc space for timer modules. errno=%d", errno; + return HLC_ERR; + } + + RETURN_CODE ret = TimerCtxMapCompletion(); + if (ret != HLC_OK) { + LOG(ERROR) << "Failed to init main data structure of Time Module. ret=" << ret; + free(g_timerFdCtxMap); + g_timerFdCtxMap = NULL; + return HLC_ERR; + } + } + + RETURN_CODE ret = StartTimeEpoll(); + if (ret != HLC_OK) { + LOG(ERROR) << "Failed to start Timer Epoll. ret=" << ret; + if (LIKELY(g_timerFdCtxMap != NULL)) { + FREE_PTR(g_timerFdCtxMap); + } + return HLC_ERR; + } + g_timerModuleInitialized = 1; + return HLC_OK; +} + +void *UnifiedCallback(void *args) +{ + TimerFdCtx *ctx = (TimerFdCtx *)args; + if (pthread_spin_trylock(&ctx->spinLock) == 0) { + if (ctx->status == TIMER_CONTEXT_NOT_USING) { + pthread_spin_unlock(&ctx->spinLock); + return NULL; + } + ctx->status = TIMER_CONTEXT_CALLBACK_ONGOING; + ctx->cb(ctx->args); + if (ctx->periodical != 1) { + DeleteTimerInner((uint32_t)ctx->fd); + } + pthread_spin_unlock(&ctx->spinLock); + } else { + LOG_EVERY_SECOND(WARNING) << "The context status is " << ctx->status; + return NULL; + } + return NULL; +} + +void *TimerEpoll(void *args) +{ + UNREFERENCE_PARAM(args); + struct epoll_event readyEvents[MAX_TIMER]; + while (1) { + if (g_timerModuleInitialized <= 0) { + LOG(ERROR) << "The Timer module is not initialized."; + break; + } + + int32_t readyNum = epoll_wait(g_epollFd, readyEvents, MAX_TIMER, TIMER_EPOLL_WAIT_TIMEOUT); + if (UNLIKELY(readyNum == -1)) { + error_t err = errno; + if (err == EINTR) { + LOG_EVERY_SECOND(WARNING) << "Epoll wait was interrupted. errno=" << err; + continue; + } else if (err == EBADF) { + LOG(WARNING) << "The Timer module is destroyed."; + break; + } + LOG(ERROR) << "Epoll wait internal error. 
errno=" << err; + break; + } + + for (int32_t i = 0; i < readyNum; i++) { + struct epoll_event *event = &readyEvents[i]; + int32_t timerFd = event->data.fd; + uint64_t exp = 0; + if (read(timerFd, &exp, sizeof(exp)) < 0) { + LOG(ERROR) << "Failed to read timerfd=" << timerFd << " errno=" << errno; + continue; + } + if (TimerFdCtxValidate((uint32_t)timerFd) != HLC_OK) { + LOG(ERROR) << "Timer ctx is not valid=" << timerFd; + continue; + } + + RETURN_CODE ret = ExecuteCallback(timerFd); + if (ret != HLC_OK) { + LOG(ERROR) << "Failed execute callback ret=" << ret; + DeleteTimerInner((uint32_t)timerFd); + continue; + } + } + } + return NULL; +} + +void DeleteTimerSafe(uint32_t fd) +{ + if (g_timerFdCtxMap == NULL) { + LOG(WARNING) << "The timer is not initialized."; + return; + } + + if (pthread_spin_lock(&g_timerFdCtxMap[fd].spinLock) != 0) { + LOG(ERROR) << "Failed to lock while deleting timer=" << fd << " errno=" << errno; + return; + } + + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { + LOG(WARNING) << "The timer is not using, timerFd=" << fd; + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); + return; + } + + if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { + LOG(ERROR) << "Failed to delete the timer fd=" << fd << " with errno=" << errno; + } + + CloseTimerFd(fd); + atomic_fetch_sub(&g_totalTimerNum, 1); + + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); +} +void DeleteTimer(uint32_t fd) +{ + if (g_timerFdCtxMap == NULL) { + LOG(WARNING) << "The timer is not initialized."; + return; + } + + g_timerFdCtxMap[fd].periodical = 0; +} + +int32_t TimerStart(const struct itimerspec *time, void *(*cb)(void *), void *args) +{ + if (g_epollFd == -1) { + LOG(ERROR) << "Timer epoll encountered internal error."; + return -1; + } + + int timerFd = timerfd_create(CLOCK_MONOTONIC, 0); + if (UNLIKELY(timerFd >= (int)maxSystemFd || timerFd == -1)) { + LOG(ERROR) << "Failed to create timerfd=" << timerFd << " errno=" << errno; + return -1; + } + 
+ g_timerFdCtxMap[timerFd].status = TIMER_CONTEXT_EPOLL_WAITING; + g_timerFdCtxMap[timerFd].cb = cb; + g_timerFdCtxMap[timerFd].args = args; + g_timerFdCtxMap[timerFd].fd = (uint32_t)timerFd; + + if (LIKELY(time->it_interval.tv_sec > 0 || time->it_interval.tv_nsec > 0)) { + g_timerFdCtxMap[timerFd].periodical = 1; + } + + struct epoll_event event = { + .events = EPOLLIN, + .data = {.fd = timerFd} + }; + + int32_t ret = epoll_ctl(g_epollFd, EPOLL_CTL_ADD, timerFd, &event); + if (UNLIKELY(ret != 0)) { + CloseTimerFd((uint32_t)timerFd); + LOG(ERROR) << "Failed to add event to epoll. errno=" << errno; + return -1; + } + + atomic_fetch_add(&g_totalTimerNum, 1); + + ret = timerfd_settime(timerFd, 0, time, NULL); + if (UNLIKELY(ret != 0)) { + if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, timerFd, NULL) != 0) { + LOG(ERROR) << "Failed to delete the timer fd=" << timerFd << " with errno=" << errno; + } + CloseTimerFd((uint32_t)timerFd); + atomic_fetch_sub(&g_totalTimerNum, 1); + LOG(ERROR) << "Failed to set timer"; + return -1; + } + + return timerFd; +} + +uint32_t GetActiveTimerNum(void) +{ + return atomic_load(&g_totalTimerNum); +} + +void CloseTimerFd(uint32_t fd) +{ + g_timerFdCtxMap[fd].cb = NULL; + g_timerFdCtxMap[fd].args = NULL; + g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; + g_timerFdCtxMap[fd].fd = 0; + g_timerFdCtxMap[fd].periodical = 0; + if (close((int)fd) != 0) { + LOG(ERROR) << "Failed to close timer fd=" << fd << " errno=" << errno; + return; + } +} + +void TimerModuleDestroy(void) +{ + uint32_t maxFd = maxSystemFd; + if (g_timerFdCtxMap) { + for (uint32_t fd = 0; fd < maxFd; fd++) { + if (g_timerFdCtxMap[fd].status != TIMER_CONTEXT_NOT_USING) { + DeleteTimerSafe(fd); + } + } + } + close(g_epollFd); + g_epollFd = -1; + g_totalTimerNum = 0; + g_timerModuleInitialized = 0; + int32_t ret = pthread_join(g_epollExecuteThread, NULL); + if (ret != EOK) { + LOG(ERROR) << "Failed to join pthread, during destroying timer module. 
ret=" << ret; + return; + } +} + +RETURN_CODE TimerFdCtxValidate(uint32_t fd) +{ + if (fd >= maxSystemFd) { + LOG(ERROR) << "TimerFd=" << fd << " is out of range=" << maxSystemFd; + return HLC_ERR; + } + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { + LOG(ERROR) << "TimerFd=" << fd << " has wrong status=" << g_timerFdCtxMap[fd].status; + return HLC_ERR; + } + if (g_timerFdCtxMap[fd].cb == NULL) { + LOG(ERROR) << "The callback is not set."; + return HLC_ERR; + } + + return HLC_OK; +} +} +} \ No newline at end of file diff --git a/src/brpc/ub/timer/timer_mgr.h b/src/brpc/ub/timer/timer_mgr.h new file mode 100644 index 0000000000..be7a646488 --- /dev/null +++ b/src/brpc/ub/timer/timer_mgr.h @@ -0,0 +1,41 @@ +#ifndef BRPC_TIMER_MGR_H +#define BRPC_TIMER_MGR_H +#include +#include +#include +#include "brpc/ub/common/common.h" + +#define MAX_TIMER 1024 +#define TIMER_EPOLL_WAIT_TIMEOUT 1000 + +namespace brpc { +namespace ub { +typedef enum { + TIMER_CONTEXT_NOT_USING, + TIMER_CONTEXT_EPOLL_WAITING, + TIMER_CONTEXT_CALLBACK_ONGOING +} TimerFdCtxStatus; + +typedef struct { + void *(*cb)(void*); + void *args; + uint32_t fd; + TimerFdCtxStatus status; + uint32_t periodical; + pthread_spinlock_t spinLock; +} TimerFdCtx; + +RETURN_CODE TimerInit(void); +void TimerModuleDestroy(void); +void *UnifiedCallback(void *args); +void *TimerEpoll(void *args); +int32_t TimerStart(const struct itimerspec *time, void *(*cb)(void *), void *args); +uint32_t GetActiveTimerNum(void); +void CloseTimerFd(uint32_t fd); + +void DeleteTimerSafe(uint32_t fd); +void DeleteTimer(uint32_t fd); +RETURN_CODE TimerFdCtxValidate(uint32_t fd); +} +} +#endif //BRPC_TIMER_MGR_H \ No newline at end of file diff --git a/src/brpc/ub/ub_endpoint.cpp b/src/brpc/ub/ub_endpoint.cpp new file mode 100644 index 0000000000..fe8da1d6c1 --- /dev/null +++ b/src/brpc/ub/ub_endpoint.cpp @@ -0,0 +1,878 @@ +#if BRPC_WITH_UBRING + +#include +#include "butil/fd_utility.h" +#include "butil/logging.h" // CHECK, LOG 
+#include "butil/sys_byteorder.h" // HostToNet,NetToHost +#include "bthread/bthread.h" +#include "brpc/errno.pb.h" +#include "brpc/event_dispatcher.h" +#include "brpc/input_messenger.h" +#include "brpc/socket.h" +#include "brpc/reloadable_flags.h" +#include "brpc/ub/ub_helper.h" +#include "brpc/ub/ub_endpoint.h" +#include "brpc/ub_transport.h" +#include "brpc/ub/ubr_trx.h" + +DECLARE_int32(task_group_ntags); + +namespace brpc { +DECLARE_bool(log_connection_close); +namespace ub { + +extern bool g_skip_ub_init; +DEFINE_int32(data_queue_size, 4, "data queue size for UB"); +DEFINE_bool(ub_trace_verbose, false, "Print log message verbosely"); +BRPC_VALIDATE_GFLAG(ub_trace_verbose, brpc::PassValidate); +DEFINE_int32(ub_poller_num, 1, "Poller number in ub polling mode."); +DEFINE_bool(ub_poller_yield, false, "Yield thread in RDMA polling mode."); +DEFINE_bool(ub_edisp_unsched, false, "Disable event dispatcher schedule"); +DEFINE_bool(ub_disable_bthread, false, "Disable bthread in RDMA"); + +static const size_t MIN_ONCE_READ = 4096; +static const size_t MAX_ONCE_READ = 524288; +static const size_t IOBUF_IOV_MAX = 256; + +static const char* MAGIC_STR = "UB"; +static const size_t MAGIC_STR_LEN = 2; +static const size_t HELLO_MSG_LEN_MIN = 64; +static const size_t ACK_MSG_LEN = 4; +static uint16_t g_ub_hello_msg_len = 64; +static uint16_t g_ub_hello_version = 2; +static uint16_t g_ub_impl_version = 1; + +static const uint32_t ACK_MSG_UB_OK = 0x1; + +static butil::Mutex* g_rdma_resource_mutex = NULL; + +struct HelloMessage { + void Serialize(void* data) const; + void Deserialize(void* data); + + uint16_t msg_len; + uint16_t hello_ver; + uint16_t impl_ver; + uint64_t len; + char shm_name[SHM_MAX_NAME_BUFF_LEN]; +}; + +void HelloMessage::Serialize(void* data) const { + uint16_t* current_pos = (uint16_t*)data; + *(current_pos++) = butil::HostToNet16(msg_len); + *(current_pos++) = butil::HostToNet16(hello_ver); + *(current_pos++) = butil::HostToNet16(impl_ver); + uint64_t* 
len_pos = (uint64_t*)current_pos; + *len_pos = butil::HostToNet64(len); + current_pos += 4; + memcpy(current_pos, shm_name, SHM_MAX_NAME_BUFF_LEN); +} + +void HelloMessage::Deserialize(void* data) { + uint16_t* current_pos = (uint16_t*)data; + msg_len = butil::NetToHost16(*current_pos++); + hello_ver = butil::NetToHost16(*current_pos++); + impl_ver = butil::NetToHost16(*current_pos++); + len = butil::NetToHost64(*(uint64_t*)current_pos); + current_pos += 4; // move forward 4 Bytes + memcpy(shm_name, current_pos, SHM_MAX_NAME_BUFF_LEN); +} + +UBShmEndpoint::UBShmEndpoint(Socket* s) + : _socket(s) + , _ub_ring(nullptr) + , _cq_sid(INVALID_SOCKET_ID) +{ + _read_butex = bthread::butex_create_checked>(); +} + +UBShmEndpoint::~UBShmEndpoint() { + Reset(); + bthread::butex_destroy(_read_butex); +} + +void UBShmEndpoint::Reset() { + DeallocateResources(); + + delete _ub_ring; + _ub_ring = nullptr; + _cq_sid = INVALID_SOCKET_ID; +} + +void UBConnect::StartConnect(const Socket* socket, + void (*done)(int err, void* data), + void* data) { + auto* ub_transport = static_cast(socket->_transport.get()); + CHECK(ub_transport->_ub_ep != NULL); + SocketUniquePtr s; + if (Socket::Address(socket->id(), &s) != 0) { + return; + } + if (!IsUBAvailable()) { + ub_transport->_ub_ep->_state = UBShmEndpoint::FALLBACK_TCP; + ub_transport->_ub_state = UBShmTransport::UB_OFF; + done(0, data); + return; + } + _done = done; + _data = data; + bthread_t tid; + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "UBProcessHandshakeAtClient"); + if (bthread_start_background(&tid, &attr, + UBShmEndpoint::ProcessHandshakeAtClient, ub_transport->_ub_ep) < 0) { + LOG(FATAL) << "Fail to start handshake bthread"; + Run(); + } else { + s.release(); + } +} + +void UBConnect::StopConnect(Socket* socket) { } + +void UBConnect::Run() { + _done(errno, _data); +} + +static void TryReadOnTcpDuringRdmaEst(Socket* s) { + int progress = Socket::PROGRESS_INIT; + while (true) { + uint8_t tmp; + 
ssize_t nr = read(s->fd(), &tmp, 1); + if (nr < 0) { + if (errno != EAGAIN) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read from " << s; + s->SetFailed(saved_errno, "Fail to read from %s: %s", + s->description().c_str(), berror(saved_errno)); + return; + } + if (!s->MoreReadEvents(&progress)) { + break; + } + } else if (nr == 0) { + s->SetEOF(); + return; + } else { + LOG(WARNING) << "Read unexpected data from " << s; + s->SetFailed(EPROTO, "Read unexpected data from %s", + s->description().c_str()); + return; + } + } +} + +void UBShmEndpoint::OnNewDataFromTcp(Socket* m) { + auto* ub_transport = static_cast(m->_transport.get()); + UBShmEndpoint* ep = ub_transport->GetUBShmEp(); + CHECK(ep != NULL); + + int progress = Socket::PROGRESS_INIT; + while (true) { + if (ep->_state == UNINIT) { + if (!m->CreatedByConnect()) { + if (!IsUBAvailable()) { + ep->_state = FALLBACK_TCP; + ub_transport->_ub_state = UBShmTransport::UB_OFF; + continue; + } + bthread_t tid; + ep->_state = S_HELLO_WAIT; + SocketUniquePtr s; + m->ReAddress(&s); + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "UBProcessHandshakeAtServer"); + if (bthread_start_background(&tid, &attr, + ProcessHandshakeAtServer, ep) < 0) { + ep->_state = UNINIT; + LOG(FATAL) << "Fail to start handshake bthread"; + } else { + s.release(); + } + } else { + // The connection may be closed or reset before the client + // starts handshake. This will be handled by client handshake. + // Ignore the exception here. 
+ } + } else if (ep->_state < ESTABLISHED) { // during handshake + ep->_read_butex->fetch_add(1, butil::memory_order_release); + bthread::butex_wake(ep->_read_butex); + } else if (ep->_state == FALLBACK_TCP){ // handshake finishes + InputMessenger::OnNewMessages(m); + return; + } else if (ep->_state == ESTABLISHED) { + TryReadOnTcpDuringRdmaEst(ep->_socket); + return; + } + if (!m->MoreReadEvents(&progress)) { + break; + } + } +} +bool HelloNegotiationValid(HelloMessage& msg) { + if (msg.hello_ver == g_ub_hello_version && + msg.impl_ver == g_ub_impl_version) { + // This can be modified for future compatibility + return true; + } + return false; +} + +static const int WAIT_TIMEOUT_MS = 50; + +int UBShmEndpoint::ReadFromFd(void* data, size_t len) { + CHECK(data != NULL); + int nr = 0; + size_t received = 0; + do { + const int expected_val = _read_butex->load(butil::memory_order_acquire); + const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); + nr = read(_socket->fd(), (uint8_t*)data + received, len - received); + if (nr < 0) { + if (errno == EAGAIN) { + if (bthread::butex_wait(_read_butex, expected_val, &duetime) < 0) { + if (errno != EWOULDBLOCK && errno != ETIMEDOUT) { + return -1; + } + } + } else { + return -1; + } + } else if (nr == 0) { + errno = EEOF; + return -1; + } else { + received += nr; + } + } while (received < len); + return 0; +} + +int UBShmEndpoint::WriteToFd(void* data, size_t len) { + CHECK(data != NULL); + int nw = 0; + size_t written = 0; + do { + const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); + nw = write(_socket->fd(), (uint8_t*)data + written, len - written); + if (nw < 0) { + if (errno == EAGAIN) { + if (_socket->WaitEpollOut(_socket->fd(), true, &duetime) < 0) { + if (errno != ETIMEDOUT) { + return -1; + } + } + } else { + return -1; + } + } else { + written += nw; + } + } while (written < len); + return 0; +} + +inline void UBShmEndpoint::TryReadOnTcp() { + if (_socket->_nevent.fetch_add(1, 
butil::memory_order_acq_rel) == 0) { + if (_state == FALLBACK_TCP) { + InputMessenger::OnNewMessages(_socket); + } else if (_state == ESTABLISHED) { + TryReadOnTcpDuringRdmaEst(_socket); + } + } +} + +void* UBShmEndpoint::ProcessHandshakeAtClient(void* arg) { + UBShmEndpoint* ep = static_cast(arg); + SocketUniquePtr s(ep->_socket); + UBConnect::RunGuard rg((UBConnect*)s->_app_connect.get()); + + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Start handshake on " << s->_local_side; + + uint8_t data[g_ub_hello_msg_len]; + + ep->_state = C_ALLOC_SHM; + auto* ub_transport = static_cast(s->_transport.get()); + size_t local_shm_len = (size_t)(FLAGS_data_queue_size) * MB_TO_BYTE; + SHM local_trx_shm = {NULL, local_shm_len, 0, {0}, (uint32_t)s->fd()}; + const char* shm_name = butil::endpoint2str(s->local_side()).c_str(); + if (ep->AllocateClientResources(&local_trx_shm, shm_name) < 0) { + LOG(WARNING) << "Fallback to tcp:" << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + ep->_state = FALLBACK_TCP; + return NULL; + } + + ep->_state = C_HELLO_SEND; + HelloMessage local_msg; + local_msg.msg_len = g_ub_hello_msg_len; + local_msg.hello_ver = g_ub_hello_version; + local_msg.impl_ver = g_ub_impl_version; + local_msg.len = local_shm_len; + memcpy(local_msg.shm_name, local_trx_shm.name, SHM_MAX_NAME_BUFF_LEN); + memcpy(data, MAGIC_STR, MAGIC_STR_LEN); + local_msg.Serialize((char*)data + MAGIC_STR_LEN); + if (ep->WriteToFd(data, g_ub_hello_msg_len) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to send hello message to server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + ep->_state = C_HELLO_WAIT; + if (ep->ReadFromFd(data, MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to get hello message from server:" << s->description(); + s->SetFailed(saved_errno, "Fail to 
complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + if (memcmp(data, MAGIC_STR, MAGIC_STR_LEN) != 0) { + LOG(WARNING) << "Read unexpected data during handshake:" << s->description(); + s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } + + if (ep->ReadFromFd(data, HELLO_MSG_LEN_MIN - MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to get Hello Message from server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + HelloMessage remote_msg; + remote_msg.Deserialize(data); + if (remote_msg.msg_len < HELLO_MSG_LEN_MIN) { + LOG(WARNING) << "Fail to parse Hello Message length from server:" + << s->description(); + s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } + + if (remote_msg.msg_len > HELLO_MSG_LEN_MIN) { + // TODO: Read Hello Message customized data + // Just for future use, should not happen now + } + + if (!HelloNegotiationValid(remote_msg)) { + LOG(WARNING) << "Fail to negotiate with server, fallback to tcp:" + << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } else { + ep->_state = C_MAP_REMOTE_SHM; + if (ep->_ub_ring->UbrMapRemoteShm(&local_trx_shm, shm_name) < 0) { + LOG(WARNING) << "Fail to map the remote shm, fallback to tcp:" << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } else { + ub_transport->_ub_state = UBShmTransport::UB_ON; + } + } + + ep->_state = C_ACK_SEND; + uint32_t flags = 0; + if (ub_transport->_ub_state != UBShmTransport::UB_OFF) { + flags |= ACK_MSG_UB_OK; + } + uint32_t* tmp = (uint32_t*)data; + *tmp = butil::HostToNet32(flags); + 
if (ep->WriteToFd(data, ACK_MSG_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to send Ack Message to server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + if (ub_transport->_ub_state == UBShmTransport::UB_ON) { + ep->_state = ESTABLISHED; + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Client handshake ends (use rdma) on " << s->description(); + } else { + ep->_state = FALLBACK_TCP; + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Client handshake ends (use tcp) on " << s->description(); + } + + errno = 0; + + return NULL; +} + +void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) { + UBShmEndpoint* ep = static_cast(arg); + SocketUniquePtr s(ep->_socket); + + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Start handshake on " << s->description(); + + uint8_t data[g_ub_hello_msg_len]; + + ep->_state = S_HELLO_WAIT; + if (ep->ReadFromFd(data, MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read Hello Message from client:" << s->description() << " " << s->_remote_side; + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + auto* ub_transport = static_cast(s->_transport.get()); + if (memcmp(data, MAGIC_STR, MAGIC_STR_LEN) != 0) { + LOG_IF(INFO, FLAGS_ub_trace_verbose) << "It seems that the " + << "client does not use RDMA, fallback to TCP:" + << s->description(); + s->_read_buf.append(data, MAGIC_STR_LEN); + ep->_state = FALLBACK_TCP; + ub_transport->_ub_state = UBShmTransport::UB_OFF; + ep->TryReadOnTcp(); + return NULL; + } + + if (ep->ReadFromFd(data, g_ub_hello_msg_len - MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read Hello Message from client:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma 
handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + HelloMessage remote_msg; + remote_msg.Deserialize(data); + if (remote_msg.msg_len < HELLO_MSG_LEN_MIN) { + LOG(WARNING) << "Fail to parse Hello Message length from client:" + << s->description(); + s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } + if (remote_msg.msg_len > HELLO_MSG_LEN_MIN) { + // TODO: Read Hello Message customized header + // Just for future use, should not happen now + } + + if (!HelloNegotiationValid(remote_msg)) { + LOG(WARNING) << "Fail to negotiate with client, fallback to tcp:" + << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } else { + ep->_state = S_ALLOC_SHM; + SHM remote_trx_shm = {NULL, remote_msg.len, 0, {0}, (uint8_t)ep->_socket->fd()}; + strncpy(remote_trx_shm.name, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); + + size_t local_shm_len = (size_t)(FLAGS_data_queue_size) * MB_TO_BYTE; + // server端共享内存名称 + SHM local_trx_shm = {NULL, local_shm_len, 0, {0}, (uint8_t)ep->_socket->fd()}; + char clientName[SHM_MAX_NAME_BUFF_LEN]; + strncpy(clientName, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); + + char *clientIpPort = strrchr(clientName, '_'); + if (clientIpPort != NULL) { + *clientIpPort = '\0'; + } + int result = snprintf(local_trx_shm.name, SHM_MAX_NAME_BUFF_LEN, "%s_%s", + clientName, SERVER_SHM_NAME_SUFFIX); + if (UNLIKELY(result < 0)) { + LOG(WARNING) << "Copy client shared memory name failed, ret=" << result; + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } + if (result >= 0 && ep->AllocateServerResources(&remote_trx_shm, &local_trx_shm) < 0) { + LOG(WARNING) << "Fail to allocate ub resources, fallback to tcp:" + << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } + } + + ep->_state = S_HELLO_SEND; + HelloMessage local_msg; + local_msg.msg_len = 
g_ub_hello_msg_len; + if (ub_transport->_ub_state == UBShmTransport::UB_OFF) { + local_msg.impl_ver = 0; + local_msg.hello_ver = 0; + } else { + local_msg.hello_ver = g_ub_hello_version; + local_msg.impl_ver = g_ub_impl_version; + local_msg.len = (FLAGS_data_queue_size) * MB_TO_BYTE; + memcpy(local_msg.shm_name, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); + } + memcpy(data, MAGIC_STR, MAGIC_STR_LEN); + local_msg.Serialize((char*)data + MAGIC_STR_LEN); + if (ep->WriteToFd(data, g_ub_hello_msg_len) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to send Hello Message to client:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete ub handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + ep->_state = S_ACK_WAIT; + if (ep->ReadFromFd(data, ACK_MSG_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read ack message from client:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + uint32_t* tmp = (uint32_t*)data; + uint32_t flags = butil::NetToHost32(*tmp); + if (flags & ACK_MSG_UB_OK) { + if (ub_transport->_ub_state == UBShmTransport::UB_OFF) { + LOG(WARNING) << "Fail to parse Hello Message length from client:" + << s->description(); + s->SetFailed(EPROTO, "Fail to complete ub handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } else { + ub_transport->_ub_state = UBShmTransport::UB_ON; + ep->_state = ESTABLISHED; + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Server handshake ends (use rdma) on " << s->description(); + } + } else { + ub_transport->_ub_state = UBShmTransport::UB_OFF; + ep->_state = FALLBACK_TCP; + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Server handshake ends (use tcp) on " << s->description(); + } + ep->TryReadOnTcp(); + + return NULL; +} + 
+bool UBShmEndpoint::IsWritable() const { + if (BAIDU_UNLIKELY(g_skip_ub_init)) { + // Just for UT + return false; + } + auto ret = _ub_ring->IsUbrTrxWriteable(EPOLLET); + if (ret == 0) { + return true; + } + return false; +} + +ssize_t UBShmEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { + if (BAIDU_UNLIKELY(g_skip_ub_init)) { + // Just for UT + errno = EAGAIN; + return -1; + } + if (BAIDU_UNLIKELY(ndata == 0)) { + return 0; + } + struct iovec vec[IOBUF_IOV_MAX]; + size_t nvec = 0; + for (size_t i = 0; i < ndata; ++i) { + const butil::IOBuf* p = from[i]; + const size_t nref = p->_ref_num(); + for (size_t j = 0; j < nref && nvec < IOBUF_IOV_MAX; ++j, ++nvec) { + butil::IOBuf::BlockRef const& r = p->_ref_at(j); + vec[nvec].iov_base = r.block->data + r.offset; + vec[nvec].iov_len = r.length; + } + } + + ssize_t nw = 0; + nw = _ub_ring->UbrTrxWritev(vec, nvec); + if (UNLIKELY(nw == -1)) { + LOG(ERROR) << "Non-blocking send msg in failed, connection has been closed."; + errno = EPIPE; + } else if (UNLIKELY(nw == HLC_RETRY)) { + errno = EAGAIN; + nw = -1; + } + if (nw <= 0) { + return nw; + } + size_t npop_all = nw; + for (size_t i = 0; i < ndata; ++i) { + npop_all -= from[i]->pop_front(npop_all); + if (npop_all == 0) { + break; + } + } + return nw; +} + +int UBShmEndpoint::AllocateClientResources(SHM* local_trx_shm, const char* shm_name) { + if (BAIDU_UNLIKELY(g_skip_ub_init)) { + // For UT + return 0; + } + + CHECK(_ub_ring == NULL); + // TODO: Pooling management + _ub_ring = new UBRing(); + + SocketOptions options; + options.user = this; + options.keytable_pool = _socket->_keytable_pool; + if (Socket::Create(options, &_cq_sid) < 0) { + PLOG(WARNING) << "Fail to create socket for cq"; + return -1; + } + int ret = _ub_ring->UbrAllocateLocalShm(local_trx_shm, shm_name); + if (ret != 0) { + return ret; + } + PollerRegisterEvent(CqSidOp::ADD, EPOLLIN); + return 0; +} + +int UBShmEndpoint::AllocateServerResources(SHM* remote_trx_shm, SHM* local_trx_shm) { + 
if (BAIDU_UNLIKELY(g_skip_ub_init)) {
+        // For UT
+        return 0;
+    }
+
+    CHECK(_ub_ring == NULL);
+    // TODO: Pooling management
+    _ub_ring = new UBRing();
+
+    // The cq socket exists only so the poller can find this endpoint through
+    // Socket::Address(...)->user().
+    // NOTE(review): the early returns below leave the new UBRing (and, for the
+    // second one, the cq socket) allocated - confirm who releases them.
+    SocketOptions options;
+    options.user = this;
+    options.keytable_pool = _socket->_keytable_pool;
+    if (Socket::Create(options, &_cq_sid) < 0) {
+        PLOG(WARNING) << "Fail to create socket for cq";
+        return -1;
+    }
+    int ret = _ub_ring->UbrAllocateServerShm(remote_trx_shm, local_trx_shm);
+    if (ret != 0) {
+        return ret;
+    }
+    // TODO(mwj): should polling start only after the connection is established?
+    PollerRegisterEvent(CqSidOp::ADD, EPOLLIN);
+    return ret;
+}
+
+// Tears down what Allocate*Resources set up: unregisters the cq socket from
+// the poller, closes the ring, then detaches and fails the cq socket.
+// Does nothing when no ring was ever created.
+// NOTE(review): _ub_ring is not deleted and _cq_sid is not reset here.
+void UBShmEndpoint::DeallocateResources() {
+    if (!_ub_ring) {
+        return;
+    }
+    PollerRegisterEvent(CqSidOp::REMOVE);
+    _ub_ring->UbrTrxClose();
+    if (INVALID_SOCKET_ID != _cq_sid) {
+        SocketUniquePtr s;
+        if (Socket::Address(_cq_sid, &s) == 0) {
+            // Clear the user pointer and fd before failing the socket.
+            s->_user = NULL;
+            s->_fd = -1;
+            s->SetFailed();
+        }
+    }
+}
+
+// Poller callback for readable events: drains data from the ring into the
+// owning socket's read buffer and hands it to the InputMessenger.
+//   ep      - endpoint whose ring is polled
+//   epEvent - event mask registered for this endpoint (EPOLLIN/EPOLLET bits)
+void UBShmEndpoint::PollIn(UBShmEndpoint* ep, uint32_t epEvent) {
+    SocketUniquePtr s;
+    if (Socket::Address(ep->_socket->id(), &s) < 0) {
+        return;
+    }
+    // NOTE(review): the target type of this static_cast is missing in this copy
+    // of the patch (angle-bracket contents appear to have been stripped).
+    auto* ub_transport = static_cast(s->_transport.get());
+    CHECK(ep == ub_transport->_ub_ep);
+
+    InputMessageClosure last_msg;
+    while (true) {
+        // Only a negative result stops polling here.
+        int ret = ep->_ub_ring->IsUbrTrxReadable(epEvent);
+        if (ret < 0) {
+            return;
+        }
+
+        bool read_eof = false;
+        while (!read_eof) {
+            const int64_t received_us = butil::cpuwide_time_us();
+            const int64_t base_realtime = butil::gettimeofday_us() - received_us;
+
+            // Read 16x the average message size, clamped to
+            // [MIN_ONCE_READ, MAX_ONCE_READ].
+            size_t once_read = s->_avg_msg_size * 16;
+            if (once_read < MIN_ONCE_READ) {
+                once_read = MIN_ONCE_READ;
+            } else if (once_read > MAX_ONCE_READ) {
+                once_read = MAX_ONCE_READ;
+            }
+
+            const ssize_t nr = s->_read_buf.pappend_from_ub_ring(ep->_ub_ring, once_read);
+            if (nr <= 0) {
+                if (0 == nr) {
+                    // Set `read_eof' flag and proceed to feed EOF into `Protocol'
+                    // (implied by m->_read_buf.empty), which may produce a new
+                    // `InputMessageBase' under some protocols such as HTTP
+                    LOG_IF(WARNING, FLAGS_log_connection_close) << *s << " was 
closed by remote side";
+                    read_eof = true;
+                } else if (errno != EAGAIN) {
+                    if (errno == EINTR) {
+                        continue;
+                    }
+                    const int saved_errno = errno;
+                    PLOG(WARNING) << "Fail to read from " << *s;
+                    s->SetFailed(saved_errno, "Fail to read from %s: %s",
+                                 s->description().c_str(), berror(saved_errno));
+                    return;
+                } else {
+                    // EAGAIN: the ring is drained for now.
+                    return;
+                }
+            }
+
+            InputMessenger* messenger = static_cast<InputMessenger*>(s->user());
+            if (messenger->ProcessNewMessage(s.get(), nr, read_eof, received_us,
+                                             base_realtime, last_msg) < 0) {
+                return;
+            }
+        }
+
+        // The inner loop only falls through after EOF was fed to the protocol.
+        // Mark the socket and stop: looping again would keep reading from a
+        // ring whose peer has already closed.
+        if (read_eof) {
+            s->SetEOF();
+            return;
+        }
+    }
+}
+
+// Poller callback for writable events: wakes the socket's pending writer when
+// the ring reports that it can accept data again.
+void UBShmEndpoint::PollOut(UBShmEndpoint* ep, uint32_t epEvent) {
+    SocketUniquePtr s;
+    if (Socket::Address(ep->_socket->id(), &s) < 0) {
+        return;
+    }
+    // NOTE(review): the target type of this static_cast is missing in this copy
+    // of the patch and cannot be recovered from the visible code.
+    auto* ub_transport = static_cast(s->_transport.get());
+    CHECK(ep == ub_transport->_ub_ep);
+    if (ep->IsWritable()) {
+        ep->_socket->WakeAsEpollOut();
+    }
+
+}
+
+// Creates the global resource mutex and one poller group per bthread tag.
+// Always returns 0.
+int UBShmEndpoint::GlobalInitialize() {
+    g_rdma_resource_mutex = new butil::Mutex;
+    _poller_groups = std::vector<PollerGroup>(FLAGS_task_group_ntags);
+    return 0;
+}
+
+// Stops and joins the pollers of every poller group.
+void UBShmEndpoint::GlobalRelease() {
+    // Bound the loop by the groups that actually exist: this can run before
+    // GlobalInitialize() has sized the vector (early-exit path of the global
+    // UB initialization), where indexing by FLAGS_task_group_ntags would read
+    // past the end of an empty vector.
+    const int ngroups = static_cast<int>(_poller_groups.size());
+    for (int i = 0; i < ngroups; ++i) {
+        PollingModeRelease(i);
+    }
+}
+
+std::vector<UBShmEndpoint::PollerGroup> UBShmEndpoint::_poller_groups;
+
+// Starts the polling bthreads of the group that belongs to `tag'.
+//   callback   - optional hook invoked once per polling round
+//   init_fn    - optional hook invoked when a poller starts
+//   release_fn - optional hook invoked when a poller exits
+// Returns 0 on success or if the group is already running, -1 if a polling
+// bthread cannot be started.
+int UBShmEndpoint::PollingModeInitialize(bthread_tag_t tag,
+                                         std::function<void()> callback,
+                                         std::function<void()> init_fn,
+                                         std::function<void()> release_fn) {
+    auto& group = _poller_groups[tag];
+    auto& pollers = group.pollers;
+    auto& running = group.running;
+    // Only the first caller flips `running' and goes on to start the pollers.
+    bool expected = false;
+    if (!running.compare_exchange_strong(expected, true)) {
+        return 0;
+    }
+    struct FnArgs {
+        Poller* poller;
+        std::atomic<bool>* running;
+    };
+    auto fn = [](void* p) -> void* {
+        // Takes ownership of the heap-allocated arguments.
+        std::unique_ptr<FnArgs> args(static_cast<FnArgs*>(p));
+        auto poller = args->poller;
+        auto running = args->running;
+        // Registered cq sockets, keyed by socket id.
+        std::unordered_set<CqSidOp, CqSidOpHash, CqSidOpEqual> cq_sids;
+        CqSidOp op;
+
+        if (poller->init_fn) {
+            poller->init_fn();
+        }
+        while (running->load(std::memory_order_relaxed)) {
+            // Apply pending ADD/REMOVE/MOD requests before polling.
+            while (poller->op_queue.Dequeue(op)) {
+                if (op.type == CqSidOp::ADD) {
+                    
cq_sids.emplace(op);
+                } else if (op.type == CqSidOp::REMOVE) {
+                    cq_sids.erase(op);
+                } else if (op.type == CqSidOp::MOD) {
+                    // Replace the entry so the new event mask takes effect.
+                    cq_sids.erase(op);
+                    cq_sids.emplace(op);
+                }
+            }
+            // Iterate by reference: no need to copy every entry on each round.
+            for (const auto& cq : cq_sids) {
+                SocketUniquePtr s;
+                if (Socket::Address(cq.sid, &s) < 0) {
+                    continue;
+                }
+                UBShmEndpoint* ep = static_cast<UBShmEndpoint*>(s->user());
+                if (!ep) {
+                    continue;
+                }
+
+                if (cq.event & EPOLLIN) {
+                    PollIn(ep, cq.event);
+                }
+
+                if (cq.event & EPOLLOUT) {
+                    PollOut(ep, cq.event);
+                }
+            }
+            if (poller->callback) {
+                poller->callback();
+            }
+            if (FLAGS_ub_poller_yield) {
+                bthread_yield();
+            }
+        }
+
+        if (poller->release_fn) {
+            poller->release_fn();
+        }
+
+        return nullptr;
+    };
+    for (int i = 0; i < FLAGS_ub_poller_num; ++i) {
+        auto args = new FnArgs{&pollers[i], &running};
+        auto attr = FLAGS_ub_disable_bthread ? BTHREAD_ATTR_PTHREAD
+                                             : BTHREAD_ATTR_NORMAL;
+        attr.tag = tag;
+        bthread_attr_set_name(&attr, "UBPolling");
+        pollers[i].callback = callback;
+        pollers[i].init_fn = init_fn;
+        pollers[i].release_fn = release_fn;
+        auto rc = bthread_start_background(&pollers[i].tid, &attr, fn, args);
+        if (rc != 0) {
+            LOG(ERROR) << "Fail to start ub polling bthread";
+            // The poller never ran, so nobody else will free its arguments.
+            delete args;
+            // NOTE(review): `running' stays true and pollers started in earlier
+            // iterations keep running; confirm whether callers retry.
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Asks the pollers of the group that belongs to `tag' to stop, then joins them.
+void UBShmEndpoint::PollingModeRelease(bthread_tag_t tag) {
+    auto& group = _poller_groups[tag];
+    auto& pollers = group.pollers;
+    auto& running = group.running;
+    running.store(false, std::memory_order_relaxed);
+    for (int i = 0; i < FLAGS_ub_poller_num; ++i) {
+        bthread_join(pollers[i].tid, NULL);
+    }
+}
+
+// Queues an ADD/REMOVE/MOD request for this endpoint's cq socket on one poller
+// of the calling bthread's tag group. The poller is chosen by hashing the cq
+// socket id. Nothing is queued when the cq socket id is invalid.
+void UBShmEndpoint::PollerRegisterEvent(CqSidOp::OpType op, uint32_t events) {
+    auto index = butil::fmix32(_cq_sid) % FLAGS_ub_poller_num;
+    auto& group = _poller_groups[bthread_self_tag()];
+    auto& pollers = group.pollers;
+    auto& poller = pollers[index];
+    if (INVALID_SOCKET_ID != _cq_sid) {
+        poller.op_queue.Enqueue(CqSidOp{_cq_sid, events, op});
+    }
+}
+
+} // namespace ub
+} // namespace brpc
+
+#endif // if BRPC_WITH_UBRING
diff --git a/src/brpc/ub/ub_endpoint.h 
b/src/brpc/ub/ub_endpoint.h
new file mode 100644
index 0000000000..4c7be8c94c
--- /dev/null
+++ b/src/brpc/ub/ub_endpoint.h
@@ -0,0 +1,221 @@
+//
+// Created by z00926396 on 2026/4/11.
+//
+
+#ifndef BRPC_UB_ENDPOINT_H
+#define BRPC_UB_ENDPOINT_H
+
+#if BRPC_WITH_UBRING
+
+// NOTE(review): the header names of the next six includes are missing in this
+// copy of the patch (angle-bracket contents appear to have been stripped).
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "butil/atomicops.h"
+#include "butil/iobuf.h"
+#include "butil/macros.h"
+#include "butil/containers/mpsc_queue.h"
+#include "brpc/socket.h"
+#include "brpc/ub/ub_helper.h"
+#include "brpc/ub/ub_ring.h"
+
+
+namespace brpc {
+class Socket;
+namespace ub {
+
+DECLARE_int32(ub_poller_num);
+DECLARE_bool(ub_edisp_unsched);
+DECLARE_bool(ub_disable_bthread);
+
+// AppConnect implementation used for UB connections. It stores the completion
+// callback and its argument in _done/_data; Run() is private and is invoked
+// through RunGuard.
+class UBConnect : public AppConnect {
+public:
+    void StartConnect(const Socket* socket,
+                      void (*done)(int err, void* data), void* data) override;
+    void StopConnect(Socket*) override;
+    // Scope guard: calls Run() on the held UBConnect when it goes out of
+    // scope, unless this_rc has been set to NULL in the meantime.
+    struct RunGuard {
+        RunGuard(UBConnect* rc) { this_rc = rc; }
+        ~RunGuard() { if (this_rc) this_rc->Run(); }
+        UBConnect* this_rc;
+    };
+
+private:
+    void Run();
+    void (*_done)(int, void*){NULL};
+    void* _data{NULL};
+};
+
+// Per-socket endpoint that moves data through a UBRing instead of the TCP fd.
+class BAIDU_CACHELINE_ALIGNMENT UBShmEndpoint : public SocketUser {
+friend class UBConnect;
+friend class Socket;
+public:
+    explicit UBShmEndpoint(Socket* s);
+    ~UBShmEndpoint() override;
+
+    // Global initialization
+    // Return 0 if success, -1 if failed and errno set
+    static int GlobalInitialize();
+
+    // Stop and join the pollers of every poller group
+    static void GlobalRelease();
+
+    // Reset the endpoint (for next use)
+    void Reset();
+
+    // Cut data from the given IOBuf list and send it through the UB ring
+    // Return bytes cut if success, -1 if failed and errno set
+    ssize_t CutFromIOBufList(butil::IOBuf** data, size_t ndata);
+
+    // Whether the endpoint can send more data
+    bool IsWritable() const;
+
+    // Ask the poller to watch this endpoint for writability (EPOLLOUT | EPOLLET).
+    // When `pollin' is true the existing registration is modified and keeps
+    // EPOLLIN; otherwise a new registration is added.
+    void PollerRegisterEpollOut(bool pollin) {
+        uint32_t events = EPOLLOUT | EPOLLET;
+        if (pollin) {
+            PollerRegisterEvent(CqSidOp::MOD, events | EPOLLIN);
+            return;
+        }
+        PollerRegisterEvent(CqSidOp::ADD, events);
+    }
+
+    
void PollerUnRegisterEpollOut(bool pollin) {
+        // Counterpart of PollerRegisterEpollOut: when `pollin' is true the
+        // registration is modified back to EPOLLIN | EPOLLET, otherwise the
+        // endpoint is removed from the poller.
+        uint32_t events = EPOLLIN | EPOLLET;
+        if (pollin) {
+            PollerRegisterEvent(CqSidOp::MOD, events);
+            return;
+        }
+        PollerRegisterEvent(CqSidOp::REMOVE);
+    }
+
+    // Callback when there is new epollin event on TCP fd
+    static void OnNewDataFromTcp(Socket* m);
+
+    // Initialize polling mode for the poller group of `tag'.
+    // The three callables are optional per-round / start / exit hooks.
+    // NOTE(review): the std::function signatures are missing in this copy of
+    // the patch (angle-bracket contents appear to have been stripped).
+    static int PollingModeInitialize(bthread_tag_t tag,
+                                     std::function callback,
+                                     std::function init_fn,
+                                     std::function release_fn);
+
+    // Stop and join the pollers of the group of `tag'
+    static void PollingModeRelease(bthread_tag_t tag);
+
+private:
+    // Handshake/connection state. C_* values are client-side steps, S_* values
+    // are server-side steps.
+    enum State {
+        UNINIT = 0x0,
+        C_ALLOC_SHM = 0x1,
+        C_HELLO_SEND = 0x2,
+        C_HELLO_WAIT = 0x3,
+        C_MAP_REMOTE_SHM = 0x4,
+        C_ACK_SEND = 0x5,
+        S_HELLO_WAIT = 0x11,
+        S_ALLOC_SHM = 0x12,
+        S_HELLO_SEND = 0x13,
+        S_ACK_WAIT = 0x14,
+        ESTABLISHED = 0x100,
+        FALLBACK_TCP = 0x200,
+        FAILED = 0x300
+    };
+
+    // Process handshake at the client
+    static void* ProcessHandshakeAtClient(void* arg);
+
+    // Process handshake at the server
+    static void* ProcessHandshakeAtServer(void* arg);
+
+    // Allocate resources
+    // Return 0 if success, -1 if failed and errno set
+    int AllocateClientResources(SHM* local_trx_shm, const char* shm_name);
+
+    int AllocateServerResources(SHM* remote_trx_shm, SHM* local_trx_shm);
+
+    // Release resources
+    void DeallocateResources();
+
+    // Read at most len bytes from fd in _socket to data
+    // wait for _read_butex if encounter EAGAIN
+    // return -1 if encounter other errno (including EOF)
+    int ReadFromFd(void* data, size_t len);
+
+
+    // Write at most len bytes from data to fd in _socket
+    // wait for _epollout_butex if encounter EAGAIN
+    // return -1 if encounter other errno
+    int WriteToFd(void* data, size_t len);
+
+    // Poller callback for readable events: read from the ring and feed the
+    // data to the socket's InputMessenger
+    static void PollIn(UBShmEndpoint* ep, uint32_t epEvent);
+
+    // Poller callback for writable events: wake the socket's writer when the
+    // ring is writable again
+    static void PollOut(UBShmEndpoint* ep, uint32_t epEvent);
+
+    // Try to read data on TCP fd in _socket
+    inline void TryReadOnTcp();
+
+    // Not owner
+    Socket* _socket;
+
+    State _state;
+
+    // ub 
resource
+    UBRing* _ub_ring{nullptr};
+
+    // Id of the auxiliary socket registered with the poller; its user is this
+    // endpoint
+    SocketId _cq_sid;
+
+    // butex for inform read events on TCP fd during handshake
+    butil::atomic *_read_butex;
+
+    DISALLOW_COPY_AND_ASSIGN(UBShmEndpoint);
+
+    // Registration request sent to a poller: which socket, which event mask,
+    // and whether to add, remove or modify the registration.
+    struct CqSidOp {
+        enum OpType {
+            ADD,
+            REMOVE,
+            MOD
+        };
+        SocketId sid;
+        uint32_t event;
+        OpType type;
+    };
+
+    // Hash and equality use the socket id only, so a set of CqSidOp holds at
+    // most one entry per socket regardless of event mask or op type.
+    struct CqSidOpHash {
+        std::size_t operator()(const CqSidOp& op) const {
+            return op.sid;
+        }
+    };
+
+    struct CqSidOpEqual {
+        bool operator()(const CqSidOp& lhs, const CqSidOp& rhs) const {
+            return lhs.sid == rhs.sid;
+        }
+    };
+
+    // Poller instance
+    struct BAIDU_CACHELINE_ALIGNMENT Poller {
+        bthread_t tid{INVALID_BTHREAD};
+        // Pending registration requests, drained by the polling loop.
+        // NOTE(review): the template arguments of this queue are missing in
+        // this copy of the patch.
+        butil::MPSCQueue> op_queue;
+        // Callback used for io_uring/spdk etc
+        std::function callback;
+        // Init and Destroy function
+        std::function init_fn;
+        std::function release_fn;
+    };
+    // Poller group
+    struct BAIDU_CACHELINE_ALIGNMENT PollerGroup {
+        PollerGroup() : pollers(FLAGS_ub_poller_num), running(false) {}
+        std::vector pollers;
+        // True once the group's pollers have been asked to start
+        std::atomic running;
+    };
+    // One group per bthread tag, sized in GlobalInitialize()
+    static std::vector _poller_groups;
+
+    // Queue an ADD/REMOVE/MOD request for _cq_sid on one of the pollers
+    void PollerRegisterEvent(CqSidOp::OpType op, uint32_t events = EPOLLET);
+};
+
+} // namespace ub
+} // namespace brpc
+
+#else   // if BRPC_WITH_UBRING
+
+// Empty placeholder so the type name exists when UB support is compiled out.
+class UBShmEndpoint { };
+
+#endif
+
+#endif //BRPC_UB_ENDPOINT_H
\ No newline at end of file
diff --git a/src/brpc/ub/ub_helper.cpp b/src/brpc/ub/ub_helper.cpp
new file mode 100644
index 0000000000..7d014ae2c4
--- /dev/null
+++ b/src/brpc/ub/ub_helper.cpp
@@ -0,0 +1,120 @@
+#if BRPC_WITH_UBRING
+
+// NOTE(review): the header names of the next five includes are missing in
+// this copy of the patch.
+#include  // dlopen
+#include 
+#include 
+#include 
+#include 
+#include "butil/logging.h"
+#include "brpc/socket.h"
+#include "brpc/ub/ub_endpoint.h"
+#include "brpc/ub/ub_helper.h"
+#include "ub_ring_manager.h"
+
+namespace brpc {
+namespace ub {
+
+void* g_handle_ub = NULL;
+// Set by unit tests to bypass real UB initialization
+bool g_skip_ub_init = false;
+
+// True once global UB initialization has completed
+butil::atomic g_ub_available(false);
+
+// Marks UB unavailable, then releases the pollers, the ring manager and the
+// shared-memory manager, in that order.
+void GlobalRelease() {
+    g_ub_available.store(false, butil::memory_order_release);
+    UBShmEndpoint::GlobalRelease();
+    
UBRingManager::UbrMgrFini();
+    ShmMgrFini();
+}
+
+// Releases whatever has been initialized and terminates the process.
+static inline void ExitWithError() {
+    GlobalRelease();
+    exit(1);
+}
+
+// One-time global initialization: ring manager, timers, shared-memory manager
+// and the endpoint's poller groups. Any failure terminates the process.
+static void GlobalUBInitializeOrDieImpl() {
+    if (BAIDU_UNLIKELY(g_skip_ub_init)) {
+        // Just for UT
+        return;
+    }
+
+    if (UBRingManager::UbrMgrInit()) {
+        PLOG(ERROR) << "Fail to UbrMgrInit";
+        ExitWithError();
+    }
+
+    if (TimerInit()) {
+        PLOG(ERROR) << "Fail to TimerInit";
+        ExitWithError();
+    }
+
+    if (ShmMgrInit()) {
+        PLOG(ERROR) << "Fail to ShmMgrInit";
+        ExitWithError();
+    }
+
+    if (UBShmEndpoint::GlobalInitialize() < 0) {
+        // Report the step that actually failed; the previous text described an
+        // unrelated RDMA flag.
+        LOG(ERROR) << "Fail to initialize UBShmEndpoint";
+        ExitWithError();
+    }
+
+    // Release pairs with the acquire load in IsUBAvailable(), so a thread that
+    // observes `true' also observes the initialization done above.
+    g_ub_available.store(true, butil::memory_order_release);
+}
+
+static pthread_once_t initialize_UB_once = PTHREAD_ONCE_INIT;
+
+// Runs the global UB initialization exactly once; exits the process on failure.
+void GlobalUBInitializeOrDie() {
+    if (pthread_once(&initialize_UB_once,
+                     GlobalUBInitializeOrDieImpl) != 0) {
+        LOG(FATAL) << "Fail to pthread_once GlobalUBInitializeOrDie";
+        exit(1);
+    }
+}
+
+// Whether global UB initialization has completed and UB has not been disabled.
+bool IsUBAvailable() {
+    return g_ub_available.load(butil::memory_order_acquire);
+}
+
+// Turns UB off for the rest of the process; logs only on the first transition
+// from available to unavailable.
+void GlobalDisableUb() {
+    if (g_ub_available.exchange(false, butil::memory_order_acquire)) {
+        LOG(FATAL) << "ub is disabled due to some unrecoverable problem";
+    }
+}
+
+// Only the "baidu_std" protocol is supported over UB.
+bool SupportedByUB(std::string protocol) {
+    if (protocol.compare("baidu_std") == 0) {
+        return true;
+    }
+    return false;
+}
+
+// Starts the polling bthreads for `tag'. Returns true when
+// UBShmEndpoint::PollingModeInitialize reports success.
+bool InitPollingModeWithTag(bthread_tag_t tag,
+                            std::function<void()> callback,
+                            std::function<void()> init_fn,
+                            std::function<void()> release_fn) {
+    if (UBShmEndpoint::PollingModeInitialize(tag, callback, init_fn,
+                                             release_fn) == 0) {
+        return true;
+    }
+    return false;
+}
+
+} // namespace ub
+} // namespace brpc
+
+#else
+
+#include 
+#include "butil/logging.h"
+
+namespace brpc {
+namespace ub {
+void GlobalUBInitializeOrDie() {
+    LOG(ERROR) << "brpc is not compiled with rdma. 
To enable it, please refer to "
+               << "https://github.com/apache/brpc/blob/master/docs/en/rdma.md";
+    exit(1);
+}
+}
+}
+
+#endif // if BRPC_WITH_UBRING
\ No newline at end of file
diff --git a/src/brpc/ub/ub_helper.h b/src/brpc/ub/ub_helper.h
new file mode 100644
index 0000000000..64f844deb3
--- /dev/null
+++ b/src/brpc/ub/ub_helper.h
@@ -0,0 +1,49 @@
+#ifndef BRPC_UB_HELPER_H
+#define BRPC_UB_HELPER_H
+
+#if BRPC_WITH_UBRING
+
+// NOTE(review): the header names of the next three includes are missing in
+// this copy of the patch.
+#include 
+#include 
+#include 
+#include "bthread/types.h"
+
+
+namespace brpc {
+    namespace ub {
+
+        // Mark UB unavailable and release the global UB resources
+        void GlobalRelease();
+
+        // Initialize UB environment
+        // Exit if failed
+        void GlobalUBInitializeOrDie();
+
+        // Start the polling bthreads for `tag'; the callables are optional
+        // per-round / start / exit hooks. Returns true on success.
+        bool InitPollingModeWithTag(bthread_tag_t tag,
+                                    std::function callback = nullptr,
+                                    std::function init_fn = nullptr,
+                                    std::function release_fn = nullptr);
+
+        // If the UB environment is available
+        bool IsUBAvailable();
+
+        // Disable UB in the remaining lifetime of the process
+        void GlobalDisableUb();
+
+        // If the given protocol supported by UB
+        bool SupportedByUB(std::string protocol);
+
+    } // namespace ub
+} // namespace brpc
+#else
+namespace brpc {
+    namespace ub {
+
+        // NOTE(review): ub_helper.cpp defines only GlobalUBInitializeOrDie in
+        // its non-UB branch; confirm GlobalRelease is never called there.
+        void GlobalRelease();
+
+        // Initialize UB environment
+        // Exit if failed
+        void GlobalUBInitializeOrDie();
+
+    } // namespace ub
+} // namespace brpc
+#endif // if BRPC_WITH_UBRING
+
+#endif //BRPC_UB_HELPER_H
\ No newline at end of file
diff --git a/src/brpc/ub/ub_ring.cpp b/src/brpc/ub/ub_ring.cpp
new file mode 100644
index 0000000000..25b2b1b224
--- /dev/null
+++ b/src/brpc/ub/ub_ring.cpp
@@ -0,0 +1,1031 @@
+// NOTE(review): the header names of the next four includes are missing in
+// this copy of the patch.
+#include 
+#include 
+#include 
+#include 
+#include "butil/logging.h"
+#include "brpc/ub/ub_ring.h"
+
+namespace brpc {
+namespace ub {
+// Per-task-step values, written by UBRing::UbrSetTimeout
+uint32_t g_sleepTime[UBR_TASK_STEP_NUM] = {0};
+// Multiplier applied to ub_event_queue_timer_interval before it is used as a
+// nanosecond timer interval
+#define TIME_COVERSION 1000
+DEFINE_int32(ub_disconnect_timeout, 1, "Ubshm disconnection timeout.");
+DEFINE_int32(ub_connect_timeout, 1, "Ubshm connection timeout.");
+DEFINE_int32(ub_hb_timer_interval, 1, "Heartbeat timer interval.");
+DEFINE_int32(ub_hb_retry_cnt, 3, "Heartbeat retry times.");
+DEFINE_int32(ub_event_queue_timer_interval, 100, "Interval of the disconnection timer.");
+
+UBRing::UBRing()
+{}
+UBRing::~UBRing()
+{}
+
+// Maps the local shared memory and then the remote one into the trx.
+// Returns HLC_OK on success, otherwise the code of the step that failed.
+RETURN_CODE UBRing::UbrTrxMapShm(SHM *localShm, SHM *remoteShm)
+{
+    RETURN_CODE rc = UbrTrxMapLocalShm(localShm);
+    if (UNLIKELY(rc != HLC_OK)) {
+        LOG(ERROR) << "Trx map local shared memory failed.";
+        return rc;
+    }
+    rc = UbrTrxMapRemoteShm(remoteShm);
+    if (UNLIKELY(rc != HLC_OK)) {
+        LOG(ERROR) << "Trx map remote shared memory failed.";
+        return rc;
+    }
+    return HLC_OK;
+}
+
+// Actively closes the connection: publishes the closing/closed flags through
+// the event queues, waits until the local rx event queue reports CLOSED, then
+// clears the trx resources.
+// Returns HLC_OK on success, HLC_ERR if the pre-check or the final cleanup
+// fails, HLC_ERR_TIMEOUT if the wait times out (resources are still cleared).
+RETURN_CODE UBRing::UbrTrxClose() {
+    if (UNLIKELY(UbrTrxCloseCheck(_trx) != HLC_OK)) {
+        return HLC_ERR;
+    }
+    ((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = UBR_STATE_CLOSING;
+
+    uint32_t disconnectTimeout = FLAGS_ub_disconnect_timeout;
+    uint64_t startTime = GetCurNanoSeconds();
+
+    // Mark our own tx side closed only if it was still connected.
+    if (((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr)->flag == UBR_STATE_CONNECTED) {
+        ((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr)->flag = UBR_STATE_CLOSED;
+        _trx->ubrTx.trxState = UBR_STATE_CLOSED;
+    }
+
+    ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CLOSED;
+    // Poll the local rx event queue until it reads CLOSED or the timeout hits.
+    while (((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag != UBR_STATE_CLOSED) {
+        UbrSetSleepTask(UBR_TASK_CLOSE);
+        if (HasTimedOut(startTime, disconnectTimeout) != HLC_OK) {
+            LOG(ERROR) << "Local shm " << _trx->localShm.name
+                       << " wait for the peer to close the connection failed.";
+            _trx->ubrRx.trxState = UBR_STATE_CLOSED;
+            ClearTrxResource(_trx, startTime, UBR_SEND_CLOSE);
+            return HLC_ERR_TIMEOUT;
+        }
+        usleep(1);
+    }
+    _trx->ubrRx.trxState = UBR_STATE_CLOSED;
+    RETURN_CODE rc;
+    if (UNLIKELY((rc = ClearTrxResource(_trx, startTime, UBR_SEND_CLOSE)) != HLC_OK)) {
+        LOG(ERROR) << "Trx close, clear trx resource failed, trx local name=" << _trx->localShm.name;
+        return HLC_ERR;
+    }
+    LOG(INFO) << "The peer is closed, local name=" << _trx->localShm.name;
+    return HLC_OK;
+}
+
+// Starts the periodic timer whose callback (UbrTrxCloseCallback) watches for
+// a close initiated by the peer, and stores its fd in _trx->timerFd.
+// Returns HLC_OK on success, HLC_ERR if _trx is null or the timer cannot start.
+RETURN_CODE UBRing::UbrAddCloseTimer() {
+    if (UNLIKELY(_trx == NULL)) {
+        LOG(ERROR) 
<< "Trx add close timer failed, trx is null.";
+        return HLC_ERR;
+    }
+
+    // The interval is used as nanoseconds with tv_sec fixed at 0.
+    // NOTE(review): a flag value that makes this reach one second or more
+    // would give an out-of-range tv_nsec - confirm the accepted range.
+    uint32_t eventQTimerInterval = FLAGS_ub_event_queue_timer_interval * TIME_COVERSION;
+    struct itimerspec timeSpec = {
+        .it_interval = {.tv_sec = 0, .tv_nsec = eventQTimerInterval},
+        .it_value = {.tv_sec = 0, .tv_nsec = 1}
+    };
+    int timerFd = TimerStart(&timeSpec, UbrTrxCloseCallback, (void*)_trx);
+    if (UNLIKELY(timerFd == -1)) {
+        LOG(ERROR) << "Start ubr close timer failed, trx local name=" << _trx->localShm.name;
+        return HLC_ERR;
+    }
+    _trx->timerFd = timerFd;
+    return HLC_OK;
+}
+
+// Starts the close timer and then the heartbeat timer. If the heartbeat timer
+// cannot be started, the close timer is deleted again.
+// Returns HLC_OK on success, HLC_ERR otherwise.
+RETURN_CODE UBRing::UbrAddTimer() {
+    if (UNLIKELY(UbrAddCloseTimer() != HLC_OK)) {
+        LOG(ERROR) << "Ubr " << _trx->localShm.name << " add closed timer failed.";
+        return HLC_ERR;
+    }
+
+    if (UNLIKELY(UbrAddHBTimer() != HLC_OK)) {
+        DeleteTimerSafe((uint32_t)_trx->timerFd);
+        LOG(ERROR) << "Ubr " << _trx->localShm.name << " add heartbeat timer failed.";
+        return HLC_ERR;
+    }
+    return HLC_OK;
+}
+
+// Close-timer callback. `args' is the UbrTrx to inspect. It acts only when the
+// local rx event queue reads CLOSED while the local tx event queue does not,
+// i.e. the peer closed first; it then closes our side and clears the trx.
+// Always returns nullptr.
+void* UBRing::UbrTrxCloseCallback(void* args) {
+    auto* trx = (UbrTrx*) args;
+    if (UNLIKELY(UBRing::UbrTrxCallbackCheck(trx) != HLC_OK)) {
+        return nullptr;
+    }
+
+    auto* localRxEventQ = (UbrEventQMsg *)trx->ubrRx.localRxEventQ.addr;
+    auto* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr;
+    if (localRxEventQ->flag != UBR_STATE_CLOSED || localTxEventQ->flag == UBR_STATE_CLOSED) {
+        return nullptr;
+    }
+    trx->ubrRx.trxState = UBR_STATE_CLOSED;
+    // NOTE(review): `fd' is not used in the part of this function visible here.
+    int fd = (int)trx->localShm.fd;
+    do {
+        // closeCnt == 0 is treated as "another close is already in progress".
+        // NOTE(review): the load and the decrement below are separate steps.
+        if (ATOMIC_LOAD(trx->closeCnt) == 0) {
+            LOG(ERROR) << "Trx close callback failed, exist other closing call, name=" << trx->localShm.name;
+            break;
+        }
+        ATOMIC_SUB(trx->closeCnt, 1);
+
+        uint64_t startTime = GetCurNanoSeconds();
+
+        if (localTxEventQ->flag == UBR_STATE_CONNECTED || ATOMIC_LOAD(trx->closeCnt) == 1) {
+            localTxEventQ->flag = UBR_STATE_CLOSED;
+            trx->ubrTx.trxState = UBR_STATE_CLOSED;
+        }
+        UbrEventQMsg* remoteRxEventQ = (UbrEventQMsg *)trx->ubrTx.remoteRxEventQ.addr;
+        if (remoteRxEventQ == nullptr) {
+            
LOG(ERROR) << "Trx close callback failed, " << trx->localShm.name << " remoteRxEventQ is NULL.";
+            break;
+        }
+        // Tell the peer that our side is closed, then release the resources.
+        remoteRxEventQ->flag = UBR_STATE_CLOSED;
+        if (UNLIKELY(ClearTrxResource(trx, startTime, UBR_CALL_BACK_CLOSE, 1) != HLC_OK)) {
+            LOG(ERROR) << "Trx close callback failed, " << trx->localShm.name << " clear trx resource failed.";
+            break;
+        }
+    } while (0);
+    return nullptr;
+}
+
+// Starts the periodic heartbeat timer (period FLAGS_ub_hb_timer_interval
+// seconds, callback UbrTrxHBCallback) and stores its fd in _trx->hbTimerFd.
+// Returns HLC_OK on success, HLC_ERR if _trx is null or the timer cannot start.
+RETURN_CODE UBRing::UbrAddHBTimer() {
+    if (UNLIKELY(_trx == NULL)) {
+        LOG(ERROR) << "Trx add heartbeat timer failed, trx is null.";
+        return HLC_ERR;
+    }
+
+    struct itimerspec timeSpec = {
+        .it_interval = {.tv_sec = FLAGS_ub_hb_timer_interval, .tv_nsec = 0},
+        .it_value = {.tv_sec = 0, .tv_nsec = 1}
+    };
+    int timerFd = TimerStart(&timeSpec, UbrTrxHBCallback, (void*)_trx);
+    if (UNLIKELY(timerFd == -1)) {
+        LOG(ERROR) << "Start ubr heartbeat timer failed.";
+        return HLC_ERR;
+    }
+    _trx->hbTimerFd = timerFd;
+    return HLC_OK;
+}
+
+// Tears a trx down without a close handshake (heartbeat loss or UB event):
+// marks both directions closed, deletes the timers, waits
+// FLAGS_ub_flying_io_timeout seconds, frees both shared memories and releases
+// the trx from the manager.
+//   trx  - the trx to clear
+//   fd   - not used in this function
+//   type - what triggered the teardown; selects how the heartbeat timer is
+//          deleted and the prefix used in error logs
+// Returns HLC_ERR if the pre-check fails, HLC_OK otherwise (free failures are
+// only logged).
+RETURN_CODE UBRing::UbrPassiveClearTrx(UbrTrx *trx, int fd, PASSIVE_DISC_TYPE type) {
+    if (UNLIKELY(UbrTrxCloseCheck(trx) != HLC_OK)) {
+        return HLC_ERR;
+    }
+    trx->ubrTx.trxState = UBR_STATE_CLOSED;
+    trx->ubrRx.trxState = UBR_STATE_CLOSED;
+    DeleteTimerSafe((uint32_t)trx->timerFd);
+    // Never leave this null: it is streamed into LOG below, and streaming a
+    // null `const char*' is undefined behaviour.
+    const char *typeName = "Passive disconnect";
+    if (type == UBR_HEARTBEAT) {
+        DeleteTimer((uint32_t)trx->hbTimerFd);
+        typeName = "Trx heartbeat";
+    } else if (type == UBR_UB_EVENT) {
+        DeleteTimerSafe((uint32_t)trx->hbTimerFd);
+        typeName = "Ub event callback";
+    }
+    // Blocks the calling thread for the whole wait.
+    sleep(FLAGS_ub_flying_io_timeout);
+
+    int rc = ShmLocalFree(&trx->remoteShm);
+    if (rc != HLC_OK) {
+        LOG(ERROR) << typeName << ", delete remote shm failed. ret=" << rc;
+    }
+    rc = ShmLocalFree(&trx->localShm);
+    if (rc != HLC_OK) {
+        LOG(ERROR) << typeName << ", delete local shm failed. 
ret=" << rc;
+    }
+
+    UBRingManager::ReleaseUbrTrxFromMgr(trx);
+    return HLC_OK;
+}
+
+// Heartbeat-timer callback. `args' is the UbrTrx to check. Each tick sets the
+// peer's heartBeat flag and checks whether the peer has set ours since the
+// previous tick; after more than FLAGS_ub_hb_retry_cnt consecutive misses the
+// trx is cleared passively. Always returns NULL.
+void* UBRing::UbrTrxHBCallback(void* args) {
+    auto* trx = (UbrTrx*) args;
+    if (UNLIKELY(UbrTrxCallbackCheck(trx) != HLC_OK)) {
+        return NULL;
+    }
+
+    auto* localDataStatus = (UbrDataStatusQMsg *)trx->ubrTx.localDataStatusQ.addr;
+    auto* remoteDataStatus = (UbrDataStatusQMsg *)trx->ubrRx.remoteDataStatusQ.addr;
+    if (UNLIKELY(localDataStatus == NULL || remoteDataStatus == NULL)) {
+        LOG(ERROR) << "Heartbeat error, datastatus is NULL.";
+        return NULL;
+    }
+
+    // Heartbeats are exchanged only while both directions are connected.
+    if (trx->ubrTx.trxState != UBR_STATE_CONNECTED || trx->ubrRx.trxState != UBR_STATE_CONNECTED) {
+        LOG_EVERY_SECOND(INFO) << "Heartbeat cannot be started, wait connected state.";
+        return NULL;
+    }
+
+    remoteDataStatus->heartBeat = 1;
+    if (localDataStatus->heartBeat == 1) {
+        // The peer answered: consume the flag and reset the miss counter.
+        localDataStatus->heartBeat = 0;
+        trx->ubrTx.hbRetryCnt = 0;
+        return NULL;
+    }
+
+    ++trx->ubrTx.hbRetryCnt;
+    if (trx->ubrTx.hbRetryCnt <= FLAGS_ub_hb_retry_cnt) {
+        return NULL;
+    }
+
+    // NOTE(review): `fd' is the local shm fd, yet it is logged under the label
+    // "hbTimerFd" - confirm which value was intended.
+    int fd = (int)trx->localShm.fd;
+    LOG(INFO) << "Hlc heartbeat, start to clear trx resource. 
hbTimerFd=" << fd << ", shmName=" << trx->localShm.name;
+    UbrPassiveClearTrx(trx, fd, UBR_HEARTBEAT);
+    LOG(INFO) << "Hlc heartbeat clear trx resource finish.";
+    return NULL;
+}
+
+// Arms a one-shot timer that fires after FLAGS_ub_flying_io_timeout seconds
+// and runs UbrAsynClearCallback on `trx'; the timer fd is stored in
+// trx->clearTimerFd.
+// Returns HLC_OK on success, HLC_ERR if `trx' is null or the timer cannot start.
+RETURN_CODE UBRing::UbrAddAsynClearTimer(UbrTrx *trx) {
+    if (UNLIKELY(trx == NULL)) {
+        LOG(ERROR) << "Trx add close timer failed, trx is null.";
+        return HLC_ERR;
+    }
+
+    struct itimerspec timeSpec = {
+        .it_interval = {.tv_sec = 0, .tv_nsec = 0},
+        .it_value = {.tv_sec = FLAGS_ub_flying_io_timeout, .tv_nsec = 0}
+    };
+
+    int timerFd = TimerStart(&timeSpec, UbrAsynClearCallback, (void*)trx);
+    if (UNLIKELY(timerFd == -1)) {
+        // Stream the name: the previous printf-style "%s" plus comma operator
+        // printed a literal "%s" and dropped the name.
+        LOG(ERROR) << "Start ubr close timer failed, trx name=" << trx->localShm.name << ".";
+        return HLC_ERR;
+    }
+    trx->clearTimerFd = timerFd;
+    return HLC_OK;
+}
+
+// Asynchronous-clear timer callback. `args' is the UbrTrx to release. Frees
+// the remote shm, then the local shm, then releases the trx from the manager;
+// each failure is logged and the remaining steps still run. Always returns NULL.
+void *UBRing::UbrAsynClearCallback(void *args)
+{
+    auto* trx = (UbrTrx*) args;
+    if (UNLIKELY(trx == NULL)) {
+        LOG(ERROR) << "Trx close, trx is null.";
+        return NULL;
+    }
+
+    if (UNLIKELY(ShmRemoteFree(&trx->remoteShm) != HLC_OK)) {
+        LOG(ERROR) << "Trx close, remote shm " << trx->remoteShm.name << " free failed.";
+    }
+
+    if (UNLIKELY(UbrTrxFreeShm(trx) != HLC_OK)) {
+        LOG(ERROR) << "Trx close, wait for local shm " << trx->localShm.name << " free fail.";
+    }
+
+    if (UNLIKELY(UBRingManager::ReleaseUbrTrxFromMgr(trx) != HLC_OK)) {
+        LOG(ERROR) << "Trx close, release shm " << trx->localShm.name << " trx failed.";
+    }
+    return NULL;
+}
+
+// Writes `bufLen' bytes from `buf' into the peer's data queue as a sequence of
+// fixed-size chunks.
+// Returns the number of bytes sent, HLC_ERR if the pre-check fails, or
+// HLC_RETRY when the queue does not have enough free chunks for the whole
+// buffer (nothing is written in that case).
+int UBRing::UbrTrxSend(const void *buf, uint32_t bufLen)
+{
+    if (UNLIKELY(CheckTrxSendPreCheck(_trx) != HLC_OK)) {
+        return HLC_ERR;
+    }
+    // 1.2 Compute the free space
+    auto *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr;
+    auto *dataMsg = (UbrMsgFormat *)_trx->ubrTx.remoteDataQ.addr;
+    uint32_t cap = _trx->ubrTx.capacity;
+    uint32_t tail = dataStatusMsg->tail;
+    // Free chunks between our write position and the published tail.
+    uint32_t remainChunkNum =
+        (_trx->ubrTx.writePos > tail) ? 
(tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos);
+    uint32_t needMsgChunkNum = CalcUbrMsgChunkCnt(bufLen);
+    if (remainChunkNum < needMsgChunkNum) {
+        return HLC_RETRY;
+    }
+    // Chunks are staged in a local buffer and copied to the queue in one go.
+    UbrMsgFormat *msg = &(_trx->ubrTx.localMsgSpace);
+    uint32_t totalSendLen = 0;
+    uint32_t remainBufLen = bufLen;
+    uint8_t isLastPkt = 0;
+    // Publish a new io id in the peer's rx event queue before writing data.
+    _trx->ubrTx.outIoId++;
+    ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->ioId = _trx->ubrTx.outIoId;
+    while (remainBufLen > 0) {
+        // The last chunk carries the EOF flag and the remaining byte count;
+        // every other chunk is full.
+        isLastPkt = (uint8_t)(remainBufLen <= UBR_MSG_PAYLOAD_LEN);
+        msg->header[UBR_MSG_FLAG_INDEX] = isLastPkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST;
+        msg->header[UBR_MSG_LEN_INDEX] = isLastPkt ? (uint8_t)remainBufLen : UBR_MSG_PAYLOAD_LEN;
+        msg->header[UBR_MSG_CUR_INDEX] = 0;
+        memcpy(msg->payload.inner, (const uint8_t *)buf + totalSendLen, msg->header[UBR_MSG_LEN_INDEX]);
+        Copy64Byte((int8_t *)&dataMsg[_trx->ubrTx.writePos], (int8_t *)msg);
+        _trx->ubrTx.writePos = (_trx->ubrTx.writePos + 1) % cap;
+        totalSendLen += msg->header[UBR_MSG_LEN_INDEX];
+        remainBufLen -= msg->header[UBR_MSG_LEN_INDEX];
+    }
+    return (int)totalSendLen;
+}
+
+// Non-blocking receive into `buf' (at most `bufLen' bytes).
+// Returns the number of bytes copied, HLC_RETRY when the chunk at the read
+// position is empty, 0 when the parameter check reports UBR_NOT_CONNECTED, or
+// the parameter check's own error code.
+int UBRing::UbrTrxRecv(void *buf, uint32_t bufLen)
+{
+    RETURN_CODE rc = HLC_OK;
+    if (UNLIKELY((rc = CheckTrxRecvParam(_trx, buf, bufLen)) != HLC_OK)) {
+        return (rc == UBR_NOT_CONNECTED) ? 0 : rc;
+    }
+    UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr;
+    uint32_t readPosEnd = _trx->ubrRx.readPos;
+    uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX];
+    if (flag == UBR_MSG_CHUNK_NONE) {
+        return HLC_RETRY;
+    }
+    // NOTE(review): the target type of this static_cast is missing in this
+    // copy of the patch.
+    return UbrTrxRecvBlockMode(static_cast(buf), bufLen);
+}
+
+// Copies chunks into `dest' until an EOF chunk has been consumed or `bufLen'
+// bytes were copied. An empty chunk is polled again without sleeping.
+// Returns the number of bytes copied, HLC_ERR if the per-chunk pre-check
+// fails, or the parameter-check results described for UbrTrxRecv.
+int UBRing::UbrTrxRecvBlockMode(uint8_t *dest, uint32_t bufLen)
+{
+    RETURN_CODE rc = HLC_OK;
+    if (UNLIKELY((rc = CheckTrxRecvParam(_trx, dest, bufLen)) != HLC_OK)) {
+        return (rc == UBR_NOT_CONNECTED) ? 
0 : rc;
+    }
+
+    int32_t totalCopied = 0;
+    int32_t remainingLen = (int32_t)bufLen;
+    bool notEofEncountered = true;
+
+    UbrRx *ubrRx = &_trx->ubrRx;
+    UbrMsgFormat *dataMsg = (UbrMsgFormat *)ubrRx->localDataQ.addr;
+    // epEofPos follows readPos only if the two were equal on entry.
+    bool needUpdateEpollEofPos = ubrRx->readPos == ubrRx->epEofPos;
+
+    while (notEofEncountered && remainingLen > 0) {
+        if (UNLIKELY(CheckTrxRecvPreCheck(_trx) != HLC_OK)) {
+            return HLC_ERR;
+        }
+        UbrMsgFormat *currentChunk = &dataMsg[ubrRx->readPos];
+        uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX];
+        if (flag == UBR_MSG_CHUNK_NONE) {
+            // Chunk not written yet: check again.
+            continue;
+        }
+        if (flag == UBR_MSG_CHUNK_EOF) {
+            notEofEncountered = false;
+        }
+        // A chunk may be consumed across several calls; the CUR header byte
+        // records how much of its payload has been handed out already.
+        uint8_t chunkMsgLen = currentChunk->header[UBR_MSG_LEN_INDEX];
+        uint8_t curIndex = currentChunk->header[UBR_MSG_CUR_INDEX];
+        uint8_t availableData = chunkMsgLen - curIndex;
+
+        int32_t copyLen = (remainingLen < availableData) ? remainingLen : availableData;
+        memcpy(dest + totalCopied, dataMsg[ubrRx->readPos].payload.inner + curIndex, (size_t)copyLen);
+        totalCopied += copyLen;
+        remainingLen -= copyLen;
+        currentChunk->header[UBR_MSG_CUR_INDEX] += (uint8_t)copyLen;
+        if (LIKELY(currentChunk->header[UBR_MSG_CUR_INDEX] == chunkMsgLen)) {
+            // Chunk fully consumed: mark it empty, publish the new tail and
+            // advance the read position.
+            currentChunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE;
+            UpdateDataQTail(_trx);
+            ubrRx->readPos = (ubrRx->readPos + 1) % ubrRx->capacity;
+        }
+    }
+    if (needUpdateEpollEofPos) {
+        ubrRx->epEofPos = ubrRx->readPos;
+    }
+    return (int)totalCopied;
+}
+
+// Gathers the `iovcnt' buffers in `iov' and writes them into the peer's data
+// queue as fixed-size chunks.
+// Returns the number of bytes written, HLC_ERR if the pre-check fails, or the
+// non-OK code from WritevHasEnoughSpace when the queue lacks room.
+ssize_t UBRing::UbrTrxWritev(const struct iovec *iov, int iovcnt)
+{
+    if (UNLIKELY(CheckTrxSendPreCheck(_trx) != HLC_OK)) {
+        return HLC_ERR;
+    }
+
+    size_t bufLen = 0;
+    for (int i = 0; i < iovcnt; i++) {
+        bufLen += iov[i].iov_len;
+    }
+    RETURN_CODE rc = WritevHasEnoughSpace(bufLen);
+    if (rc != HLC_OK) {
+        return rc;
+    }
+
+    UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrTx.remoteDataQ.addr;
+    UbrMsgFormat *msg = &(_trx->ubrTx.localMsgSpace);
+    int curIov = 0;
+    size_t curIovPos = 0;
+    ssize_t totalSendLen = 0;
+    size_t pktRemainN = 
0;
+    size_t iovRemain = 0;
+    size_t fulled = 0;
+    uint8_t isLastPkt = 0;
+    uint8_t curPktLen = 0;
+    // Publish a new io id in the peer's rx event queue before writing data.
+    _trx->ubrTx.outIoId++;
+    ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->ioId = _trx->ubrTx.outIoId;
+    while (bufLen > 0) {
+        // Build one chunk in the local staging buffer; the last chunk carries
+        // the EOF flag and the remaining byte count.
+        isLastPkt = (uint8_t)(bufLen <= UBR_MSG_PAYLOAD_LEN);
+        curPktLen = isLastPkt ? (uint8_t)bufLen : UBR_MSG_PAYLOAD_LEN;
+        msg->header[UBR_MSG_FLAG_INDEX] = isLastPkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST;
+        msg->header[UBR_MSG_LEN_INDEX] = curPktLen;
+        msg->header[UBR_MSG_CUR_INDEX] = 0;
+        pktRemainN = curPktLen;
+        // Fill the chunk payload from as many iovecs as needed.
+        while (curIov < iovcnt && pktRemainN > 0) {
+            iovRemain = (iov[curIov].iov_len - curIovPos);
+            fulled = iovRemain > pktRemainN ? pktRemainN : iovRemain;
+            memcpy((msg->payload.inner + (curPktLen - (uint8_t)pktRemainN)),
+                   (uint8_t *)(iov[curIov].iov_base) + curIovPos,
+                   fulled);
+            pktRemainN -= fulled;
+            curIovPos += fulled;
+            if (curIovPos == iov[curIov].iov_len) {
+                curIov++;
+                curIovPos = 0;
+            }
+        }
+
+        Copy64Byte((int8_t *)&dataMsg[_trx->ubrTx.writePos], (int8_t *)msg);
+        _trx->ubrTx.writePos = (_trx->ubrTx.writePos + 1) % _trx->ubrTx.capacity;
+        totalSendLen += (ssize_t)curPktLen;
+        bufLen -= (int)curPktLen;
+    }
+    return totalSendLen;
+}
+
+// Non-blocking scatter read into the `iovcnt' buffers in `iov'.
+// Returns the number of bytes read; -1 with errno EAGAIN when the chunk at the
+// read position is empty; -1 with errno EPIPE when the blocking helper
+// returns -1; 0 when the parameter check reports UBR_NOT_CONNECTED; otherwise
+// the parameter check's own error code.
+ssize_t UBRing::UbrTrxReadv(const struct iovec *iov, int iovcnt)
+{
+    RETURN_CODE rc = HLC_OK;
+    if (UNLIKELY((rc = CheckTrxRecvParam(_trx, iov, (uint32_t)iovcnt)) != HLC_OK)) {
+        return (rc == UBR_NOT_CONNECTED) ? 
0 : rc;
+    }
+    UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr;
+    uint32_t readPosEnd = _trx->ubrRx.readPos;
+    uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX];
+    if (flag == UBR_MSG_CHUNK_NONE) {
+        // Nothing to read yet.
+        errno = EAGAIN;
+        return -1;
+    }
+    ssize_t nr = UbrTrxReadvBlockMode(iov, iovcnt);
+    if (UNLIKELY(nr == -1)) {
+        LOG(ERROR) << "Non-blocking readv msg in failed, connection has been closed.";
+        errno = EPIPE;
+        return -1;
+    }
+    return nr;
+}
+
+// Reads into the `iovcnt' buffers in `iov' through StartReadv, up to their
+// combined length. epEofPos follows readPos only if the two were equal on entry.
+// Returns StartReadv's result, 0 when the parameter check reports
+// UBR_NOT_CONNECTED, otherwise the parameter check's own error code.
+ssize_t UBRing::UbrTrxReadvBlockMode(const struct iovec *iov, int iovcnt)
+{
+    RETURN_CODE rc = HLC_OK;
+    if (UNLIKELY((rc = CheckTrxRecvParam(_trx, iov, (uint32_t)iovcnt)) != HLC_OK)) {
+        return (rc == UBR_NOT_CONNECTED) ? 0 : rc;
+    }
+
+    size_t remainBufLen = 0;
+    for (int i = 0; i < iovcnt; i++) {
+        remainBufLen += iov[i].iov_len;
+    }
+
+    bool needUpdateEpollEofPos = _trx->ubrRx.readPos == _trx->ubrRx.epEofPos;
+    ssize_t totalRecvLen = StartReadv(_trx, iov, iovcnt, remainBufLen);
+
+    if (needUpdateEpollEofPos) {
+        _trx->ubrRx.epEofPos = _trx->ubrRx.readPos;
+    }
+    return totalRecvLen;
+}
+
+// Readiness check for the receive side.
+//   epEvent - event mask; with EPOLLET set the check is edge-triggered: it
+//             reports ready only when the peer has published a new io id
+//             since the last positive result, and it looks at the chunk at
+//             epEofPos instead of readPos.
+// Returns HLC_OK when data is available, MPA_MUXER_NOT_READY when not, or
+// HLC_ERR when the trx is missing, unmapped or its tx side is not connected.
+RETURN_CODE UBRing::IsUbrTrxReadable(uint32_t epEvent)
+{
+    if (UNLIKELY(_trx == NULL)) {
+        LOG(ERROR) << "The trx to be checked is NULL.";
+        return HLC_ERR;
+    }
+    if (UNLIKELY(_trx->localShm.addr == NULL)) {
+        LOG(ERROR) << "The trx localShm to be checked is NULL.";
+        return HLC_ERR;
+    }
+    if (UNLIKELY(_trx->ubrTx.trxState != UBR_STATE_CONNECTED)) {
+        // TODO(mwj): should the log statements in these blocks be removed?
+        // LOG(ERROR) << "The trx is not connected state.";
+        return HLC_ERR;
+    }
+
+    uint64_t ioId = ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->ioId;
+    if ((epEvent & EPOLLET) && ioId == _trx->ubrRx.inIoId) {
+        return MPA_MUXER_NOT_READY;
+    }
+
+    uint32_t readPosEnd = _trx->ubrRx.readPos;
+    if (epEvent & EPOLLET) {
+        readPosEnd = _trx->ubrRx.epEofPos;
+    }
+
+    UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr;
+    uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX];
+    if (flag == UBR_MSG_CHUNK_NONE) {
+        return 
MPA_MUXER_NOT_READY;
+    }
+    if (epEvent & EPOLLET) {
+        // Remember the io id so the same write is not reported twice.
+        _trx->ubrRx.inIoId = ioId;
+    }
+    return HLC_OK;
+}
+
+// Readiness check for the send side.
+//   epEvent - event mask; with EPOLLET set the check is edge-triggered: it
+//             reports ready only when the number of free chunks has grown
+//             since the previous call.
+// Returns HLC_OK when the queue can take data, MPA_MUXER_NOT_READY when it is
+// full (or has not gained room in edge-triggered mode), or HLC_ERR when the
+// trx is missing, unmapped or not connected.
+RETURN_CODE UBRing::IsUbrTrxWriteable(uint32_t epEvent)
+{
+    if (UNLIKELY(_trx == NULL)) {
+        LOG(ERROR) << "The trx to be checked is NULL.";
+        return HLC_ERR;
+    }
+    if (UNLIKELY(_trx->localShm.addr == NULL)) {
+        LOG(ERROR) << "The trx localShm to be checked is NULL.";
+        return HLC_ERR;
+    }
+    if (UNLIKELY((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr == NULL)) {
+        LOG(ERROR) << "The trx localTxEventQ addr is NULL.";
+        return HLC_ERR;
+    }
+    if (UNLIKELY((UbrEventQMsg *)_trx->ubrTx.localDataStatusQ.addr == NULL)) {
+        LOG(ERROR) << "The trx localDataStatusQ addr is NULL.";
+        return HLC_ERR;
+    }
+
+    if (UNLIKELY(_trx->ubrTx.trxState != UBR_STATE_CONNECTED)) {
+        LOG(ERROR) << "The trx is not connected state.";
+        return HLC_ERR;
+    }
+
+    UbrDataStatusQMsg *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr;
+    uint32_t cap = _trx->ubrTx.capacity;
+    uint32_t tail = dataStatusMsg->tail;
+    // Free chunks between our write position and the published tail.
+    uint32_t remainChunkNum =
+        (_trx->ubrTx.writePos > tail) ? 
(tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); + if (remainChunkNum == 0) { + _trx->ubrTx.epLastCap = remainChunkNum; + return MPA_MUXER_NOT_READY; + } + + if ((epEvent & EPOLLET) && (_trx->ubrTx.epLastCap >= remainChunkNum)) { + _trx->ubrTx.epLastCap = remainChunkNum; + return MPA_MUXER_NOT_READY; + } + _trx->ubrTx.epLastCap = remainChunkNum; + return HLC_OK; +} + +RETURN_CODE UBRing::UbrSetTimeout(UbrTaskStep taskType, int timeout) +{ + if (taskType >= UBR_TASK_STEP_NUM || timeout < 0) { + LOG(ERROR) << "Set timeout failed, invalid task type."; + return HLC_ERR; + } + + g_sleepTime[taskType] = (uint32_t)timeout; + LOG(INFO) << "Set timeout success, taskType=" << taskType << ", timeout=" << timeout; + return HLC_OK; +} + +RETURN_CODE UBRing::UbrTrxFreeShm(UbrTrx *trx) +{ + if (trx == NULL) { + LOG(ERROR) << "Trx is NULL."; + return HLC_ERR; + } + + RETURN_CODE rc = HLC_OK; + rc = ShmMunmap(&trx->localShm); + if (UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) << "Trx close, local unmap " << trx->localShm.name << " shm fail."; + return HLC_ERR; + } + + rc = ShmFree(&trx->localShm); + if (UNLIKELY(rc != HLC_OK)) { + if (UNLIKELY(rc == SHM_ERR_RESOURCE_ATTACHED || rc == SHM_ERR_NOT_FOUND)) { + LOG(INFO) << "Wait for " << trx->remoteShm.name << " remote free shm."; + return HLC_OK; + } + LOG(ERROR) << "Wait for " << trx->localShm.name << " local shm free fail."; + return HLC_ERR; + } + + size_t nameLen = strlen(trx->remoteShm.name); + if (!(nameLen <= 0 || nameLen > SHM_MAX_NAME_LEN || trx->remoteShm.len <= 0)) { + rc = ShmFree(&trx->remoteShm); + } + if (rc != HLC_OK) { + if (rc == SHM_ERR_RESOURCE_ATTACHED || rc == SHM_ERR_NOT_FOUND) { + LOG(INFO) << "Wait for " << trx->remoteShm.name << " remote free shm."; + return HLC_OK; + } + LOG(ERROR) << "Wait for " << trx->remoteShm.name << " remote shm free fail."; + return HLC_ERR; + } + + return HLC_OK; +} + +void UBRing::PreWriteAddr(uint8_t *addr, size_t len) +{ + if (addr == NULL) { + return; + } + + 
size_t i = 0; + while (i < len) { + if (i + sizeof(uint64_t) <= len) { + *(uint64_t *)(addr + i) = (uint64_t)0; + i += sizeof(uint64_t); + } else if (i + sizeof(uint32_t) < len) { + *(uint32_t *)(addr + i) = (uint32_t)0; + i += sizeof(uint32_t); + } else if (i + sizeof(uint16_t) < len) { + *(uint16_t *)(addr + i) = (uint16_t)0; + i += sizeof(uint16_t); + } else { + *(addr + i) = (uint8_t)0; + i += sizeof(uint8_t); + } + } +} + +void UBRing::PrewriteUbrTx(UbrTx *tx) +{ + if (tx == NULL) { + return; + } + PreWriteAddr(tx->remoteDataQ.addr, tx->capacity * sizeof(UbrMsgFormat)); +} + +void UBRing::PrewriteUbrRx(UbrRx *rx) +{ + if (rx == NULL) { + return; + } + PreWriteAddr(rx->localDataQ.addr, rx->capacity * sizeof(UbrMsgFormat)); +} + +RETURN_CODE UBRing::UbrTrxMapLocalShm(SHM *localShm) +{ + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, trx is null."; + return HLC_ERR; + } + if (UNLIKELY(localShm == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, localShm is null."; + return HLC_ERR; + } + _trx->localShm = *localShm; + _trx->ubrTx.localTxEventQ.addr = localShm->addr + TX_EVENTQ_ADDR_OFFSET; + _trx->ubrTx.localTxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrRx.localRxEventQ.addr = localShm->addr + RX_EVENTQ_ADDR_OFFSET; + _trx->ubrRx.localRxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrTx.localDataStatusQ.addr = localShm->addr + DATASTATUSQ_ADDR_OFFSET; + _trx->ubrTx.localDataStatusQ.len = UBR_DATASTATUSQ_LEN; + size_t addrAlignedOffset = Aligned64Offset(localShm->addr + DATAQ_ADDR_OFFSET); + LOG(DEBUG) << "UbrRx's localDataQ address will aligned with offset=" << addrAlignedOffset; + _trx->ubrRx.localDataQ.addr = localShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; + _trx->ubrRx.localDataQ.len = localShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; + return HLC_OK; +} + +RETURN_CODE UBRing::UbrTrxMapRemoteShm(SHM *remoteShm) +{ + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, trx is null."; + return HLC_ERR; 
+ } + if (UNLIKELY(remoteShm == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, remoteShm is null."; + return HLC_ERR; + } + _trx->remoteShm = *remoteShm; + _trx->ubrRx.remoteTxEventQ.addr = remoteShm->addr + TX_EVENTQ_ADDR_OFFSET; + _trx->ubrRx.remoteTxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrTx.remoteRxEventQ.addr = remoteShm->addr + RX_EVENTQ_ADDR_OFFSET; + _trx->ubrTx.remoteRxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrRx.remoteDataStatusQ.addr = remoteShm->addr + DATASTATUSQ_ADDR_OFFSET; + _trx->ubrRx.remoteDataStatusQ.len = UBR_DATASTATUSQ_LEN; + size_t addrAlignedOffset = Aligned64Offset(remoteShm->addr + DATAQ_ADDR_OFFSET); + LOG(DEBUG) << "UbrTx's remoteDataQ will aligned with offset=" << addrAlignedOffset; + _trx->ubrTx.remoteDataQ.addr = remoteShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; + _trx->ubrTx.remoteDataQ.len = remoteShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; + return HLC_OK; +} + +RETURN_CODE UBRing::UbrServerTrxInit(SHM *localShm, SHM *remoteShm) +{ + RETURN_CODE rc = UbrTrxMapShm(localShm, remoteShm); + if (UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) <<"Trx map shared memory failed."; + return rc; + } + + uint32_t localDataMsgCap = (uint32_t)(_trx->ubrRx.localDataQ.len / UBR_MSG_LEN); + uint32_t remoteDataMsgCap = (uint32_t)(_trx->ubrTx.remoteDataQ.len / UBR_MSG_LEN); + _trx->ubrRx.capacity = localDataMsgCap; + _trx->ubrTx.capacity = remoteDataMsgCap; + rc = UBRingManager::GetHlcDealMsgMaxCnt(_trx->ubrRx.capacity, &_trx->ubrRx.dealMsgMaxCnt); + if (UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) << "Get hlc deal msg max cnt."; + return rc; + } + PrewriteUbrRx(&_trx->ubrRx); + PrewriteUbrTx(&_trx->ubrTx); + + ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->tail = remoteDataMsgCap - 1; + ((UbrDataStatusQMsg *)(_trx->ubrRx.remoteDataStatusQ.addr))->tail = localDataMsgCap - 1; + + if (UNLIKELY(UbrAddTimer() != HLC_OK)) { + LOG(ERROR) << "Ubr add timer failed, localName=" << localShm->name; + return HLC_ERR; + } + + 
((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->timeout = FLAGS_ub_connect_timeout; + ((UbrDataStatusQMsg *)(_trx->ubrRx.remoteDataStatusQ.addr))->timeout = FLAGS_ub_connect_timeout; + + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CONNECTED; + ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag = UBR_STATE_CONNECTED; + _trx->ubrTx.trxState = UBR_STATE_CONNECTED; + _trx->ubrRx.trxState = UBR_STATE_CONNECTED; + return HLC_OK; +} + +int UBRing::UbrAllocateServerShm(SHM* remote_trx_shm, SHM* local_trx_shm) { + UbrSetSleepTask(UBR_TASK_ACCEPT_MAP_FRONT); + if (UNLIKELY((ShmRemoteMalloc(remote_trx_shm)) != HLC_OK)) { + LOG(ERROR) << "Trx apply remote shared memory failed."; + return -1; + } + + if (UNLIKELY((ShmLocalCalloc(local_trx_shm)) != HLC_OK)) { + LOG(ERROR) << "Trx apply local shared memory failed."; + return -1; + } + + UbrTrx **ubrTrxPtr = &_trx; + if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(ubrTrxPtr)) != HLC_OK)) { + LOG(ERROR) << "Acquire ubrtrx failed."; + ShmRemoteFree(remote_trx_shm); + ShmLocalFree(local_trx_shm); + return -1; + } + _trx->type = TCP_TRX; + if (UNLIKELY((UbrServerTrxInit(local_trx_shm, remote_trx_shm)) != HLC_OK)) { + LOG(ERROR) << "Server trx init failed."; + ShmRemoteFree(remote_trx_shm); + UbrTrxFreeShm(_trx); + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return -1; + } + return 0; +} + +int UBRing::UbrAllocateLocalShm(SHM *local_trx_shm, const char *shm_name) +{ + if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(&(_trx))) != HLC_OK)) { + LOG(ERROR) << "Acquire ubrtrx failed, localName=" << shm_name; + return -1; + } + + _trx->type = TCP_TRX; + if (UNLIKELY((ApplyAndMapLocalShm(local_trx_shm, shm_name)) != HLC_OK)) { + LOG(ERROR) << "Trx apply or map local shared memory failed, localName=" << shm_name; + return -1; + } + return 0; +} + +int UBRing::UbrMapRemoteShm(SHM *local_trx_shm, const char *local_name) +{ + RETURN_CODE rc = UbrMapRemoteShmAddTimer(local_trx_shm, local_name); + if 
(UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) << "Connect Trx failed, local shm name=" << local_trx_shm->name; + return -1; + } + PrewriteUbrRx(&_trx->ubrRx); + PrewriteUbrTx(&_trx->ubrTx); + ((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = UBR_STATE_CONNECTED; + ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag = UBR_STATE_CONNECTED; + _trx->ubrTx.trxState = UBR_STATE_CONNECTED; + _trx->ubrRx.trxState = UBR_STATE_CONNECTED; + return 0; +} + +RETURN_CODE UBRing::UbrMapRemoteShmAddTimer(SHM *localTrxShm, const char *localName) +{ + uint64_t startTime = GetCurNanoSeconds(); + + size_t remoteServerLen = UBR_MSG_LEN * (((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->tail + 1) + + UBR_MSG_LEN * ((DATAQ_ADDR_OFFSET / UBR_MSG_LEN) + 1); + SHM remoteTrxShm = {NULL, remoteServerLen, 0, {0}, localTrxShm->fd}; + int result = snprintf(remoteTrxShm.name, + SHM_MAX_NAME_BUFF_LEN, + "%s_%s_%s", + SHM_NAME_PREFIX, + localName, + SERVER_SHM_NAME_SUFFIX); + if (UNLIKELY(result < 0)) { + LOG(ERROR) << "Copy server shared memory name failed, localName=%s, ret=%d.", localName, result; + return HLC_ERR; + } + UbrSetSleepTask(UBR_TASK_CONNECT_MAP_FRONT); + RETURN_CODE rc = ApplyAndMapRemoteShm(&remoteTrxShm); + if (UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) << "Connect Trx map shared memory failed, remote shm=" << remoteTrxShm.name; + return rc; + } + + if (UNLIKELY(UbrAddTimer() != HLC_OK)) { + LOG(ERROR) << "Ubr add timer failed, localName=" << localName; + ShmRemoteFree(&remoteTrxShm); + return HLC_ERR; + } + + UbrSetSleepTask(UBR_TASK_CONNECT_MAP_AFTER); + + uint32_t timeout = ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->timeout; + if (HasTimedOut(startTime, timeout) != HLC_OK) { + LOG(ERROR) << "Local shm " << localTrxShm->name << " wait for connect remote map timeout."; + DeleteTimerSafe((uint32_t)_trx->hbTimerFd); + DeleteTimerSafe((uint32_t)_trx->timerFd); + ShmRemoteFree(&remoteTrxShm); + return HLC_ERR_TIMEOUT; + } + + return HLC_OK; +} + 
+RETURN_CODE UBRing::ApplyAndMapLocalShm(SHM *localTrxShm, const char *localName) +{ + if (UNLIKELY(_trx == NULL || localTrxShm == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, trx is null, localName=" << localName; + return HLC_ERR; + } + int result = snprintf(localTrxShm->name, + SHM_MAX_NAME_BUFF_LEN, + "%s_%s_%s", + SHM_NAME_PREFIX, + localName, + CLIENT_SHM_NAME_SUFFIX); + if (UNLIKELY(result < 0)) { + LOG(ERROR) << "Copy client localTrx shared memory name failed, localName=" << localName << ", ret=" << result; + return HLC_ERR; + } + + RETURN_CODE rc = ShmLocalCalloc(localTrxShm); + if (UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) << "Trx apply local shared memory failed, local shm name=" << localTrxShm->name; + if (rc == SHM_ERR_EXIST || rc == SHM_ERR_NOT_FOUND) { + rc = UBR_ERR_ADDR_IN_USE; + } + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return rc; + } + rc = UbrTrxMapLocalShm(localTrxShm); + if (UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) << "Trx map local shared memory failed, local shm name=" << localTrxShm->name; + ShmLocalFree(localTrxShm); + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return rc; + } + ((UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr)->timeout = FLAGS_ub_connect_timeout; + _trx->ubrRx.capacity = (uint32_t)(_trx->ubrRx.localDataQ.len / UBR_MSG_LEN); + rc = UBRingManager::GetHlcDealMsgMaxCnt(_trx->ubrRx.capacity, &_trx->ubrRx.dealMsgMaxCnt); + if (rc != HLC_OK) { + LOG(ERROR) << "Get hlc deal msg max cnt, local shm name=" << localTrxShm->name; + ShmLocalFree(localTrxShm); + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return rc; + } + return HLC_OK; +} + +RETURN_CODE UBRing::ApplyAndMapRemoteShm(SHM *remoteTrxShm) +{ + RETURN_CODE rc = ShmRemoteMalloc(remoteTrxShm); + if (UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) << "Trx apply remote shared memory failed."; + return rc; + } + rc = UbrTrxMapRemoteShm(remoteTrxShm); + if (UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) << "Trx map shared memory failed."; + ShmRemoteFree(remoteTrxShm); + 
return rc; + } + _trx->ubrTx.capacity = (uint32_t)(_trx->ubrTx.remoteDataQ.len / UBR_MSG_LEN); + return HLC_OK; +} + +RETURN_CODE UBRing::WritevHasEnoughSpace(size_t bufLen) +{ + UbrDataStatusQMsg *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; + uint32_t cap = _trx->ubrTx.capacity; + uint32_t tail = dataStatusMsg->tail; + uint32_t remainChunkNum = + (_trx->ubrTx.writePos > tail) ? (tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); + uint32_t needMsgChunkNum = CalcUbrMsgChunkCnt((uint32_t)bufLen); + if (remainChunkNum < needMsgChunkNum) { + return HLC_RETRY; + } + return HLC_OK; +} + +RETURN_CODE UBRing::UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType) +{ + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx close failed, trx is null."; + return HLC_ERR; + } + + UbrEventQMsg* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; + while (ATOMIC_LOAD(trx->closeCnt) == 1 && localTxEventQ->flag == UBR_STATE_CLOSING) { + if (HasTimedOut(startTime, FLAGS_ub_disconnect_timeout) != HLC_OK) { + LOG(ERROR) << "Trx close failed, wait close time out."; + break; + } + usleep(1); + } + int firstClearExpected = UBR_CLOSE_FIRST; + int secondClearExpected = UBR_CLOSE_SECOND; + if (localTxEventQ->flag == UBR_STATE_CLOSING) { + if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeState, firstClearExpected, UBR_CLOSE_SECOND)) { + LOG(ERROR) << "Trx close, exist process is closing, name=" << trx->localShm.name; + return HLC_REENTRY; + } else if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeState, secondClearExpected, UBR_CLOSE_END)) { + localTxEventQ->flag = UBR_STATE_CLOSED; + trx->ubrTx.trxState = UBR_STATE_CLOSED; + } + } + + if (closeType == UBR_SEND_CLOSE) { + DeleteTimerSafe((uint32_t)trx->timerFd); + } else { + DeleteTimer((uint32_t)trx->timerFd); + } + DeleteTimerSafe((uint32_t)trx->hbTimerFd); + return HLC_OK; +} + +RETURN_CODE UBRing::ClearTrxResource(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType, 
int op) +{ + UbrEventQMsg* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; + RETURN_CODE rc = UbrClearResourceCheck(trx, startTime, closeType); + if (rc != HLC_OK) { + return rc; + } + + rc = UbrAddAsynClearTimer(trx); + if (rc != HLC_OK) { + LOG(ERROR) << "Trx close, add " << trx->localShm.name << " close clear timer failed."; + return HLC_ERR; + } + + return HLC_OK; +} + +RETURN_CODE UBRing::UbrTrxCloseCheck(UbrTrx *trx) +{ + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx close failed, client trx is null."; + return HLC_ERR; + } + int expected = MAX_CLOSE_COUNT; + if (!ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeCnt, expected, MAX_CLOSE_COUNT - 1)) { + LOG(ERROR) << "Trx close failed, exist other close acquire, trx local name=" << trx->localShm.name; + return HLC_ERR; + } + + if (UNLIKELY(trx->ubrTx.localTxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localTxEventQ addr is NULL, trx local name=" << trx->localShm.name; + return HLC_ERR; + } + return HLC_OK; +} + +ssize_t UBRing::StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remainBufLen) +{ + ssize_t totalRecvLen = 0; + int iovIndex = 0; + size_t iovPos = 0; + UbrMsgFormat *dataMsg = (UbrMsgFormat *)trx->ubrRx.localDataQ.addr; + bool notEofEncountered = true; + while (notEofEncountered && remainBufLen > 0) { + if (UNLIKELY(CheckTrxRecvPreCheck(trx) != HLC_OK)) { + return HLC_ERR; + } + UbrMsgFormat *currentChunk = &dataMsg[trx->ubrRx.readPos]; + uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX]; + if (flag == UBR_MSG_CHUNK_NONE) { + continue; + } + if (flag == UBR_MSG_CHUNK_EOF) { + notEofEncountered = false; + } + uint8_t chunkMsgLen = currentChunk->header[UBR_MSG_LEN_INDEX]; + uint8_t curIndex = currentChunk->header[UBR_MSG_CUR_INDEX]; + uint8_t recvLen = + remainBufLen > (size_t)(chunkMsgLen - curIndex) ? 
(chunkMsgLen - curIndex) : (uint8_t)remainBufLen; + while (iovIndex < iovcnt && recvLen > 0) { + size_t copyLen = + recvLen > (iov[iovIndex].iov_len - iovPos) ? iov[iovIndex].iov_len - iovPos : (size_t)recvLen; + memcpy((uint8_t *)iov[iovIndex].iov_base + iovPos, currentChunk->payload.inner + curIndex, copyLen); + recvLen -= (uint8_t)copyLen; + iovPos += copyLen; + curIndex += (uint8_t)copyLen; + if (iovPos == iov[iovIndex].iov_len) { + iovIndex++; + iovPos = 0; + } + remainBufLen -= copyLen; + totalRecvLen += (ssize_t)copyLen; + } + currentChunk->header[UBR_MSG_CUR_INDEX] = curIndex; + if (currentChunk->header[UBR_MSG_CUR_INDEX] == chunkMsgLen) { + currentChunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; + UpdateDataQTail(trx); + trx->ubrRx.readPos = (trx->ubrRx.readPos + 1) % trx->ubrRx.capacity; + } + } + return totalRecvLen; +} +} // namespace ub +} // namespace brpc \ No newline at end of file diff --git a/src/brpc/ub/ub_ring.h b/src/brpc/ub/ub_ring.h new file mode 100644 index 0000000000..f99ab8c819 --- /dev/null +++ b/src/brpc/ub/ub_ring.h @@ -0,0 +1,185 @@ +#ifndef BRPC_UB_RING_H +#define BRPC_UB_RING_H + +#include +#include +#include "butil/macros.h" +#include "brpc/ub/ubr_trx.h" +#include "brpc/ub/ub_ring_manager.h" +#include "brpc/ub/shm/shm_mgr.h" +#include "brpc/ub/timer/timer_mgr.h" + +namespace brpc { +namespace ub { +DECLARE_int32(ub_flying_io_timeout); +extern uint32_t g_sleepTime[UBR_TASK_STEP_NUM]; + +class UBRing { +public: + UBRing(); + ~UBRing(); + DISALLOW_COPY_AND_ASSIGN(UBRing); + + RETURN_CODE UbrTrxMapShm(SHM *localShm, SHM *remoteShm); + + RETURN_CODE UbrTrxClose(); + + RETURN_CODE UbrAddCloseTimer(); + + RETURN_CODE UbrAddTimer(); + + static void *UbrTrxCloseCallback(void *args); + + RETURN_CODE UbrAddHBTimer(); + + static void *UbrTrxHBCallback(void *args); + + static RETURN_CODE UbrPassiveClearTrx(UbrTrx *trx, int fd, PASSIVE_DISC_TYPE type); + + static RETURN_CODE UbrAddAsynClearTimer(UbrTrx *trx); + + static void 
*UbrAsynClearCallback(void *args); + + int UbrTrxSend(const void *buf, uint32_t bufLen); + + int UbrTrxRecv(void *buf, uint32_t bufLen); + + int UbrTrxRecvBlockMode(uint8_t *dest, uint32_t bufLen); + + ssize_t UbrTrxWritev(const struct iovec *iov, int iovcnt); + ssize_t UbrTrxReadv(const struct iovec *iov, int iovcnt); + ssize_t UbrTrxReadvBlockMode(const struct iovec *iov, int iovcnt); + + RETURN_CODE IsUbrTrxReadable(uint32_t epEvent); + + RETURN_CODE IsUbrTrxWriteable(uint32_t epEvent); + + RETURN_CODE UbrSetTimeout(UbrTaskStep taskType, int timeout); + + static RETURN_CODE UbrTrxFreeShm(UbrTrx *trx); + + void PrewriteUbrTx(UbrTx *tx); + void PrewriteUbrRx(UbrRx *rx); + + static inline void UbrSetSleepTask(UbrTaskStep taskType) + { + if (taskType >= UBR_TASK_STEP_NUM || taskType < 0) { + return; + } + uint32_t type = (uint32_t)taskType; + sleep(g_sleepTime[type]); + return; + } + + static inline RETURN_CODE CheckTrxConnectParam(const char *listenerName, const char *localName) + { + if (UNLIKELY(listenerName == NULL)) { + LOG(ERROR) << "The request listener name is null."; + return HLC_ERR; + } + if (UNLIKELY(localName == NULL)) { + LOG(ERROR) << "The request trx shared memory name is null."; + return HLC_ERR; + } + return HLC_OK; + } + + int UbrAllocateServerShm(SHM* remote_trx_shm, SHM* local_trx_shm); + + int UbrMapRemoteShm(SHM *local_trx_shm, const char *local_name); + + int UbrAllocateLocalShm(SHM *local_trx_shm, const char *shm_name); + + RETURN_CODE UbrMapRemoteShmAddTimer(SHM *localTrxShm, const char *localName); + + static inline RETURN_CODE CheckTrxSendPreCheck(UbrTrx *trx) + { + if (UNLIKELY(trx->ubrTx.trxState != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "Trx send failed, trx is not connected state."; + return HLC_ERR; + } + + return HLC_OK; + } + static RETURN_CODE CheckTrxRecvParam(UbrTrx *trx, const void *buf, uint32_t bufLen) + { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx recv failed, trx is null."; + return HLC_ERR; + } + + if 
(UNLIKELY((UbrEventQMsg *)trx->ubrRx.localRxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx send failed, localTxEventQ addr is NULL."; + return HLC_ERR; + } + + if (UNLIKELY(trx->ubrRx.trxState != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "Trx recv failed, trx is not connected statep=" << trx->ubrRx.trxState; + return UBR_NOT_CONNECTED; + } + if (UNLIKELY(buf == NULL)) { + LOG(ERROR) << "Trx recv failed, buf is null."; + return HLC_ERR; + } + if (UNLIKELY(bufLen == 0)) { + LOG(ERROR) << "Trx recv failed, bufLen is 0."; + return HLC_ERR; + } + return HLC_OK; + } + + static inline RETURN_CODE CheckTrxRecvPreCheck(UbrTrx *trx) + { + if (UNLIKELY(trx->ubrRx.trxState != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "Trx recv failed, trx is not connected state."; + return HLC_ERR; + } + return HLC_OK; + } + + static inline void UpdateDataQTail(UbrTrx *trx) + { + ((UbrDataStatusQMsg *)trx->ubrRx.remoteDataStatusQ.addr)->tail = trx->ubrRx.readPos; + } + + static RETURN_CODE UbrTrxCallbackCheck(UbrTrx *trx) + { + if (trx == NULL) { + LOG(ERROR) << "Trx close callback failed, trx is null."; + return HLC_ERR; + } + if (UNLIKELY(trx->localShm.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localShm addr is NULL."; + return HLC_ERR; + } + if (UNLIKELY(trx->ubrRx.localRxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localRxEventQ addr is NULL."; + return HLC_ERR; + } + if (UNLIKELY(trx->ubrTx.localTxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localTxEventQ addr is NULL."; + return HLC_ERR; + } + return HLC_OK; + } + +private: + RETURN_CODE UbrTrxMapLocalShm(SHM *localShm); + RETURN_CODE UbrTrxMapRemoteShm(SHM *remoteShm); + RETURN_CODE ApplyAndMapLocalShm(SHM *localTrxShm, const char *localName); + RETURN_CODE ApplyAndMapRemoteShm(SHM *remoteTrxShm); + static RETURN_CODE UbrTrxCloseCheck(UbrTrx *trx); + void ReleaseFileLock(int lockFd); + ssize_t StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remainBufLen); + void PreWriteAddr(uint8_t 
*addr, size_t len); + RETURN_CODE WritevHasEnoughSpace(size_t bufLen); + RETURN_CODE UbrServerTrxInit(SHM *localShm, SHM *remoteShm); + static RETURN_CODE UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType); + static RETURN_CODE ClearTrxResource(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType, int op=0); + + UbrTrx* _trx{nullptr}; +}; +} +} + +#endif //BRPC_UB_RING_H \ No newline at end of file diff --git a/src/brpc/ub/ub_ring_manager.cpp b/src/brpc/ub/ub_ring_manager.cpp new file mode 100644 index 0000000000..983abd0d60 --- /dev/null +++ b/src/brpc/ub/ub_ring_manager.cpp @@ -0,0 +1,246 @@ +#include +#include "brpc/ub/ub_ring_manager.h" +#include "butil/logging.h" + +namespace brpc { +namespace ub { +DEFINE_int32(ubr_max_managed_num, 1024, "maximum number of managed ubring"); +DEFINE_int32(tail_update_after_read, 8, "Position of the tail update after the read"); + +UbrMgr UBRingManager::g_ubrMgr; +UbrLinkInfoMgr UBRingManager::g_linkInfoMgr; +pthread_mutex_t UBRingManager::g_ubrTrxMgrMtx = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t UBRingManager::g_ubrListenerMgrMtx = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t UBRingManager::g_linkInfoMgrMtx = PTHREAD_MUTEX_INITIALIZER; + +uint64_t g_ubrTrxNum = 0; +uint64_t g_ubEventCnt = 0; +uint64_t g_ubrListenerNum = 0; + +RETURN_CODE UBRingManager::GetHlcDealMsgMaxCnt(const uint32_t capacity, uint32_t *dealMsgMaxCnt) { + if (UNLIKELY(dealMsgMaxCnt == NULL)) { + LOG(ERROR) << "Get update factor failed, dealMsgMaxCnt is null."; + return HLC_ERR; + } + if (UNLIKELY(FLAGS_tail_update_after_read == 0)) { + LOG(ERROR) << "Get update factor failed, factor is 0."; + return HLC_ERR; + } + *dealMsgMaxCnt = capacity / FLAGS_tail_update_after_read; + return HLC_OK; +} + +RETURN_CODE UBRingManager::UbrMgrDefault() +{ + g_ubrMgr.trxNum = 0; + g_ubrMgr.trxCap = FLAGS_ubr_max_managed_num; + g_ubrMgr.trxMgrUnitStatus = NULL; + g_ubrMgr.trxMgr = NULL; + return HLC_OK; +} + +RETURN_CODE 
UBRingManager::UbrMgrInit() { + RETURN_CODE rc = UbrMgrDefault(); + if (UNLIKELY(rc != HLC_OK)) { + LOG(ERROR) << "Ubr manager set default values failed."; + return rc; + } + + size_t trxMgrSize = g_ubrMgr.trxCap * sizeof(UbrTrx); + g_ubrMgr.trxMgr = (UbrTrx *)malloc(trxMgrSize); + size_t trxMgrStatusSize = g_ubrMgr.trxCap * sizeof(UbrMgrUnitStatus); + g_ubrMgr.trxMgrUnitStatus = (UbrMgrUnitStatus *)malloc(trxMgrStatusSize); + if (UNLIKELY(g_ubrMgr.trxMgr == NULL || + g_ubrMgr.trxMgrUnitStatus == NULL)) { + LOG(ERROR) << "Ubr manager memory allocation failed."; + UbrMgrFini(); + return HLC_ERR; + } + + memset(g_ubrMgr.trxMgr, 0, trxMgrSize); + memset(g_ubrMgr.trxMgrUnitStatus, UBR_MGR_UNIT_FREE, trxMgrStatusSize); + LinkInfoInit(); + return HLC_OK; + return UBR_NOT_CONNECTED; +} + +void UBRingManager::UbrMgrFini() { + { + LOCK_GUARD(g_ubrTrxMgrMtx); + FREE_PTR(g_ubrMgr.trxMgr); + FREE_PTR(g_ubrMgr.trxMgrUnitStatus); + } + { + LOCK_GUARD(g_ubrListenerMgrMtx); + } + g_ubrMgr.trxNum = 0; + g_ubrMgr.trxCap = 0; + LinkInfoFini(); +} + +RETURN_CODE UBRingManager::AcquireUbrTrxFromMgr(UbrTrx **trx) { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Acquire trx failed, trx is null."; + return HLC_ERR; + } + + if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + LOG(ERROR) << "Acquire trx failed, trxMgr is null."; + return HLC_ERR; + } + + LOCK_GUARD(g_ubrTrxMgrMtx); + if (g_ubrMgr.trxNum >= g_ubrMgr.trxCap) { + LOG(ERROR) << "Acquire trx failed, trx number is full."; + return HLC_ERR; + } + + for (uint32_t i = 0; i < g_ubrMgr.trxCap; ++i) { + if (g_ubrMgr.trxMgrUnitStatus[i] == UBR_MGR_UNIT_FREE) { + memset(&g_ubrMgr.trxMgr[i], 0, sizeof(UbrTrx)); + g_ubrMgr.trxMgrUnitStatus[i] = UBR_MGR_UNIT_USED; + *trx = &g_ubrMgr.trxMgr[i]; + (*trx)->trxMgrIndex = i; + (*trx)->ubrId = g_ubrTrxNum; + (*trx)->closeState = UBR_CLOSE_FIRST; + (*trx)->closeCnt = MAX_CLOSE_COUNT; + ++g_ubrMgr.trxNum; + ++g_ubrTrxNum; + return HLC_OK; + } + } + LOG(ERROR) << "Acquire trx failed, no available space."; + 
return HLC_ERR; +} + +RETURN_CODE UBRingManager::ReleaseUbrTrxFromMgr(UbrTrx *trx) { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Release trx failed, trx is null."; + return HLC_ERR; + } + + trx->localShm.addr = NULL; + trx->ubrTx.localTxEventQ.addr = NULL; + trx->ubrTx.localDataStatusQ.addr = NULL; + trx->ubrRx.localRxEventQ.addr = NULL; + trx->ubrRx.remoteDataStatusQ.addr = NULL; + if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + LOG(ERROR) << "Release trx failed, trxMgr is null."; + return HLC_ERR; + } + + LOCK_GUARD(g_ubrTrxMgrMtx); + if (g_ubrMgr.trxNum == 0) { + LOG(ERROR) << "Release trx failed, trx number is 0."; + return HLC_ERR; + } + + uint32_t idx = trx->trxMgrIndex; + if (g_ubrMgr.trxMgrUnitStatus[idx] == UBR_MGR_UNIT_FREE) { + LOG(ERROR) << "Release trx failed, trx is not in manager."; + return HLC_ERR; + } + g_ubrMgr.trxMgrUnitStatus[idx] = UBR_MGR_UNIT_FREE; + --g_ubrMgr.trxNum; + return HLC_OK; +} + +void UBRingManager::LinkInfoInit(void) { + + size_t linkInfoMgrSize = FLAGS_ubr_max_managed_num * sizeof(UbrLinkInfo); + g_linkInfoMgr.allLinkInfo = (UbrLinkInfo*) malloc(linkInfoMgrSize); + if (g_linkInfoMgr.allLinkInfo == NULL) { + LOG(ERROR) << "allLinkInfo is NULL"; + LinkInfoFini(); + return; + } + + g_linkInfoMgr.linkMgrUnitStatus = (UbrMgrUnitStatus*) malloc(linkInfoMgrSize); + if (g_linkInfoMgr.linkMgrUnitStatus == NULL) { + LinkInfoFini(); + return; + } + + memset(g_linkInfoMgr.allLinkInfo, 0, linkInfoMgrSize); + memset(g_linkInfoMgr.linkMgrUnitStatus, 0, linkInfoMgrSize); +} + +void UBRingManager::LinkInfoFini(void) { + if (g_linkInfoMgr.linkMgrUnitStatus == NULL || g_linkInfoMgr.allLinkInfo == NULL) { + LOG(ERROR) << "LinkInfo is NULL"; + return; + } + { + LOCK_GUARD(g_linkInfoMgrMtx); + FREE_PTR(g_linkInfoMgr.allLinkInfo); + FREE_PTR(g_linkInfoMgr.linkMgrUnitStatus); + } + + g_linkInfoMgr.linkNum = 0; +} + +void UBRingManager::AcquireLinkInfoToMgr(const char *listenerName, UbrTrx *trx) { + if (listenerName == NULL || trx == NULL) { + 
LOG(ERROR) << "LinkInfo acquire fail."; + return; + } + + if (g_linkInfoMgr.linkMgrUnitStatus == NULL || g_linkInfoMgr.allLinkInfo == NULL) { + LOG(ERROR) << "LinkInfo is NULL."; + return; + } + uint32_t ubrIndex = trx->trxMgrIndex; + char* connectName = trx->localShm.name; + if (g_linkInfoMgr.linkMgrUnitStatus[ubrIndex] == UBR_MGR_UNIT_FREE) { + strncpy(g_linkInfoMgr.allLinkInfo[ubrIndex].connectName, + connectName, SHM_MAX_NAME_BUFF_LEN); + strncpy(g_linkInfoMgr.allLinkInfo[ubrIndex].listenerName, + listenerName, SHM_MAX_NAME_BUFF_LEN); + g_linkInfoMgr.linkMgrUnitStatus[ubrIndex] = UBR_MGR_UNIT_USED; + g_linkInfoMgr.linkNum++; + } +} + +void UBRingManager::ReleaseLinkInfoFromMgr(UbrTrx *trx) { + if (trx == NULL || g_linkInfoMgr.linkMgrUnitStatus == NULL) { + LOG(ERROR) << "LinkInfo release fail."; + return; + } + + if (g_linkInfoMgr.linkMgrUnitStatus[trx->trxMgrIndex] == UBR_MGR_UNIT_FREE) { + LOG(ERROR) << "Release linkInfo failed, trx is not in manager."; + return; + } + g_linkInfoMgr.linkMgrUnitStatus[trx->trxMgrIndex] = UBR_MGR_UNIT_FREE; + g_linkInfoMgr.linkNum--; +} + +int32_t UBRingManager::UbEventCallback(const char *shmName) +{ + if (UNLIKELY(shmName == NULL)) { + LOG(ERROR) << "Ub event callback failed, shm name is null."; + return HLC_ERR; + } + if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + LOG(ERROR) << "Ub event callback failed, trx mgr is null."; + return HLC_ERR; + } + LOG(DEBUG) << "Ub event callback is processing. 
shm_name=" << shmName; + + for (uint32_t i = 0; i < g_ubrMgr.trxCap; ++i) { + if (g_ubrMgr.trxMgrUnitStatus[i] == UBR_MGR_UNIT_FREE) { + continue; + } + + if (strcmp(g_ubrMgr.trxMgr[i].localShm.name, shmName) == 0 || // 故障链路为该trx的本端shm + strcmp(g_ubrMgr.trxMgr[i].remoteShm.name, shmName) == 0) { // 故障链路为该trx的对端shm + ++g_ubEventCnt; + int fd = (int)g_ubrMgr.trxMgr[i].localShm.fd; + LOG(INFO) << "Ub event callback, the fd of the faulty link is " << fd; + return UBRing::UbrPassiveClearTrx(&g_ubrMgr.trxMgr[i], fd, UBR_UB_EVENT); + } + } + return HLC_ERR; +} +} +} \ No newline at end of file diff --git a/src/brpc/ub/ub_ring_manager.h b/src/brpc/ub/ub_ring_manager.h new file mode 100644 index 0000000000..38bfe92248 --- /dev/null +++ b/src/brpc/ub/ub_ring_manager.h @@ -0,0 +1,75 @@ +// +// Created by z00926396 on 2026/4/11. +// + +#ifndef BRPC_UB_RING_MANAGER_H +#define BRPC_UB_RING_MANAGER_H + +#include +#include +#include "brpc/ub/ub_ring.h" +#include "brpc/ub/common/common.h" + +namespace brpc { +namespace ub { +typedef enum { + UBR_MGR_UNIT_FREE = 0, + UBR_MGR_UNIT_USED = 1 +} UbrMgrUnitStatus; + +typedef struct TagUbrMgr { + uint32_t trxNum; + uint32_t trxCap; + UbrTrx *trxMgr; + UbrMgrUnitStatus *trxMgrUnitStatus; +} UbrMgr; + +typedef struct TagUbrLinkInfo { + char connectName[SHM_MAX_NAME_BUFF_LEN]; + char listenerName[SHM_MAX_NAME_BUFF_LEN]; +} UbrLinkInfo; + +typedef struct TagUbrLinkInfoMgr { + uint32_t linkNum; + UbrLinkInfo* allLinkInfo; + UbrMgrUnitStatus *linkMgrUnitStatus; +} UbrLinkInfoMgr; + +class UBRingManager { +public: + ~UBRingManager(){ + UbrMgrFini(); + } + + static RETURN_CODE GetHlcDealMsgMaxCnt(const uint32_t capacity, uint32_t *dealMsgMaxCnt); + + static RETURN_CODE UbrMgrDefault(); + + static RETURN_CODE UbrMgrInit(); + + static void UbrMgrFini(); + + static RETURN_CODE AcquireUbrTrxFromMgr(UbrTrx **trx); + + static RETURN_CODE ReleaseUbrTrxFromMgr(UbrTrx *trx); + + static void LinkInfoInit(void); + static void LinkInfoFini(void); + static 
void AcquireLinkInfoToMgr(const char* listenerName, UbrTrx *trx); + static void ReleaseLinkInfoFromMgr(UbrTrx* trx); + static int32_t UbEventCallback(const char *shmName); + +private: + UBRingManager() { + } + + static UbrMgr g_ubrMgr; + static UbrLinkInfoMgr g_linkInfoMgr; + static pthread_mutex_t g_ubrTrxMgrMtx; + static pthread_mutex_t g_ubrListenerMgrMtx; + static pthread_mutex_t g_linkInfoMgrMtx; +}; +} +} + +#endif //BRPC_UB_RING_MANAGER_H \ No newline at end of file diff --git a/src/brpc/ub/ubr_msg.h b/src/brpc/ub/ubr_msg.h new file mode 100644 index 0000000000..a82f0f0989 --- /dev/null +++ b/src/brpc/ub/ubr_msg.h @@ -0,0 +1,40 @@ +// +// Created by z00926396 on 2026/4/11. +// + +#ifndef BRPC_UBR_MSG_H +#define BRPC_UBR_MSG_H +#define UBR_MSG_HEADER_LEN 4 +#define UBR_MSG_PAYLOAD_LEN 60 +#define UBR_MSG_LEN (UBR_MSG_HEADER_LEN + UBR_MSG_PAYLOAD_LEN) + +#define UBR_MSG_FLAG_INDEX 0 +#define UBR_MSG_LEN_INDEX 1 +#define UBR_MSG_CUR_INDEX 2 + +namespace brpc { +namespace ub { +typedef enum { + UBR_MSG_CHUNK_NONE = 0, + UBR_MSG_CHUNK_EXIST = 1, + UBR_MSG_CHUNK_EOF = 2 +} UbrMsgHdrFlag; + +typedef struct TagUbrMsgPayload { + uint8_t inner[UBR_MSG_PAYLOAD_LEN]; +} UbrMsgPayload; + +typedef struct __attribute__((aligned(64))) TagUbrMsgFormat { + UbrMsgPayload payload; + + uint8_t header[UBR_MSG_HEADER_LEN]; +} UbrMsgFormat; + +static inline uint32_t CalcUbrMsgChunkCnt(uint32_t bufLen) +{ + uint32_t msgChunkNum = (bufLen + UBR_MSG_PAYLOAD_LEN - 1) / UBR_MSG_PAYLOAD_LEN; + return msgChunkNum; +} +} +} +#endif //BRPC_UBR_MSG_H \ No newline at end of file diff --git a/src/brpc/ub/ubr_trx.h b/src/brpc/ub/ubr_trx.h new file mode 100644 index 0000000000..37cbc13104 --- /dev/null +++ b/src/brpc/ub/ubr_trx.h @@ -0,0 +1,149 @@ +// +// Created by z00926396 on 2026/4/11. 
+// + +#ifndef BRPC_UBR_TRX_H +#define BRPC_UBR_TRX_H +#include +#include +#include +#include "brpc/ub/shm/shm_def.h" +#include "brpc/ub/common/common.h" +#include "brpc/ub/common/thread_lock.h" +#include "brpc/ub/ubr_msg.h" + +/* +----------------------------------------------------------------------------+ + │ UbrTrx shm │ + +-------------+-------------+-------------+---------------+------------------+ + │ TxEventQ │ RxEventQ │ DataStatusQ │ zero(44Bytes) | DataQ │ + +-------------+-------------+-------------+---------------+------------------+ */ + +#define UBR_EVENTQ_LEN sizeof(UbrEventQMsg) +#define UBR_DATASTATUSQ_LEN sizeof(UbrDataStatusQMsg) + +#define TX_EVENTQ_ADDR_OFFSET 0 +#define RX_EVENTQ_ADDR_OFFSET UBR_EVENTQ_LEN +#define DATASTATUSQ_ADDR_OFFSET ((UBR_EVENTQ_LEN) << 1) +#define DATAQ_ADDR_OFFSET (DATASTATUSQ_ADDR_OFFSET + UBR_DATASTATUSQ_LEN) +#define MB_TO_BYTE (1024 * 1024) +#define MAX_CLOSE_COUNT 2 + +#define SHM_NAME_PREFIX "HLC" +#define SERVER_SHM_NAME_SUFFIX "S" +#define CLIENT_SHM_NAME_SUFFIX "C" + +namespace brpc { +namespace ub { +extern RETURN_CODE(*g_BeforeTcpClose)(int); +extern RETURN_CODE(*g_AfterTcpClose)(int); + +typedef enum { + UBR_STATE_NONE, + UBR_STATE_CONNECTED, + UBR_STATE_CLOSING, + UBR_STATE_CLOSED +} EventQState; + +typedef enum { + UBR_SEND_CLOSE, + UBR_CALL_BACK_CLOSE +} UbrCloseType; + +typedef enum { + UBR_CLOSE_FIRST, + UBR_CLOSE_SECOND, + UBR_CLOSE_END +} UbrCloseCount; + +typedef enum { + UDP_TRX, + TCP_TRX, + UBR_TRX +} UbrTrxType; + +typedef enum { + UBR_TASK_CONNECT_MAP_FRONT, + UBR_TASK_CONNECT_MAP_AFTER, + UBR_TASK_ACCEPT_MAP_FRONT, + UBR_TASK_ACCEPT_MAP_AFTER, + UBR_TASK_CLOSE, + UBR_TASK_STEP_NUM +} UbrTaskStep; + +typedef struct TagUbrDataStatusQMsg { + uint32_t tail; + uint32_t timeout; + uint8_t heartBeat; +} UbrDataStatusQMsg; + +typedef struct TagUbrEventQMsg { + uint64_t ioId; + EventQState flag; +} UbrEventQMsg; + +typedef struct TagUbrAddrInfo { + uint8_t *addr; + size_t len; +} UbrAddrInfo; + 
+typedef struct TagUbrTx { + UbrAddrInfo remoteDataQ; + UbrAddrInfo remoteRxEventQ; + UbrAddrInfo localDataStatusQ; + UbrAddrInfo localTxEventQ; + uint64_t outIoId; + uint32_t writePos; + uint32_t capacity; + UbrMsgFormat localMsgSpace; + uint32_t hbRetryCnt; + uint32_t epLastCap; + volatile EventQState trxState; +} UbrTx; + +typedef struct TagUbrRx { + UbrAddrInfo localDataQ; + UbrAddrInfo localRxEventQ; + UbrAddrInfo remoteDataStatusQ; + UbrAddrInfo remoteTxEventQ; + uint64_t inIoId; + uint32_t readPos; + uint32_t capacity; + uint32_t dealMsgNum; + uint32_t dealMsgMaxCnt; + uint32_t epEofPos; + volatile EventQState trxState; +} UbrRx; + +typedef struct TagUbrTrx { + UbrTx ubrTx; + UbrRx ubrRx; + uint64_t ubrId; + uint32_t trxMgrIndex; + UbrTrxType type; + SHM localShm; + SHM remoteShm; + int timerFd; + int hbTimerFd; + int clearTimerFd; + AtomicInt closeCnt; + AtomicInt closeState; +} UbrTrx; + +typedef struct TagFileLock { + int lockFd; + char* lockPath; +} FileLock; + +typedef struct TagUbrLinkLock { + int fileLockNum; + FileLock* fileLock; +} UbrLinkLock; + +typedef enum { + UBR_UB_EVENT, + UBR_HEARTBEAT, +}PASSIVE_DISC_TYPE; + +} +} +#endif //BRPC_UBR_TRX_H \ No newline at end of file diff --git a/src/brpc/ub_transport.cpp b/src/brpc/ub_transport.cpp new file mode 100644 index 0000000000..8698cf1b01 --- /dev/null +++ b/src/brpc/ub_transport.cpp @@ -0,0 +1,224 @@ +#if BRPC_WITH_UBRING + +#include "brpc/ub_transport.h" +#include "brpc/tcp_transport.h" +#include "brpc/ub/ub_endpoint.h" +#include "brpc/ub/ub_helper.h" + +namespace brpc { +DECLARE_bool(usercode_in_coroutine); +DECLARE_bool(usercode_in_pthread); + +extern SocketVarsCollector *g_vars; + +void UBShmTransport::Init(Socket *socket, const SocketOptions &options) { + CHECK(_ub_ep == NULL); + if (options.socket_mode == SOCKET_MODE_UBRING) { + _ub_ep = new(std::nothrow)ub::UBShmEndpoint(socket); + if (!_ub_ep) { + const int saved_errno = errno; + PLOG(ERROR) << "Fail to create UBShmEndpoint"; + 
socket->SetFailed( + saved_errno, "Fail to create UBShmEndpoint: %s", berror(saved_errno)); + } + _ub_state = UB_UNKNOWN; + } else { + _ub_state = UB_OFF; + socket->_socket_mode = SOCKET_MODE_TCP; + } + _socket = socket; + _default_connect = options.app_connect; + _on_edge_trigger = options.on_edge_triggered_events; + if (options.need_on_edge_trigger && _on_edge_trigger == NULL) { + _on_edge_trigger = ub::UBShmEndpoint::OnNewDataFromTcp; + } + _tcp_transport = std::unique_ptr(new TcpTransport()); + _tcp_transport->Init(socket, options); +} + +void UBShmTransport::Release() { + if (_ub_ep) { + delete _ub_ep; + _ub_ep = NULL; + _ub_state = UB_UNKNOWN; + } +} + +int UBShmTransport::Reset(int32_t expected_nref) { + if (_ub_ep) { + _ub_ep->Reset(); + _ub_state = UB_UNKNOWN; + } + return 0; +} + +std::shared_ptr UBShmTransport::Connect() { + if (_default_connect == nullptr) { + return std::make_shared(); + } + return _default_connect; +} + +int UBShmTransport::CutFromIOBuf(butil::IOBuf *buf) { + if (_ub_ep && _ub_state != UB_OFF) { + butil::IOBuf *data_arr[1] = {buf}; + return _ub_ep->CutFromIOBufList(data_arr, 1); + } else { + return _tcp_transport->CutFromIOBuf(buf); + } +} + +ssize_t UBShmTransport::CutFromIOBufList(butil::IOBuf **buf, size_t ndata) { + if (_ub_ep && _ub_state != UB_OFF) { + return _ub_ep->CutFromIOBufList(buf, ndata); + } + return _tcp_transport->CutFromIOBufList(buf, ndata); +} + +int UBShmTransport::WaitEpollOut(butil::atomic *_epollout_butex, + bool pollin, const timespec duetime) { + // LOG(INFO) << "mwj pollin4=" << pollin << " duetime=" << butil::timespec_to_microseconds(duetime); + if (_ub_state == UB_ON) { + // LOG(INFO) << "mwj pollin1=" << pollin; + const int expected_val = _epollout_butex->load(butil::memory_order_acquire); + CHECK(_ub_ep != NULL); + if (!_ub_ep->IsWritable()) { + g_vars->nwaitepollout << 1; + _ub_ep->PollerRegisterEpollOut(pollin); + auto mwj_ret = bthread::butex_wait(_epollout_butex, expected_val, &duetime); + // 
LOG(INFO) << "mwj pollin2=" << pollin << " mwj_ret=" << mwj_ret; + if (mwj_ret < 0) { + if (errno != EAGAIN && errno != ETIMEDOUT) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to wait ub window of " << _socket; + _socket->SetFailed(saved_errno, + "Fail to wait ub window of %s: %s", + _socket->description().c_str(), + berror(saved_errno)); + } + if (_socket->Failed()) { + // NOTE: + // Different from TCP, we cannot find the UB channel + // failed by writing to it. Thus we must check if it + // is already failed here. + return 1; + } + } + _ub_ep->PollerUnRegisterEpollOut(pollin); + } + } else { + return _tcp_transport->WaitEpollOut(_epollout_butex, pollin, duetime); + } + // LOG(INFO) << "mwj return 0"; + return 0; +} + +void UBShmTransport::ProcessEvent(bthread_attr_t attr) { + bthread_t tid; + if (FLAGS_usercode_in_coroutine) { + OnEdge(_socket); + } else if (ub::FLAGS_ub_edisp_unsched == false) { + auto rc = bthread_start_background(&tid, &attr, OnEdge, _socket); + if (rc != 0) { + LOG(FATAL) << "Fail to start ProcessEvent"; + OnEdge(_socket); + } + } else if (bthread_start_urgent(&tid, &attr, OnEdge, _socket) != 0) { + LOG(FATAL) << "Fail to start ProcessEvent"; + OnEdge(_socket); + } +} + +void UBShmTransport::QueueMessage(InputMessageClosure& input_msg, + int* num_bthread_created, bool last_msg) { + if (last_msg) { + return; + } + InputMessageBase* to_run_msg = input_msg.release(); + if (!to_run_msg) { + return; + } + + if (ub::FLAGS_ub_disable_bthread) { + ProcessInputMessage(to_run_msg); + return; + } + // Create bthread for last_msg. The bthread is not scheduled + // until bthread_flush() is called (in the worse case). + + // TODO(gejun): Join threads. + bthread_t th; + bthread_attr_t tmp = (FLAGS_usercode_in_pthread ? 
+ BTHREAD_ATTR_PTHREAD : + BTHREAD_ATTR_NORMAL) | BTHREAD_NOSIGNAL; + tmp.keytable_pool = _socket->keytable_pool(); + tmp.tag = bthread_self_tag(); + bthread_attr_set_name(&tmp, "ProcessInputMessage"); + + if (!FLAGS_usercode_in_coroutine && bthread_start_background( + &th, &tmp, ProcessInputMessage, to_run_msg) == 0) { + ++*num_bthread_created; + } else { + ProcessInputMessage(to_run_msg); + } +} + +void UBShmTransport::Debug(std::ostream &os) {} + +int UBShmTransport::ContextInitOrDie(bool serverOrNot, const void* _options) { + if (serverOrNot) { + if (!OptionsAvailableOverUB(static_cast(_options))) { + return -1; + } + ub::GlobalUBInitializeOrDie(); + if (!ub::InitPollingModeWithTag(static_cast(_options)->bthread_tag)) { + return -1; + } + } else { + if (!OptionsAvailableForUB(static_cast(_options))) { + return -1; + } + ub::GlobalUBInitializeOrDie(); + if (!ub::InitPollingModeWithTag(bthread_self_tag())) { + return -1; + } + return 0; + } + + return 0; +} + +bool UBShmTransport::OptionsAvailableForUB(const ChannelOptions* opt) { + if (opt->has_ssl_options()) { + LOG(WARNING) << "Cannot use SSL and UB at the same time"; + return false; + } + if (!ub::SupportedByUB(opt->protocol.name())) { + LOG(WARNING) << "Cannot use " << opt->protocol.name() + << " over UB"; + return false; + } + return true; +} + +bool UBShmTransport::OptionsAvailableOverUB(const ServerOptions* opt) { + if (opt->rtmp_service) { + LOG(WARNING) << "RTMP is not supported by UB"; + return false; + } + if (opt->has_ssl_options()) { + LOG(WARNING) << "SSL is not supported by UB"; + return false; + } + if (opt->nshead_service) { + LOG(WARNING) << "NSHEAD is not supported by UB"; + return false; + } + if (opt->mongo_service_adaptor) { + LOG(WARNING) << "MONGO is not supported by UB"; + return false; + } + return true; +} +} // namespace brpc +#endif \ No newline at end of file diff --git a/src/brpc/ub_transport.h b/src/brpc/ub_transport.h new file mode 100644 index 0000000000..eec5e54a8c --- 
/dev/null +++ b/src/brpc/ub_transport.h @@ -0,0 +1,51 @@ +// +// Created by z00926396 on 2026/4/11. +// + +#ifndef BRPC_UB_TRANSPORT_H +#define BRPC_UB_TRANSPORT_H +#if BRPC_WITH_UBRING +#include "brpc/socket.h" +#include "brpc/channel.h" +#include "brpc/transport.h" + +namespace brpc { + class UBShmTransport : public Transport { + friend class TransportFactory; + friend class ub::UBShmEndpoint; + friend class ub::UBConnect; + public: + void Init(Socket* socket, const SocketOptions& options) override; + void Release() override; + int Reset(int32_t expected_nref) override; + std::shared_ptr Connect() override; + int CutFromIOBuf(butil::IOBuf* buf) override; + ssize_t CutFromIOBufList(butil::IOBuf** buf, size_t ndata) override; + int WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, const timespec duetime) override; + void ProcessEvent(bthread_attr_t attr) override; + void QueueMessage(InputMessageClosure& inputMsg, int* num_bthread_created, bool last_msg) override; + void Debug(std::ostream &os) override; + ub::UBShmEndpoint* GetUBShmEp() { + CHECK(_ub_ep != NULL); + return _ub_ep; + } + static int ContextInitOrDie(bool serverOrNot, const void* _options); + private: + static bool OptionsAvailableForUB(const ChannelOptions* opt); + static bool OptionsAvailableOverUB(const ServerOptions* opt); + private: + // The on/off state of UB + enum UBState { + UB_ON, + UB_OFF, + UB_UNKNOWN + }; + // The UBShmEndpoint + ub::UBShmEndpoint* _ub_ep = NULL; + // Should use UB or not + UBState _ub_state; + std::shared_ptr _tcp_transport; + }; +} // namespace brpc +#endif // BRPC_WITH_UBRING +#endif //BRPC_UB_TRANSPORT_H \ No newline at end of file diff --git a/src/butil/iobuf.cpp b/src/butil/iobuf.cpp index ce60932327..fb7b212f9d 100644 --- a/src/butil/iobuf.cpp +++ b/src/butil/iobuf.cpp @@ -1540,6 +1540,64 @@ ssize_t IOPortal::pappend_from_file_descriptor( return nr; } +ssize_t IOPortal::pappend_from_ub_ring( + brpc::ub::UBRing* _ub_ring, + size_t max_count) { + iovec 
vec[MAX_APPEND_IOVEC]; + int nvec = 0; + size_t space = 0; + Block* prev_p = NULL; + Block* p = _block; + // Prepare at most MAX_APPEND_IOVEC blocks or space of blocks >= max_count + do { + if (p == NULL) { + p = iobuf::acquire_tls_block(); + if (BAIDU_UNLIKELY(!p)) { + errno = ENOMEM; + return -1; + } + if (prev_p != NULL) { + prev_p->u.portal_next = p; + } else { + _block = p; + } + } + vec[nvec].iov_base = p->data + p->size; + vec[nvec].iov_len = std::min(p->left_space(), max_count - space); + space += vec[nvec].iov_len; + ++nvec; + if (space >= max_count || nvec >= MAX_APPEND_IOVEC) { + break; + } + prev_p = p; + p = p->u.portal_next; + } while (1); + + ssize_t nr = 0; + nr = _ub_ring->UbrTrxReadv(vec, nvec); + if (nr <= 0) { // -1 or 0 + if (empty()) { + return_cached_blocks(); + } + return nr; + } + + size_t total_len = nr; + do { + const size_t len = std::min(total_len, _block->left_space()); + total_len -= len; + const IOBuf::BlockRef r = { _block->size, (uint32_t)len, _block }; + _push_back_ref(r); + _block->size += len; + if (_block->full()) { + Block* const saved_next = _block->u.portal_next; + _block->dec_ref(); // _block may be deleted + _block = saved_next; + } + } while (total_len); + return nr; +} + ssize_t IOPortal::append_from_reader(IReader* reader, size_t max_count) { iovec vec[MAX_APPEND_IOVEC]; int nvec = 0; diff --git a/src/butil/iobuf.h b/src/butil/iobuf.h index 239e82d950..77bc9d5411 100644 --- a/src/butil/iobuf.h +++ b/src/butil/iobuf.h @@ -34,6 +34,7 @@ #include "butil/macros.h" #include "butil/reader_writer.h" #include "butil/binary_printer.h" +#include "brpc/ub/ub_ring.h" // For IOBuf::appendv(const const_iovec*, size_t). The only difference of this // struct from iovec (defined in sys/uio.h) is that iov_base is `const void*' @@ -466,6 +467,8 @@ class IOPortal : public IOBuf { // If `offset' is negative, does exactly what append_from_file_descriptor does. 
ssize_t pappend_from_file_descriptor(int fd, off_t offset, size_t max_count); + ssize_t pappend_from_ub_ring(brpc::ub::UBRing* _ub_ring, size_t max_count); + // Read as many bytes as possible from SSL channel `ssl', and stop until `max_count'. // Returns total bytes read and the ssl error code will be filled into `ssl_error' ssize_t append_from_SSL_channel(struct ssl_st* ssl, int* ssl_error, From 07e1b94d6c9a1c9102656b944c71732294327751 Mon Sep 17 00:00:00 2001 From: zchuango Date: Tue, 14 Apr 2026 02:14:26 +0000 Subject: [PATCH 55/84] fix the bug for ub ring transport --- example/ubring_performance/CMakeLists.txt | 135 +++++++++ example/ubring_performance/client.cpp | 322 ++++++++++++++++++++++ example/ubring_performance/server.cpp | 97 +++++++ example/ubring_performance/test.proto | 33 +++ 4 files changed, 587 insertions(+) create mode 100644 example/ubring_performance/CMakeLists.txt create mode 100644 example/ubring_performance/client.cpp create mode 100644 example/ubring_performance/server.cpp create mode 100644 example/ubring_performance/test.proto diff --git a/example/ubring_performance/CMakeLists.txt b/example/ubring_performance/CMakeLists.txt new file mode 100644 index 0000000000..cbccdbc983 --- /dev/null +++ b/example/ubring_performance/CMakeLists.txt @@ -0,0 +1,135 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +cmake_minimum_required(VERSION 2.8.10) +project(ubring_performance C CXX) + +option(LINK_SO "Whether examples are linked dynamically" OFF) + +execute_process( + COMMAND bash -c "find ${PROJECT_SOURCE_DIR}/../.. -type d -regex '.*output/include$' | head -n1 | xargs dirname | tr -d '\n'" + OUTPUT_VARIABLE OUTPUT_PATH +) + +set(CMAKE_PREFIX_PATH ${OUTPUT_PATH}) + +include(FindThreads) +include(FindProtobuf) +protobuf_generate_cpp(PROTO_SRC PROTO_HEADER test.proto) +# include PROTO_HEADER +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# Search for libthrift* by best effort. If it is not found and brpc is +# compiled with thrift protocol enabled, a link error would be reported. +find_library(THRIFT_LIB NAMES thrift) +if (NOT THRIFT_LIB) + set(THRIFT_LIB "") +endif() + +find_path(BRPC_INCLUDE_PATH NAMES brpc/server.h) +if(LINK_SO) + find_library(BRPC_LIB NAMES brpc) +else() + find_library(BRPC_LIB NAMES libbrpc.a brpc) +endif() +if((NOT BRPC_INCLUDE_PATH) OR (NOT BRPC_LIB)) + message(FATAL_ERROR "Fail to find brpc") +endif() +include_directories(${BRPC_INCLUDE_PATH}) + +find_path(GFLAGS_INCLUDE_PATH gflags/gflags.h) +find_library(GFLAGS_LIBRARY NAMES gflags libgflags) +if((NOT GFLAGS_INCLUDE_PATH) OR (NOT GFLAGS_LIBRARY)) + message(FATAL_ERROR "Fail to find gflags") +endif() +include_directories(${GFLAGS_INCLUDE_PATH}) + +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + include(CheckFunctionExists) + CHECK_FUNCTION_EXISTS(clock_gettime HAVE_CLOCK_GETTIME) + if(NOT HAVE_CLOCK_GETTIME) + set(DEFINE_CLOCK_GETTIME "-DNO_CLOCK_GETTIME_IN_MAC") + endif() +endif() + +set(CMAKE_CPP_FLAGS "${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_UBRING=1") +set(CMAKE_CXX_FLAGS "${CMAKE_CPP_FLAGS} -DNDEBUG -O2 -D__const__=__unused__ -pipe -W -Wall -Wno-unused-parameter -fPIC -fno-omit-frame-pointer") + +if(CMAKE_VERSION VERSION_LESS "3.1.3") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + endif() + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + endif() +else() + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif() + +find_path(LEVELDB_INCLUDE_PATH NAMES leveldb/db.h) +find_library(LEVELDB_LIB NAMES leveldb) +if ((NOT LEVELDB_INCLUDE_PATH) OR (NOT LEVELDB_LIB)) + message(FATAL_ERROR "Fail to find leveldb") +endif() +include_directories(${LEVELDB_INCLUDE_PATH}) + +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(OPENSSL_ROOT_DIR + "/usr/local/opt/openssl" # Homebrew installed OpenSSL + ) +endif() + +find_package(OpenSSL) +include_directories(${OPENSSL_INCLUDE_DIR}) + +set(DYNAMIC_LIB + ${CMAKE_THREAD_LIBS_INIT} + ${GFLAGS_LIBRARY} + ${PROTOBUF_LIBRARIES} + ${LEVELDB_LIB} + ${OPENSSL_CRYPTO_LIBRARY} + ${OPENSSL_SSL_LIBRARY} + ${THRIFT_LIB} + dl + ) + +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(DYNAMIC_LIB ${DYNAMIC_LIB} + pthread + "-framework CoreFoundation" + "-framework CoreGraphics" + "-framework CoreData" + "-framework CoreText" + "-framework Security" + "-framework Foundation" + "-Wl,-U,_MallocExtension_ReleaseFreeMemory" + "-Wl,-U,_ProfilerStart" + "-Wl,-U,_ProfilerStop" + "-Wl,-U,__Z13GetStackTracePPvii" + "-Wl,-U,_mallctl" + "-Wl,-U,_malloc_stats_print" + ) +endif() + +if(BRPC_WITH_UBRING) + add_executable(ubring_performance_client client.cpp ${PROTO_SRC} ${PROTO_HEADER}) + add_executable(ubring_performance_server server.cpp ${PROTO_SRC} ${PROTO_HEADER}) + + target_link_libraries(ubring_performance_client ${BRPC_LIB} ${DYNAMIC_LIB}) + target_link_libraries(ubring_performance_server ${BRPC_LIB} ${DYNAMIC_LIB}) +endif() \ No newline at end of file diff --git a/example/ubring_performance/client.cpp b/example/ubring_performance/client.cpp new file mode 100644 index 0000000000..d9e7b8403b --- /dev/null +++ b/example/ubring_performance/client.cpp @@ -0,0 +1,322 @@ +// Licensed to the Apache Software Foundation (ASF) under 
one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include "butil/atomicops.h" +#include "butil/fast_rand.h" +#include "butil/logging.h" +#include "brpc/server.h" +#include "brpc/channel.h" +#include "bthread/bthread.h" +#include "bvar/latency_recorder.h" +#include "bvar/variable.h" +#include "test.pb.h" + +#ifdef BRPC_WITH_UBRING + +DEFINE_int32(thread_num, 0, "How many threads are used"); +DEFINE_int32(queue_depth, 1, "How many requests can be pending in the queue"); +DEFINE_int32(expected_qps, 0, "The expected QPS"); +DEFINE_int32(max_thread_num, 16, "The max number of threads are used"); +DEFINE_int32(attachment_size, -1, "Attachment size is used (in Bytes)"); +DEFINE_bool(echo_attachment, false, "Select whether attachment should be echo"); +DEFINE_string(connection_type, "single", "Connection type of the channel"); +DEFINE_string(protocol, "baidu_std", "Protocol type."); +DEFINE_string(servers, "0.0.0.0:8002+0.0.0.0:8002", "IP Address of servers"); +DEFINE_bool(use_ubring, true, "Use UBRING or not"); +DEFINE_int32(rpc_timeout_ms, 5000, "RPC call timeout"); +DEFINE_int32(test_seconds, 20, "Test running time"); +DEFINE_int32(test_iterations, 0, "Test iterations"); +DEFINE_int32(dummy_port, 8001, "Dummy server port number"); + 
+bvar::LatencyRecorder g_latency_recorder("client"); +bvar::LatencyRecorder g_server_cpu_recorder("server_cpu"); +bvar::LatencyRecorder g_client_cpu_recorder("client_cpu"); +butil::atomic g_last_time(0); +butil::atomic g_total_bytes; +butil::atomic g_total_cnt; +std::vector g_servers; +int rr_index = 0; +volatile bool g_stop = false; + +butil::atomic g_token(10000); + +static void* GenerateToken(void* arg) { + int64_t start_time = butil::monotonic_time_ns(); + int64_t accumulative_token = g_token.load(butil::memory_order_relaxed); + while (!g_stop) { + bthread_usleep(100000); + int64_t now = butil::monotonic_time_ns(); + if (accumulative_token * 1000000000 / (now - start_time) < FLAGS_expected_qps) { + int64_t delta = FLAGS_expected_qps * (now - start_time) / 1000000000 - accumulative_token; + g_token.fetch_add(delta, butil::memory_order_relaxed); + accumulative_token += delta; + } + } + return NULL; +} + +class PerformanceTest { +public: + PerformanceTest(int attachment_size, bool echo_attachment) + : _addr(NULL) + , _channel(NULL) + , _start_time(0) + , _iterations(0) + , _stop(false) + { + if (attachment_size > 0) { + _addr = malloc(attachment_size); + butil::fast_rand_bytes(_addr, attachment_size); + _attachment.append(_addr, attachment_size); + } + _echo_attachment = echo_attachment; + } + + ~PerformanceTest() { + if (_addr) { + free(_addr); + } + delete _channel; + } + + inline bool IsStop() { return _stop; } + + int Init() { + brpc::ChannelOptions options; + options.socket_mode = FLAGS_use_ubring? 
brpc::SOCKET_MODE_UBRING : brpc::SOCKET_MODE_TCP; + options.protocol = FLAGS_protocol; + options.connection_type = FLAGS_connection_type; + options.timeout_ms = FLAGS_rpc_timeout_ms; + options.max_retry = 0; + std::string server = g_servers[(rr_index++) % g_servers.size()]; + _channel = new brpc::Channel(); + if (_channel->Init(server.c_str(), &options) != 0) { + LOG(ERROR) << "Fail to initialize channel"; + return -1; + } + + // Add retry mechanism for RPC call + int retry = 3; + while (retry > 0) { + brpc::Controller cntl; + test::PerfTestResponse response; + test::PerfTestRequest request; + request.set_echo_attachment(_echo_attachment); + test::PerfTestService_Stub stub(_channel); + stub.Test(&cntl, &request, &response, NULL); + if (!cntl.Failed()) { + return 0; + } + LOG(WARNING) << "RPC call failed, retrying... (" << retry << " left): " << cntl.ErrorText(); + retry--; + bthread_usleep(100000); // 100ms delay before retry + } + LOG(ERROR) << "RPC call failed after multiple retries"; + return -1; + } + + struct RespClosure { + brpc::Controller* cntl; + test::PerfTestResponse* resp; + PerformanceTest* test; + }; + + void SendRequest() { + if (FLAGS_expected_qps > 0) { + while (g_token.load(butil::memory_order_relaxed) <= 0) { + bthread_usleep(10); + } + g_token.fetch_sub(1, butil::memory_order_relaxed); + } + RespClosure* closure = new RespClosure; + test::PerfTestRequest request; + closure->resp = new test::PerfTestResponse(); + closure->cntl = new brpc::Controller(); + request.set_echo_attachment(_echo_attachment); + closure->cntl->request_attachment().append(_attachment); + closure->test = this; + google::protobuf::Closure* done = brpc::NewCallback(&HandleResponse, closure); + test::PerfTestService_Stub stub(_channel); + stub.Test(closure->cntl, &request, closure->resp, done); + } + + static void HandleResponse(RespClosure* closure) { + std::unique_ptr cntl_guard(closure->cntl); + std::unique_ptr response_guard(closure->resp); + if (closure->cntl->Failed()) { 
+ LOG(WARNING) << "RPC call failed: " << closure->cntl->ErrorText(); + // Don't stop the test immediately, just log the error and continue + } else { + g_latency_recorder << closure->cntl->latency_us(); + if (closure->resp->cpu_usage().size() > 0) { + g_server_cpu_recorder << atof(closure->resp->cpu_usage().c_str()) * 100; + } + g_total_bytes.fetch_add(closure->cntl->request_attachment().size(), butil::memory_order_relaxed); + g_total_cnt.fetch_add(1, butil::memory_order_relaxed); + } + + cntl_guard.reset(NULL); + response_guard.reset(NULL); + + if (closure->test->_iterations == 0 && FLAGS_test_iterations > 0) { + closure->test->_stop = true; + return; + } + --closure->test->_iterations; + uint64_t last = g_last_time.load(butil::memory_order_relaxed); + uint64_t now = butil::gettimeofday_us(); + if (now > last && now - last > 100000) { + if (g_last_time.exchange(now, butil::memory_order_relaxed) == last) { + g_client_cpu_recorder << + atof(bvar::Variable::describe_exposed("process_cpu_usage").c_str()) * 100; + } + } + if (now - closure->test->_start_time > FLAGS_test_seconds * 1000000u) { + closure->test->_stop = true; + return; + } + closure->test->SendRequest(); + } + + static void* RunTest(void* arg) { + PerformanceTest* test = (PerformanceTest*)arg; + test->_start_time = butil::gettimeofday_us(); + test->_iterations = FLAGS_test_iterations; + + for (int i = 0; i < FLAGS_queue_depth; ++i) { + test->SendRequest(); + } + + return NULL; + } + +private: + void* _addr; + brpc::Channel* _channel; + uint64_t _start_time; + uint32_t _iterations; + volatile bool _stop; + butil::IOBuf _attachment; + bool _echo_attachment; +}; + +static void* DeleteTest(void* arg) { + PerformanceTest* test = (PerformanceTest*)arg; + delete test; + return NULL; +} + +void Test(int thread_num, int attachment_size) { + std::cout << "[Threads: " << thread_num + << ", Depth: " << FLAGS_queue_depth + << ", Attachment: " << attachment_size << "B" + << ", UBRING: " << (FLAGS_use_ubring ? 
"yes" : "no") + << ", Echo: " << (FLAGS_echo_attachment ? "yes]" : "no]") + << std::endl; + g_total_bytes.store(0, butil::memory_order_relaxed); + g_total_cnt.store(0, butil::memory_order_relaxed); + std::vector tests; + for (int k = 0; k < thread_num; ++k) { + PerformanceTest* t = new PerformanceTest(attachment_size, FLAGS_echo_attachment); + if (t->Init() < 0) { + exit(1); + } + tests.push_back(t); + } + uint64_t start_time = butil::gettimeofday_us(); + bthread_t tid[thread_num]; + if (FLAGS_expected_qps > 0) { + bthread_t tid; + bthread_start_background(&tid, &BTHREAD_ATTR_NORMAL, GenerateToken, NULL); + } + for (int k = 0; k < thread_num; ++k) { + bthread_start_background(&tid[k], &BTHREAD_ATTR_NORMAL, + PerformanceTest::RunTest, tests[k]); + } + for (int k = 0; k < thread_num; ++k) { + while (!tests[k]->IsStop()) { + bthread_usleep(10000); + } + } + uint64_t end_time = butil::gettimeofday_us(); + double throughput = g_total_bytes / 1.048576 / (end_time - start_time); + if (FLAGS_test_iterations == 0) { + std::cout << "Avg-Latency: " << g_latency_recorder.latency(10) + << ", 90th-Latency: " << g_latency_recorder.latency_percentile(0.9) + << ", 99th-Latency: " << g_latency_recorder.latency_percentile(0.99) + << ", 99.9th-Latency: " << g_latency_recorder.latency_percentile(0.999) + << ", Throughput: " << throughput << "MB/s" + << ", QPS: " << (g_total_cnt.load(butil::memory_order_relaxed) * 1000 / (end_time - start_time)) << "k" + << ", Server CPU-utilization: " << g_server_cpu_recorder.latency(10) << "%" + << ", Client CPU-utilization: " << g_client_cpu_recorder.latency(10) << "%" + << std::endl; + } else { + std::cout << " Throughput: " << throughput << "MB/s" << std::endl; + } + g_stop = true; + for (int k = 0; k < thread_num; ++k) { + bthread_start_background(&tid[k], &BTHREAD_ATTR_NORMAL, DeleteTest, tests[k]); + } +} + +int main(int argc, char* argv[]) { + GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + + 
brpc::StartDummyServerAt(FLAGS_dummy_port); + + std::string::size_type pos1 = 0; + std::string::size_type pos2 = FLAGS_servers.find('+'); + while (pos2 != std::string::npos) { + g_servers.push_back(FLAGS_servers.substr(pos1, pos2 - pos1)); + pos1 = pos2 + 1; + pos2 = FLAGS_servers.find('+', pos1); + } + g_servers.push_back(FLAGS_servers.substr(pos1)); + + if (FLAGS_thread_num > 0 && FLAGS_attachment_size >= 0) { + Test(FLAGS_thread_num, FLAGS_attachment_size); + } else if (FLAGS_thread_num <= 0 && FLAGS_attachment_size >= 0) { + for (int i = 1; i <= FLAGS_max_thread_num; i *= 2) { + Test(i, FLAGS_attachment_size); + } + } else if (FLAGS_thread_num > 0 && FLAGS_attachment_size < 0) { + for (int i = 1; i <= 1024; i *= 4) { + Test(FLAGS_thread_num, i); + } + } else { + for (int j = 1; j <= 1024; j *= 4) { + for (int i = 1; i <= FLAGS_max_thread_num; i *= 2) { + Test(i, j); + } + } + } + + return 0; +} + +#else + +int main(int argc, char* argv[]) { + LOG(ERROR) << " brpc is not compiled with ubring. To enable it, please refer to the ubring documentation"; + return 0; +} + +#endif \ No newline at end of file diff --git a/example/ubring_performance/server.cpp b/example/ubring_performance/server.cpp new file mode 100644 index 0000000000..35277255e1 --- /dev/null +++ b/example/ubring_performance/server.cpp @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include +#include "butil/atomicops.h" +#include "butil/logging.h" +#include "butil/time.h" +#include "brpc/server.h" +#include "bvar/variable.h" +#include "test.pb.h" + +#ifdef BRPC_WITH_UBRING + +DEFINE_int32(port, 8002, "TCP Port of this server"); +DEFINE_bool(use_ubring, true, "Use UBRING or not"); + +butil::atomic g_last_time(0); + +namespace test { +class PerfTestServiceImpl : public PerfTestService { +public: + PerfTestServiceImpl() {} + ~PerfTestServiceImpl() {} + + void Test(google::protobuf::RpcController* cntl_base, + const PerfTestRequest* request, + PerfTestResponse* response, + google::protobuf::Closure* done) { + brpc::ClosureGuard done_guard(done); + uint64_t last = g_last_time.load(butil::memory_order_relaxed); + uint64_t now = butil::monotonic_time_us(); + if (now > last && now - last > 100000) { + if (g_last_time.exchange(now, butil::memory_order_relaxed) == last) { + response->set_cpu_usage(bvar::Variable::describe_exposed("process_cpu_usage")); + } else { + response->set_cpu_usage(""); + } + } else { + response->set_cpu_usage(""); + } + if (request->echo_attachment()) { + brpc::Controller* cntl = + static_cast(cntl_base); + cntl->response_attachment().append(cntl->request_attachment()); + } + } +}; +} + +int main(int argc, char* argv[]) { + GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + + brpc::Server server; + test::PerfTestServiceImpl perf_test_service_impl; + + if (server.AddService(&perf_test_service_impl, + brpc::SERVER_DOESNT_OWN_SERVICE) != 0) { + LOG(ERROR) << "Fail to add service"; + return -1; + } 
+ g_last_time.store(0, butil::memory_order_relaxed); + + brpc::ServerOptions options; + options.socket_mode = FLAGS_use_ubring? brpc::SOCKET_MODE_UBRING : brpc::SOCKET_MODE_TCP; + if (server.Start(FLAGS_port, &options) != 0) { + LOG(ERROR) << "Fail to start EchoServer"; + return -1; + } + + server.RunUntilAskedToQuit(); + return 0; +} + +#else + + +int main(int argc, char* argv[]) { + LOG(ERROR) << " brpc is not compiled with ubring. To enable it, please refer to the ubring documentation"; + return 0; +} + +#endif \ No newline at end of file diff --git a/example/ubring_performance/test.proto b/example/ubring_performance/test.proto new file mode 100644 index 0000000000..22646d113c --- /dev/null +++ b/example/ubring_performance/test.proto @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +syntax="proto2"; +option cc_generic_services = true; + +package test; + +message PerfTestRequest { + required bool echo_attachment = 1; +}; + +message PerfTestResponse { + required string cpu_usage = 1; +}; + +service PerfTestService { + rpc Test(PerfTestRequest) returns (PerfTestResponse); +}; \ No newline at end of file From ffdf170b64dd2de73a2d1b5f54b78dac3f382746 Mon Sep 17 00:00:00 2001 From: zchuango Date: Tue, 14 Apr 2026 02:25:31 +0000 Subject: [PATCH 56/84] fix the bug for ub ring transport and other --- src/brpc/controller.h | 3 +++ src/brpc/socket.h | 3 +++ src/brpc/ub/timer/timer_mgr.cpp | 26 ++++++++++++++++---------- src/brpc/ub/ub_endpoint.cpp | 8 ++++---- src/brpc/ub/ub_endpoint.h | 3 +-- src/brpc/ub/ub_helper.h | 3 +-- 6 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/brpc/controller.h b/src/brpc/controller.h index 45f71b72f6..ca1bb477c9 100644 --- a/src/brpc/controller.h +++ b/src/brpc/controller.h @@ -212,6 +212,9 @@ friend void policy::ProcessThriftRequest(InputMessageBase*); // In client side it gets latency of the RPC call. While in server side, // it gets queue time before server processes the RPC call. 
int64_t latency_us() const { + if (_begin_time_us == 0) { + return 0; + } if (_end_time_us == UNSET_MAGIC_NUM) { return butil::cpuwide_time_us() - _begin_time_us; } diff --git a/src/brpc/socket.h b/src/brpc/socket.h index 0ff2b8a5d0..467db7c202 100644 --- a/src/brpc/socket.h +++ b/src/brpc/socket.h @@ -320,6 +320,9 @@ friend class policy::RtmpContext; friend class schan::ChannelBalancer; friend class rdma::RdmaEndpoint; friend class rdma::RdmaConnect; +friend class ub::UBShmEndpoint; +friend class ub::UBConnect; +friend class UBShmTransport; friend class HealthCheckTask; friend class OnAppHealthCheckDone; friend class HealthCheckManager; diff --git a/src/brpc/ub/timer/timer_mgr.cpp b/src/brpc/ub/timer/timer_mgr.cpp index 683ce9bb62..90807ac817 100644 --- a/src/brpc/ub/timer/timer_mgr.cpp +++ b/src/brpc/ub/timer/timer_mgr.cpp @@ -154,21 +154,27 @@ RETURN_CODE TimerInit(void) void *UnifiedCallback(void *args) { TimerFdCtx *ctx = (TimerFdCtx *)args; - if (pthread_spin_trylock(&ctx->spinLock) == 0) { - if (ctx->status == TIMER_CONTEXT_NOT_USING) { - pthread_spin_unlock(&ctx->spinLock); + // Try to lock with a small delay if initial try fails + int retry = 0; + while (pthread_spin_trylock(&ctx->spinLock) != 0) { + if (retry >= 3) { + LOG_EVERY_SECOND(WARNING) << "Failed to acquire spin lock after multiple attempts, context status is " << ctx->status; return NULL; } - ctx->status = TIMER_CONTEXT_CALLBACK_ONGOING; - ctx->cb(ctx->args); - if (ctx->periodical != 1) { - DeleteTimerInner((uint32_t)ctx->fd); - } + usleep(100); // Small delay before retry + retry++; + } + + if (ctx->status == TIMER_CONTEXT_NOT_USING) { pthread_spin_unlock(&ctx->spinLock); - } else { - LOG_EVERY_SECOND(WARNING) << "The context status is " << ctx->status; return NULL; } + ctx->status = TIMER_CONTEXT_CALLBACK_ONGOING; + ctx->cb(ctx->args); + if (ctx->periodical != 1) { + DeleteTimerInner((uint32_t)ctx->fd); + } + pthread_spin_unlock(&ctx->spinLock); return NULL; } diff --git 
a/src/brpc/ub/ub_endpoint.cpp b/src/brpc/ub/ub_endpoint.cpp index fe8da1d6c1..ba701e96c2 100644 --- a/src/brpc/ub/ub_endpoint.cpp +++ b/src/brpc/ub/ub_endpoint.cpp @@ -579,11 +579,11 @@ ssize_t UBShmEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { size_t nvec = 0; for (size_t i = 0; i < ndata; ++i) { const butil::IOBuf* p = from[i]; - const size_t nref = p->_ref_num(); + const size_t nref = p->backing_block_num(); for (size_t j = 0; j < nref && nvec < IOBUF_IOV_MAX; ++j, ++nvec) { - butil::IOBuf::BlockRef const& r = p->_ref_at(j); - vec[nvec].iov_base = r.block->data + r.offset; - vec[nvec].iov_len = r.length; + butil::StringPiece sp = p->backing_block(j); + vec[nvec].iov_base = const_cast(sp.data()); + vec[nvec].iov_len = sp.size(); } } diff --git a/src/brpc/ub/ub_endpoint.h b/src/brpc/ub/ub_endpoint.h index 4c7be8c94c..8fd835b33a 100644 --- a/src/brpc/ub/ub_endpoint.h +++ b/src/brpc/ub/ub_endpoint.h @@ -12,7 +12,6 @@ #include #include #include -#include #include "butil/atomicops.h" #include "butil/iobuf.h" #include "butil/macros.h" @@ -218,4 +217,4 @@ class UBShmEndpoint { }; #endif -#endif //BRPC_UB_ENDPOINT_H \ No newline at end of file +#endif //BRPC_UB_ENDPOINT_H diff --git a/src/brpc/ub/ub_helper.h b/src/brpc/ub/ub_helper.h index 64f844deb3..b3c84cd153 100644 --- a/src/brpc/ub/ub_helper.h +++ b/src/brpc/ub/ub_helper.h @@ -3,7 +3,6 @@ #if BRPC_WITH_UBRING -#include #include #include #include "bthread/types.h" @@ -46,4 +45,4 @@ namespace brpc { } // namespace brpc #endif // if BRPC_WITH_UBRING -#endif //BRPC_UB_HELPER_H \ No newline at end of file +#endif //BRPC_UB_HELPER_H From 1c3ec06515ba58569b62296741484c8e3e385c77 Mon Sep 17 00:00:00 2001 From: zchuango Date: Tue, 14 Apr 2026 02:33:52 +0000 Subject: [PATCH 57/84] add the license for ub transport --- src/brpc/ub/common/common.h | 15 ++++++++++++++- src/brpc/ub/common/thread_lock.h | 15 ++++++++++++++- src/brpc/ub/rack_mem/ubs_mem.h | 15 ++++++++++++++- src/brpc/ub/rack_mem/ubs_mem_def.h | 15 
++++++++++++++- src/brpc/ub/rack_mem/ubshmem_stub.cpp | 17 +++++++++++++++++ src/brpc/ub/shm/shm_def.h | 17 +++++++++++++++++ src/brpc/ub/shm/shm_ipc.cpp | 17 +++++++++++++++++ src/brpc/ub/shm/shm_ipc.h | 17 +++++++++++++++++ src/brpc/ub/shm/shm_mgr.cpp | 18 ++++++++++++++++++ src/brpc/ub/shm/shm_mgr.h | 17 +++++++++++++++++ src/brpc/ub/shm/shm_ubs.cpp | 17 +++++++++++++++++ src/brpc/ub/shm/shm_ubs.h | 17 +++++++++++++++++ src/brpc/ub/timer/timer_mgr.cpp | 17 +++++++++++++++++ src/brpc/ub/timer/timer_mgr.h | 17 +++++++++++++++++ src/brpc/ub/ub_endpoint.cpp | 17 +++++++++++++++++ src/brpc/ub/ub_endpoint.h | 15 ++++++++++++++- src/brpc/ub/ub_helper.cpp | 17 +++++++++++++++++ src/brpc/ub/ub_helper.h | 17 +++++++++++++++++ src/brpc/ub/ub_ring.cpp | 17 +++++++++++++++++ src/brpc/ub/ub_ring.h | 17 +++++++++++++++++ src/brpc/ub/ub_ring_manager.cpp | 17 +++++++++++++++++ src/brpc/ub/ub_ring_manager.h | 15 ++++++++++++++- src/brpc/ub/ubr_msg.h | 15 ++++++++++++++- src/brpc/ub/ubr_trx.h | 15 ++++++++++++++- src/brpc/ub_transport.cpp | 17 +++++++++++++++++ src/brpc/ub_transport.h | 15 ++++++++++++++- 26 files changed, 416 insertions(+), 9 deletions(-) diff --git a/src/brpc/ub/common/common.h b/src/brpc/ub/common/common.h index bbb87ff45e..55f27690e3 100644 --- a/src/brpc/ub/common/common.h +++ b/src/brpc/ub/common/common.h @@ -1,6 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Created by z00926396 on 2026/4/11. 
+// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #ifndef BRPC_COMMON_H #define BRPC_COMMON_H diff --git a/src/brpc/ub/common/thread_lock.h b/src/brpc/ub/common/thread_lock.h index f8aaf4b20d..6822fb60ce 100644 --- a/src/brpc/ub/common/thread_lock.h +++ b/src/brpc/ub/common/thread_lock.h @@ -1,6 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Created by z00926396 on 2026/4/11. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #ifndef BRPC_THREAD_LOCK_H #define BRPC_THREAD_LOCK_H diff --git a/src/brpc/ub/rack_mem/ubs_mem.h b/src/brpc/ub/rack_mem/ubs_mem.h index a1a986ca18..66069c6e9c 100644 --- a/src/brpc/ub/rack_mem/ubs_mem.h +++ b/src/brpc/ub/rack_mem/ubs_mem.h @@ -1,6 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Created by z00926396 on 2026/4/11. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #ifndef BRPC_UBS_MEM_H #define BRPC_UBS_MEM_H diff --git a/src/brpc/ub/rack_mem/ubs_mem_def.h b/src/brpc/ub/rack_mem/ubs_mem_def.h index cc63cee6f5..29646611f3 100644 --- a/src/brpc/ub/rack_mem/ubs_mem_def.h +++ b/src/brpc/ub/rack_mem/ubs_mem_def.h @@ -1,6 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Created by z00926396 on 2026/4/11. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
#ifndef BRPC_UBS_MEM_DEF_H #define BRPC_UBS_MEM_DEF_H diff --git a/src/brpc/ub/rack_mem/ubshmem_stub.cpp b/src/brpc/ub/rack_mem/ubshmem_stub.cpp index ad238a3b0f..f0eaf29f8e 100644 --- a/src/brpc/ub/rack_mem/ubshmem_stub.cpp +++ b/src/brpc/ub/rack_mem/ubshmem_stub.cpp @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #include #include #include diff --git a/src/brpc/ub/shm/shm_def.h b/src/brpc/ub/shm/shm_def.h index c76ca3b962..0e0116f6ea 100644 --- a/src/brpc/ub/shm/shm_def.h +++ b/src/brpc/ub/shm/shm_def.h @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #ifndef BRPC_SHM_DEF_H #define BRPC_SHM_DEF_H #include diff --git a/src/brpc/ub/shm/shm_ipc.cpp b/src/brpc/ub/shm/shm_ipc.cpp index cc1597d1c7..a195ea5a8f 100644 --- a/src/brpc/ub/shm/shm_ipc.cpp +++ b/src/brpc/ub/shm/shm_ipc.cpp @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #include #include #include diff --git a/src/brpc/ub/shm/shm_ipc.h b/src/brpc/ub/shm/shm_ipc.h index e71ed454c0..058142951c 100644 --- a/src/brpc/ub/shm/shm_ipc.h +++ b/src/brpc/ub/shm/shm_ipc.h @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #ifndef BRPC_SHM_IPC_H #define BRPC_SHM_IPC_H diff --git a/src/brpc/ub/shm/shm_mgr.cpp b/src/brpc/ub/shm/shm_mgr.cpp index a3061bcc53..403f172f53 100644 --- a/src/brpc/ub/shm/shm_mgr.cpp +++ b/src/brpc/ub/shm/shm_mgr.cpp @@ -1,3 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + #include #include #include diff --git a/src/brpc/ub/shm/shm_mgr.h b/src/brpc/ub/shm/shm_mgr.h index 2268bd980a..5ab6cfe886 100644 --- a/src/brpc/ub/shm/shm_mgr.h +++ b/src/brpc/ub/shm/shm_mgr.h @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #ifndef BRPC_SHM_MGR_H #define BRPC_SHM_MGR_H diff --git a/src/brpc/ub/shm/shm_ubs.cpp b/src/brpc/ub/shm/shm_ubs.cpp index 1ae4d8c295..bfa53e9c46 100644 --- a/src/brpc/ub/shm/shm_ubs.cpp +++ b/src/brpc/ub/shm/shm_ubs.cpp @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ #define _GNU_SOURCE #include #include diff --git a/src/brpc/ub/shm/shm_ubs.h b/src/brpc/ub/shm/shm_ubs.h index a1a9c8b289..226f58bb8c 100644 --- a/src/brpc/ub/shm/shm_ubs.h +++ b/src/brpc/ub/shm/shm_ubs.h @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #ifndef BRPC_SHM_UBS_H #define BRPC_SHM_UBS_H namespace brpc { diff --git a/src/brpc/ub/timer/timer_mgr.cpp b/src/brpc/ub/timer/timer_mgr.cpp index 90807ac817..bf67fe27e9 100644 --- a/src/brpc/ub/timer/timer_mgr.cpp +++ b/src/brpc/ub/timer/timer_mgr.cpp @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #define _GNU_SOURCE #include #include diff --git a/src/brpc/ub/timer/timer_mgr.h b/src/brpc/ub/timer/timer_mgr.h index be7a646488..a26933bcb0 100644 --- a/src/brpc/ub/timer/timer_mgr.h +++ b/src/brpc/ub/timer/timer_mgr.h @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #ifndef BRPC_TIMER_MGR_H #define BRPC_TIMER_MGR_H #include diff --git a/src/brpc/ub/ub_endpoint.cpp b/src/brpc/ub/ub_endpoint.cpp index ba701e96c2..c5d06aeefc 100644 --- a/src/brpc/ub/ub_endpoint.cpp +++ b/src/brpc/ub/ub_endpoint.cpp @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #if BRPC_WITH_UBRING #include diff --git a/src/brpc/ub/ub_endpoint.h b/src/brpc/ub/ub_endpoint.h index 8fd835b33a..4dc7419390 100644 --- a/src/brpc/ub/ub_endpoint.h +++ b/src/brpc/ub/ub_endpoint.h @@ -1,6 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Created by z00926396 on 2026/4/11. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #ifndef BRPC_UB_ENDPOINT_H #define BRPC_UB_ENDPOINT_H diff --git a/src/brpc/ub/ub_helper.cpp b/src/brpc/ub/ub_helper.cpp index 7d014ae2c4..c58f69617f 100644 --- a/src/brpc/ub/ub_helper.cpp +++ b/src/brpc/ub/ub_helper.cpp @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #if BRPC_WITH_UBRING #include // dlopen diff --git a/src/brpc/ub/ub_helper.h b/src/brpc/ub/ub_helper.h index b3c84cd153..958e918e9a 100644 --- a/src/brpc/ub/ub_helper.h +++ b/src/brpc/ub/ub_helper.h @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ #ifndef BRPC_UB_HELPER_H #define BRPC_UB_HELPER_H diff --git a/src/brpc/ub/ub_ring.cpp b/src/brpc/ub/ub_ring.cpp index 25b2b1b224..0ca2766d58 100644 --- a/src/brpc/ub/ub_ring.cpp +++ b/src/brpc/ub/ub_ring.cpp @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #include #include #include diff --git a/src/brpc/ub/ub_ring.h b/src/brpc/ub/ub_ring.h index f99ab8c819..9c9e635de9 100644 --- a/src/brpc/ub/ub_ring.h +++ b/src/brpc/ub/ub_ring.h @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + #ifndef BRPC_UB_RING_H #define BRPC_UB_RING_H diff --git a/src/brpc/ub/ub_ring_manager.cpp b/src/brpc/ub/ub_ring_manager.cpp index 983abd0d60..dfb99188f4 100644 --- a/src/brpc/ub/ub_ring_manager.cpp +++ b/src/brpc/ub/ub_ring_manager.cpp @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #include #include "brpc/ub/ub_ring_manager.h" #include "butil/logging.h" diff --git a/src/brpc/ub/ub_ring_manager.h b/src/brpc/ub/ub_ring_manager.h index 38bfe92248..14bc5d27ac 100644 --- a/src/brpc/ub/ub_ring_manager.h +++ b/src/brpc/ub/ub_ring_manager.h @@ -1,6 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Created by z00926396 on 2026/4/11. 
+// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #ifndef BRPC_UB_RING_MANAGER_H #define BRPC_UB_RING_MANAGER_H diff --git a/src/brpc/ub/ubr_msg.h b/src/brpc/ub/ubr_msg.h index a82f0f0989..69d7aeec45 100644 --- a/src/brpc/ub/ubr_msg.h +++ b/src/brpc/ub/ubr_msg.h @@ -1,6 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Created by z00926396 on 2026/4/11. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #ifndef BRPC_UBR_MSG_H #define BRPC_UBR_MSG_H diff --git a/src/brpc/ub/ubr_trx.h b/src/brpc/ub/ubr_trx.h index 37cbc13104..b3702496d1 100644 --- a/src/brpc/ub/ubr_trx.h +++ b/src/brpc/ub/ubr_trx.h @@ -1,6 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Created by z00926396 on 2026/4/11. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #ifndef BRPC_UBR_TRX_H #define BRPC_UBR_TRX_H diff --git a/src/brpc/ub_transport.cpp b/src/brpc/ub_transport.cpp index 8698cf1b01..937030ba83 100644 --- a/src/brpc/ub_transport.cpp +++ b/src/brpc/ub_transport.cpp @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ #if BRPC_WITH_UBRING #include "brpc/ub_transport.h" diff --git a/src/brpc/ub_transport.h b/src/brpc/ub_transport.h index eec5e54a8c..49403c172a 100644 --- a/src/brpc/ub_transport.h +++ b/src/brpc/ub_transport.h @@ -1,6 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Created by z00926396 on 2026/4/11. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
#ifndef BRPC_UB_TRANSPORT_H #define BRPC_UB_TRANSPORT_H From 43bb268e93011f60978c17e22ed709aef14f22a9 Mon Sep 17 00:00:00 2001 From: zchuango Date: Tue, 14 Apr 2026 20:24:01 +0800 Subject: [PATCH 58/84] Modifying the variable naming style --- src/brpc/ub/common/common.h | 7 +- src/brpc/ub/common/thread_lock.h | 64 +-- src/brpc/ub/rack_mem/ubs_mem.h | 4 +- src/brpc/ub/rack_mem/ubshmem_stub.cpp | 4 +- src/brpc/ub/shm/shm_def.h | 4 +- src/brpc/ub/shm/shm_ubs.cpp | 183 +++--- src/brpc/ub/timer/timer_mgr.cpp | 218 ++++--- src/brpc/ub/timer/timer_mgr.h | 2 +- src/brpc/ub/ub_endpoint.cpp | 24 +- src/brpc/ub/ub_endpoint.h | 5 +- src/brpc/ub/ub_helper.cpp | 4 +- src/brpc/ub/ub_ring.cpp | 780 +++++++++++++------------- src/brpc/ub/ub_ring.h | 84 +-- src/brpc/ub/ub_ring_manager.cpp | 188 +++---- src/brpc/ub/ub_ring_manager.h | 34 +- src/brpc/ub/ubr_msg.h | 6 +- src/brpc/ub/ubr_trx.h | 80 +-- 17 files changed, 841 insertions(+), 850 deletions(-) diff --git a/src/brpc/ub/common/common.h b/src/brpc/ub/common/common.h index 55f27690e3..f2c185b109 100644 --- a/src/brpc/ub/common/common.h +++ b/src/brpc/ub/common/common.h @@ -39,7 +39,6 @@ #define INLINE inline #define HLC_STATISTICS_PATH "/opt/hlc/run" #endif - #ifdef __cplusplus #include using AtomicInt = std::atomic; @@ -164,10 +163,10 @@ static inline size_t Aligned64Offset(uint8_t *addr) return ((ALIGN_BYTES - (((size_t)(addr)) & CHECKED_ALIGN_BITS)) & CHECKED_ALIGN_BITS); } -static inline RETURN_CODE HasTimedOut(const uint64_t startTime, const uint32_t timeout) +static inline RETURN_CODE HasTimedOut(const uint64_t start_time, const uint32_t timeout) { - uint64_t endTime = startTime + (uint64_t)timeout * SEC_TO_NSEC; - if (GetCurNanoSeconds() > endTime) { + uint64_t end_time = start_time + (uint64_t)timeout * SEC_TO_NSEC; + if (GetCurNanoSeconds() > end_time) { LOG(ERROR) << "task time out " << timeout << " seconds."; return HLC_ERR; } diff --git a/src/brpc/ub/common/thread_lock.h b/src/brpc/ub/common/thread_lock.h index 
6822fb60ce..42713460d8 100644 --- a/src/brpc/ub/common/thread_lock.h +++ b/src/brpc/ub/common/thread_lock.h @@ -37,46 +37,46 @@ static inline void UnlockMutex(pthread_mutex_t **mtx) } } -#define LOCK_GUARD(mtxPtr) \ - pthread_mutex_t *__attribute__((cleanup(UnlockMutex))) _mtxPtr = ({ \ - pthread_mutex_lock(&(mtxPtr)); \ - &(mtxPtr); \ +#define LOCK_GUARD(mtx_ptr) \ + pthread_mutex_t *__attribute__((cleanup(UnlockMutex))) _mtx_ptr = ({ \ + pthread_mutex_lock(&(mtx_ptr)); \ + &(mtx_ptr); \ }) -static inline void UnlockSpinLock(pthread_spinlock_t **spinLock) +static inline void UnlockSpinLock(pthread_spinlock_t **spin_lock) { - if (LIKELY(spinLock != NULL && *spinLock != NULL)) { - pthread_spin_unlock(*spinLock); + if (LIKELY(spin_lock != NULL && *spin_lock != NULL)) { + pthread_spin_unlock(*spin_lock); } else { - LOG(ERROR) << "Invalid input for spinLock."; + LOG(ERROR) << "Invalid input for spin_lock."; } } -#define SPIN_LOCK_GUARD(spinLockPtr) \ - pthread_spinlock_t *__attribute__((cleanup(UnlockSpinLock))) _spinLockPtr = ({ \ - pthread_spin_lock(&(spinLockPtr)); \ - &(spinLockPtr); \ +#define SPIN_LOCK_GUARD(spin_lock_ptr) \ + pthread_spinlock_t *__attribute__((cleanup(UnlockSpinLock))) _spin_lock_ptr = ({ \ + pthread_spin_lock(&(spin_lock_ptr)); \ + &(spin_lock_ptr); \ }) -static inline void UnlockRWLock(pthread_rwlock_t **rwLock) +static inline void UnlockRWLock(pthread_rwlock_t **rw_lock) { - if (LIKELY(rwLock != NULL && *rwLock != NULL)) { - pthread_rwlock_unlock(*rwLock); + if (LIKELY(rw_lock != NULL && *rw_lock != NULL)) { + pthread_rwlock_unlock(*rw_lock); } else { - LOG(ERROR) << "Invalid input for rwLock."; + LOG(ERROR) << "Invalid input for rw_lock."; } } -#define R_LOCK_GUARD(readLockPtr) \ - pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _readLockPtr = ({ \ - pthread_rwlock_rdlock(&(readLockPtr)); \ - &(readLockPtr); \ +#define R_LOCK_GUARD(read_lock_ptr) \ + pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _read_lock_ptr = ({ \ + 
pthread_rwlock_rdlock(&(read_lock_ptr)); \ + &(read_lock_ptr); \ }) -#define W_LOCK_GUARD(writeLockPtr) \ - pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _writeLockPtr = ({ \ - pthread_rwlock_wrlock(&(writeLockPtr)); \ - &(writeLockPtr); \ +#define W_LOCK_GUARD(write_lock_ptr) \ + pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _write_lock_ptr = ({ \ + pthread_rwlock_wrlock(&(write_lock_ptr)); \ + &(write_lock_ptr); \ }) static inline void PostSemWithClose(sem_t **sem) @@ -100,16 +100,16 @@ static inline void PostSem(sem_t **sem) } } -#define SEMAPHORE_WAIT_GUARD_WITH_CLOSE(semPtr) \ - sem_t *__attribute__((cleanup(PostSemWithClose))) _semPtr = ({ \ - sem_wait(semPtr); \ - semPtr; \ +#define SEMAPHORE_WAIT_GUARD_WITH_CLOSE(sem_ptr) \ + sem_t *__attribute__((cleanup(PostSemWithClose))) _sem_ptr = ({ \ + sem_wait(sem_ptr); \ + sem_ptr; \ }) -#define SEMAPHORE_WAIT_GUARD(semPtr) \ - sem_t *__attribute__((cleanup(PostSem))) _semPtr = ({ \ - sem_wait(semPtr); \ - semPtr; \ +#define SEMAPHORE_WAIT_GUARD(sem_ptr) \ + sem_t *__attribute__((cleanup(PostSem))) _sem_ptr = ({ \ + sem_wait(sem_ptr); \ + sem_ptr; \ }) #ifdef __cplusplus diff --git a/src/brpc/ub/rack_mem/ubs_mem.h b/src/brpc/ub/rack_mem/ubs_mem.h index 66069c6e9c..6466dba67f 100644 --- a/src/brpc/ub/rack_mem/ubs_mem.h +++ b/src/brpc/ub/rack_mem/ubs_mem.h @@ -192,10 +192,10 @@ SHMEM_API int ubsmem_lookup_cluster_statistic(ubsmem_cluster_info_t *info); /** * Subscribes to shared memory UB Event. - * @param registerFunc - Shared Memory UB Event Response Handling Function. + * @param register_func - Shared Memory UB Event Response Handling Function. * @return - 0 on success and other on failure */ -SHMEM_API int ubsmem_shmem_faults_register(shmem_faults_func registerFunc); +SHMEM_API int ubsmem_shmem_faults_register(shmem_faults_func register_func); /** * Query the supernode ID of this node within the supernode domain. 
diff --git a/src/brpc/ub/rack_mem/ubshmem_stub.cpp b/src/brpc/ub/rack_mem/ubshmem_stub.cpp index f0eaf29f8e..ce01694a19 100644 --- a/src/brpc/ub/rack_mem/ubshmem_stub.cpp +++ b/src/brpc/ub/rack_mem/ubshmem_stub.cpp @@ -53,7 +53,7 @@ int ubsmem_set_extern_logger(void (*func)(int level, const char *msg)) return UBSM_OK; } -int ubsmem_lookup_regions(ubsmem_regions_t* regions) +int ubsmem_lookup_regions(ubsmem_regions_t *regions) { regions->num = 1; regions->region[0].host_num = 1; @@ -96,7 +96,7 @@ int ubsmem_shmem_unmap(void *local_ptr, size_t length) return UBSM_OK; } -int ubsmem_shmem_faults_register(shmem_faults_func registerFunc) +int ubsmem_shmem_faults_register(shmem_faults_func register_func) { return UBSM_OK; } diff --git a/src/brpc/ub/shm/shm_def.h b/src/brpc/ub/shm/shm_def.h index 0e0116f6ea..5be35a37d4 100644 --- a/src/brpc/ub/shm/shm_def.h +++ b/src/brpc/ub/shm/shm_def.h @@ -41,7 +41,7 @@ namespace brpc { typedef struct { uint8_t *addr; size_t len; - uint64_t memid; + uint64_t mem_id; char name[SHM_MAX_NAME_BUFF_LEN]; uint32_t fd; } SHM; @@ -56,7 +56,7 @@ namespace brpc { ShmListNode* head; ShmListNode* tail; size_t size; - pthread_mutex_t shmLock; + pthread_mutex_t shm_lock; } ShmList; } } diff --git a/src/brpc/ub/shm/shm_ubs.cpp b/src/brpc/ub/shm/shm_ubs.cpp index bfa53e9c46..ef2a777cbb 100644 --- a/src/brpc/ub/shm/shm_ubs.cpp +++ b/src/brpc/ub/shm/shm_ubs.cpp @@ -45,19 +45,19 @@ DEFINE_bool(shm_wr_delay_comp, true, "Indicates whether to enable the write rela "0: relay; 1: non-relay."); DEFINE_int32(ub_flying_io_timeout, 1, "Waiting time for stopping data" "sending and receiving when the link is disconnected."); -char g_regionName[MAX_REGION_NAME_DESC_LENGTH] = {0}; -int g_shmTimerFd = 0; -ShmList *g_shmList = NULL; +char g_region_name[MAX_REGION_NAME_DESC_LENGTH] = {0}; +int g_shm_timer_fd = 0; +ShmList *g_shm_list = NULL; static RETURN_CODE UbsShmInterfacesLoad(void); char hostname[MAX_HOST_NAME_DESC_LENGTH]; RETURN_CODE UbsShmInterfacesLoad(void) { 
#ifndef UT - const char *ubsmSdkLocation = "/usr/local/ubs_mem/lib/libubsm_sdk.so"; - void* dlhandler = dlmopen(LM_ID_NEWLM, ubsmSdkLocation, RTLD_NOW | RTLD_LOCAL | RTLD_NODELETE | RTLD_DEEPBIND); + const char *ubsm_sdk_location = "/usr/local/ubs_mem/lib/libubsm_sdk.so"; + void* dlhandler = dlmopen(LM_ID_NEWLM, ubsm_sdk_location, RTLD_NOW | RTLD_LOCAL | RTLD_NODELETE | RTLD_DEEPBIND); if (dlhandler == NULL) { - LOG(ERROR) << "Dlmopen libubsm_sdk.so in " << ubsmSdkLocation << " failed, error:" << dlerror(); + LOG(ERROR) << "Dlmopen libubsm_sdk.so in " << ubsm_sdk_location << " failed, error:" << dlerror(); return HLC_ERR; } @@ -85,9 +85,9 @@ RETURN_CODE UbsShmInterfacesLoad(void) return HLC_OK; } -static RETURN_CODE CreateUbsShmRegion(const char *regionName) +static RETURN_CODE CreateUbsShmRegion(const char *region_name) { - int ret = snprintf(g_regionName, MAX_REGION_NAME_DESC_LENGTH, "%s_%u", + int ret = snprintf(g_region_name, MAX_REGION_NAME_DESC_LENGTH, "%s_%u", HLC_REGION_NAME_PREFIX, FLAGS_node_location); if (ret < 0) { LOG(ERROR) << "Snprintf_s region name failed, ret=" << ret; @@ -100,17 +100,17 @@ static RETURN_CODE CreateUbsShmRegion(const char *regionName) LOG(ERROR) << "Ubs lookup share region failed, ret=" << ret << ", region.num=" << regions.region[0].host_num; return HLC_ERR; } - ubsmem_region_attributes_t regionAttr = {0}; - regionAttr.host_num = regions.region[0].host_num; - for (int i = 0; i < regionAttr.host_num; i++) { - strcpy(regionAttr.hosts[i].host_name, regions.region[0].hosts[i].host_name); - regionAttr.hosts[i].affinity = (strcmp(regionAttr.hosts[i].host_name, hostname) == 0) ? + ubsmem_region_attributes_t region_attr = {0}; + region_attr.host_num = regions.region[0].host_num; + for (int i = 0; i < region_attr.host_num; i++) { + strcpy(region_attr.hosts[i].host_name, regions.region[0].hosts[i].host_name); + region_attr.hosts[i].affinity = (strcmp(region_attr.hosts[i].host_name, hostname) == 0) ? 
true : false; } - ret = ubsmem_create_region(regionName, 0, ®ionAttr); + ret = ubsmem_create_region(region_name, 0, ®ion_attr); if (ret == UBSM_ERR_ALREADY_EXIST) { - LOG(WARNING) << "Ubs region exists, region_name=" << regionName; + LOG(WARNING) << "Ubs region exists, region_name=" << region_name; return HLC_OK; } else if (ret != UBSM_OK) { LOG(ERROR) << "Ubsmem create region failed, ret=" << ret; @@ -130,7 +130,7 @@ static uint64_t AquireFlagIfWrDelayComp(const uint64_t flag) RETURN_CODE UbsShmLocalMalloc(SHM *shm) { - int ret = ubsmem_shmem_allocate(g_regionName, shm->name, shm->len, SHM_RIGHT_MODE, + int ret = ubsmem_shmem_allocate(g_region_name, shm->name, shm->len, SHM_RIGHT_MODE, AquireFlagIfWrDelayComp(UBSM_FLAG_ONLY_IMPORT_NONCACHE | UBSM_FLAG_MEM_ANONYMOUS)); do { if (ret == UBSM_ERR_ALREADY_EXIST) { @@ -139,7 +139,7 @@ do { return SHM_ERR_EXIST; } LOG(INFO) << "Ubs delete shm name=" << shm->name << " success, try to recreate."; - ret = ubsmem_shmem_allocate(g_regionName, shm->name, shm->len, SHM_RIGHT_MODE, + ret = ubsmem_shmem_allocate(g_region_name, shm->name, shm->len, SHM_RIGHT_MODE, AquireFlagIfWrDelayComp(UBSM_FLAG_ONLY_IMPORT_NONCACHE | UBSM_FLAG_MEM_ANONYMOUS)); if (ret != UBSM_OK) { LOG(ERROR) << "Ubs recreate shm name=" << shm->name << " failed, ret=" << ret; @@ -162,8 +162,8 @@ do { } // 通过MXE获取memid - shm->memid = 1; // 暂时打桩 - LOG(DEBUG) << "Ubs malloc local shm=" << shm->name << " length=" << shm->len << " memid=" << shm->memid << " success."; + shm->mem_id = 1; // 暂时打桩 + LOG(DEBUG) << "Ubs malloc local shm=" << shm->name << " length=" << shm->len << " mem_id=" << shm->mem_id << " success."; return HLC_OK; } @@ -179,7 +179,7 @@ RETURN_CODE UbsShmMunmap(SHM *shm) if (ret != UBSM_OK) { if (ret == UBSM_ERR_NET) { LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret; - AddShmToList(g_shmList, shm); + AddShmToList(g_shm_list, shm); return SHM_ERR_UBSM_NET_ERR; } LOG(ERROR) << "Ubs unmap shm=" << shm->name << " length=" 
<< shm->len << " failed, ret=" << ret; @@ -227,7 +227,7 @@ RETURN_CODE UbsShmLocalFree(SHM *shm) if (ret != UBSM_OK) { if (ret == UBSM_ERR_NET) { LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret; - AddShmToList(g_shmList, shm); + AddShmToList(g_shm_list, shm); return SHM_ERR_UBSM_NET_ERR; } LOG(WARNING) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret; @@ -284,7 +284,7 @@ RETURN_CODE UbsShmRemoteFree(SHM *shm) if (ret != UBSM_OK) { if (ret == UBSM_ERR_NET) { LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret; - AddShmToList(g_shmList, shm); + AddShmToList(g_shm_list, shm); return SHM_ERR_UBSM_NET_ERR; } LOG(ERROR) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret; @@ -309,10 +309,9 @@ void UbsMemLoggerPrint(int level, const char *msg) RETURN_CODE UbsShmInit(void) { - // 加载libubsm_sdk.so函数指针 - RETURN_CODE retCode = UbsShmInterfacesLoad(); - if (retCode != HLC_OK) { - LOG(ERROR) << "Load ubs shm functions failed, ret=" << retCode; + RETURN_CODE ret_code = UbsShmInterfacesLoad(); + if (ret_code != HLC_OK) { + LOG(ERROR) << "Load ubs shm functions failed, ret=" << ret_code; return HLC_ERR; } @@ -356,12 +355,12 @@ RETURN_CODE UbsShmInit(void) return HLC_ERR; } - if (CreateUbsShmRegion(g_regionName) != HLC_OK) { + if (CreateUbsShmRegion(g_region_name) != HLC_OK) { LOG(ERROR) << "Create Ubs region failed."; return HLC_ERR; } - if (InitShmTimer(&g_shmList) != HLC_OK) { + if (InitShmTimer(&g_shm_list) != HLC_OK) { LOG(ERROR) << "Ubs shm list init failed."; return HLC_ERR; } @@ -378,7 +377,7 @@ RETURN_CODE UbsShmFini(void) return HLC_ERR; } - if (UNLIKELY(DestroyShmTimer(g_shmList) != HLC_OK)) { + if (UNLIKELY(DestroyShmTimer(g_shm_list) != HLC_OK)) { LOG(ERROR) << "Ubs shm list finalize failed."; return HLC_ERR; } @@ -387,35 +386,35 @@ RETURN_CODE UbsShmFini(void) return HLC_OK; } -static void DeleteShmToList(ShmList* shmList) +static void 
DeleteShmToList(ShmList* shm_list) { - if (shmList == NULL || shmList->head == NULL) { + if (shm_list == NULL || shm_list->head == NULL) { return; } - ShmListNode *curNode = shmList->head; - shmList->head = curNode->next; - if (shmList->head != NULL) { - shmList->head->prev = NULL; + ShmListNode *cur_node = shm_list->head; + shm_list->head = cur_node->next; + if (shm_list->head != NULL) { + shm_list->head->prev = NULL; } else { - shmList->tail = NULL; + shm_list->tail = NULL; } - LOG(DEBUG) << "Delete shm to list, name=" << curNode->shm.name << " size=" << shmList->size; - FREE_PTR(curNode); - shmList->size--; + LOG(DEBUG) << "Delete shm to list, name=" << cur_node->shm.name << " size=" << shm_list->size; + FREE_PTR(cur_node); + shm_list->size--; } void *UbsShmCallback(void* args) { - ShmList *shmList = (ShmList*)args; - if (UNLIKELY(shmList == NULL)) { + ShmList *shm_list = (ShmList*)args; + if (UNLIKELY(shm_list == NULL)) { LOG(ERROR) << "Shm list is null."; return NULL; } - LOCK_GUARD(shmList->shmLock); - while (shmList->head != NULL) { - SHM shm = shmList->head->shm; + LOCK_GUARD(shm_list->shm_lock); + while (shm_list->head != NULL) { + SHM shm = shm_list->head->shm; if (shm.addr == NULL) { LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL."; return NULL; @@ -433,67 +432,67 @@ void *UbsShmCallback(void* args) ret = ubsmem_shmem_deallocate(shm.name); if (ret != UBSM_OK) { - DeleteShmToList(shmList); + DeleteShmToList(shm_list); LOG(ERROR) << "Ubs delete shm=" << shm.name << " failed, ret=" << ret; return NULL; } - DeleteShmToList(shmList); + DeleteShmToList(shm_list); LOG(DEBUG) << "Ubs free local shm=" << shm.name << " length=" << shm.len << " success."; } return NULL; } -RETURN_CODE UbsShmAddTimer(ShmList *shmList) +RETURN_CODE UbsShmAddTimer(ShmList *shm_list) { - uint32_t timerInterval = FLAGS_ub_flying_io_timeout; - struct itimerspec timeSpec = { - .it_interval = {.tv_sec = timerInterval, .tv_nsec = 0}, + uint32_t timer_interval = 
FLAGS_ub_flying_io_timeout; + struct itimerspec time_spec = { + .it_interval = {.tv_sec = timer_interval, .tv_nsec = 0}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; - int timerFd = TimerStart(&timeSpec, UbsShmCallback, (void*)shmList); - if (UNLIKELY(timerFd == -1)) { + int timer_fd = TimerStart(&time_spec, UbsShmCallback, (void*)shm_list); + if (UNLIKELY(timer_fd == -1)) { LOG(ERROR) << "Start shm timer failed."; return HLC_ERR; } - g_shmTimerFd = timerFd; + g_shm_timer_fd = timer_fd; return HLC_OK; } -RETURN_CODE InitShmTimer(ShmList **shmList) +RETURN_CODE InitShmTimer(ShmList **shm_list) { - *shmList = (ShmList *)malloc(sizeof(ShmList)); - if (*shmList == NULL) { + *shm_list = (ShmList *)malloc(sizeof(ShmList)); + if (*shm_list == NULL) { LOG(ERROR) << "Malloc shm list failed."; return HLC_ERR; } - (*shmList)->head = NULL; - (*shmList)->tail = NULL; - (*shmList)->size = 0; + (*shm_list)->head = NULL; + (*shm_list)->tail = NULL; + (*shm_list)->size = 0; - if (pthread_mutex_init(&(*shmList)->shmLock, NULL) != 0) { + if (pthread_mutex_init(&(*shm_list)->shm_lock, NULL) != 0) { LOG(ERROR) << "Init shm list mutex failed."; - FREE_PTR(*shmList); + FREE_PTR(*shm_list); return HLC_ERR; } - if (UbsShmAddTimer(*shmList) == HLC_ERR) { + if (UbsShmAddTimer(*shm_list) == HLC_ERR) { LOG(ERROR) << "Ubs add timer failed."; - FREE_PTR(*shmList); + FREE_PTR(*shm_list); return HLC_ERR; } return HLC_OK; } -RETURN_CODE DestroyShmTimer(ShmList *shmList) +RETURN_CODE DestroyShmTimer(ShmList *shm_list) { - DeleteTimerSafe((uint32_t)g_shmTimerFd); - if (shmList == NULL) { + DeleteTimerSafe((uint32_t)g_shm_timer_fd); + if (shm_list == NULL) { LOG(WARNING) << "Shm list is null."; return HLC_ERR; } - ShmListNode* current = shmList->head; + ShmListNode* current = shm_list->head; ShmListNode* next; while (current != NULL) { @@ -501,60 +500,60 @@ RETURN_CODE DestroyShmTimer(ShmList *shmList) free(current); current = next; } - pthread_mutex_destroy(&shmList->shmLock); - FREE_PTR(shmList); + 
pthread_mutex_destroy(&shm_list->shm_lock); + FREE_PTR(shm_list); return HLC_OK; } -RETURN_CODE IsExistInShmList(ShmList *shmList, const SHM *shm) +RETURN_CODE IsExistInShmList(ShmList *shm_list, const SHM *shm) { - LOCK_GUARD(shmList->shmLock); - if (UNLIKELY(shmList == NULL)) { + LOCK_GUARD(shm_list->shm_lock); + if (UNLIKELY(shm_list == NULL)) { LOG(ERROR) << "Shm list is null."; return HLC_ERR; } - ShmListNode *curNode = shmList->head; - while (curNode != NULL) { - if (strcmp(curNode->shm.name, shm->name) == 0 && curNode->shm.len == shm->len) { + ShmListNode *cur_node = shm_list->head; + while (cur_node != NULL) { + if (strcmp(cur_node->shm.name, shm->name) == 0 && cur_node->shm.len == shm->len) { return HLC_OK; } - curNode = curNode->next; + cur_node = cur_node->next; } return HLC_ERR; } -RETURN_CODE AddShmToList(ShmList *shmList, SHM *shm) +RETURN_CODE AddShmToList(ShmList *shm_list, SHM *shm) { - if (shmList == NULL || shm == NULL) { + if (shm_list == NULL || shm == NULL) { LOG(ERROR) << "Shm list or shm is null."; return HLC_ERR; } - if (IsExistInShmList(shmList, shm) == HLC_OK) { + if (IsExistInShmList(shm_list, shm) == HLC_OK) { LOG(ERROR) << "Shm name=" << shm->name << " is exist in shm list."; return HLC_ERR; } - ShmListNode *newShmNode = (ShmListNode *)malloc(sizeof(ShmListNode)); - if (newShmNode == NULL) { + ShmListNode *new_shm_node = (ShmListNode *)malloc(sizeof(ShmListNode)); + if (new_shm_node == NULL) { LOG(ERROR) << "Malloc shm node failed."; return HLC_ERR; } - memcpy(&newShmNode->shm, shm, sizeof(SHM)); - LOCK_GUARD(shmList->shmLock); - newShmNode->next = NULL; - newShmNode->prev = shmList->tail; - if (shmList->tail) { - shmList->tail->next = newShmNode; - shmList->tail = newShmNode; + memcpy(&new_shm_node->shm, shm, sizeof(SHM)); + LOCK_GUARD(shm_list->shm_lock); + new_shm_node->next = NULL; + new_shm_node->prev = shm_list->tail; + if (shm_list->tail) { + shm_list->tail->next = new_shm_node; + shm_list->tail = new_shm_node; } else { - 
shmList->head = newShmNode; - shmList->tail = newShmNode; + shm_list->head = new_shm_node; + shm_list->tail = new_shm_node; } - shmList->size++; - LOG(DEBUG) << "Add shm to list success, shm name=" << shm->name << " size=" << shmList->size; + shm_list->size++; + LOG(DEBUG) << "Add shm to list success, shm name=" << shm->name << " size=" << shm_list->size; return HLC_OK; } } diff --git a/src/brpc/ub/timer/timer_mgr.cpp b/src/brpc/ub/timer/timer_mgr.cpp index bf67fe27e9..83ce18effe 100644 --- a/src/brpc/ub/timer/timer_mgr.cpp +++ b/src/brpc/ub/timer/timer_mgr.cpp @@ -28,43 +28,43 @@ namespace brpc { namespace ub { -int32_t g_epollFd = -1; -std::atomic g_totalTimerNum; -TimerFdCtx *g_timerFdCtxMap = NULL; -uint32_t maxSystemFd; -static pthread_t g_epollExecuteThread; -static int32_t g_timerModuleInitialized; +int32_t g_epoll_fd = -1; +std::atomic g_total_timer_num; +TimerFdCtx *g_timer_fd_ctx_map = NULL; +uint32_t max_system_fd; +static pthread_t g_epoll_execute_thread; +static int32_t g_timer_module_initialized; static RETURN_CODE DeleteTimerInner(uint32_t fd) { - if (g_timerFdCtxMap == NULL) { + if (g_timer_fd_ctx_map == NULL) { LOG(WARNING) << "The timer is not initialized."; return HLC_OK; } - if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { + if (g_timer_fd_ctx_map[fd].status == TIMER_CONTEXT_NOT_USING) { LOG(WARNING) << "The timer is not using, timerFd=" << fd; return HLC_OK; } - if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { + if (epoll_ctl(g_epoll_fd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { LOG(ERROR) << "Failed to delete the timer fd=" << fd << " with errno=" << errno; } CloseTimerFd(fd); - atomic_fetch_sub(&g_totalTimerNum, 1); + atomic_fetch_sub(&g_total_timer_num, 1); return HLC_OK; } static RETURN_CODE StartTimeEpoll(void) { - g_epollFd = epoll_create1(0); - if (UNLIKELY(g_epollFd == -1)) { + g_epoll_fd = epoll_create1(0); + if (UNLIKELY(g_epoll_fd == -1)) { LOG(ERROR) << "Failed to create epoll. 
errno=" << errno; return HLC_ERR; } - int ret = pthread_create(&g_epollExecuteThread, NULL, TimerEpoll, NULL); + int ret = pthread_create(&g_epoll_execute_thread, NULL, TimerEpoll, NULL); if (UNLIKELY(ret != 0)) { LOG(ERROR) << "Failed to create thread err=" << ret; return HLC_ERR; @@ -74,17 +74,17 @@ static RETURN_CODE StartTimeEpoll(void) static RETURN_CODE TimerSpinLocksInit(void) { - if (g_timerFdCtxMap == NULL) { + if (g_timer_fd_ctx_map == NULL) { LOG(ERROR) << "Timer module is not fully initialized."; return HLC_ERR; } - for (uint32_t fd = 0; fd < maxSystemFd; fd++) { - int ret = pthread_spin_init(&g_timerFdCtxMap[fd].spinLock, PTHREAD_PROCESS_PRIVATE); + for (uint32_t fd = 0; fd < max_system_fd; fd++) { + int ret = pthread_spin_init(&g_timer_fd_ctx_map[fd].spin_lock, PTHREAD_PROCESS_PRIVATE); if (ret != EOK) { LOG(ERROR) << "Failed to initialize spin lock for fd=" << fd; - for (uint32_t cleanupFd = 0; cleanupFd < fd; cleanupFd++) { - pthread_spin_destroy(&g_timerFdCtxMap[cleanupFd].spinLock); + for (uint32_t cleanup_fd = 0; cleanup_fd < fd; cleanup_fd++) { + pthread_spin_destroy(&g_timer_fd_ctx_map[cleanup_fd].spin_lock); } return HLC_ERR; } @@ -92,7 +92,7 @@ static RETURN_CODE TimerSpinLocksInit(void) return HLC_OK; } -static RETURN_CODE ExecuteCallback(int32_t timerFd) +static RETURN_CODE ExecuteCallback(int32_t timer_fd) { pthread_attr_t attr; pthread_attr_init(&attr); @@ -101,8 +101,8 @@ static RETURN_CODE ExecuteCallback(int32_t timerFd) LOG(ERROR) << "Failed to set thread detach status when executing callback"; } - pthread_t cbThread; - err = pthread_create(&cbThread, &attr, UnifiedCallback, (void *)(&g_timerFdCtxMap[timerFd])); + pthread_t cb_thread; + err = pthread_create(&cb_thread, &attr, UnifiedCallback, (void *)(&g_timer_fd_ctx_map[timer_fd])); if (err != 0) { pthread_attr_destroy(&attr); LOG(ERROR) << "Failed to create thread while executing callback due to errno=" << err; @@ -114,8 +114,8 @@ static RETURN_CODE ExecuteCallback(int32_t timerFd) 
static RETURN_CODE TimerCtxMapCompletion(void) { - memset(g_timerFdCtxMap, 0, - sizeof(TimerFdCtx) * maxSystemFd); + memset(g_timer_fd_ctx_map, 0, + sizeof(TimerFdCtx) * max_system_fd); RETURN_CODE ret = TimerSpinLocksInit(); if (ret != HLC_OK) { @@ -127,22 +127,22 @@ static RETURN_CODE TimerCtxMapCompletion(void) RETURN_CODE TimerInit(void) { - if (g_timerModuleInitialized > 0) { + if (g_timer_module_initialized > 0) { return HLC_OK; } - g_totalTimerNum.store(0); + g_total_timer_num.store(0); struct rlimit rlim; if (getrlimit(RLIMIT_NOFILE, &rlim) != HLC_OK) { LOG(ERROR) << "Failed to get fd"; return HLC_ERR; } - maxSystemFd = (uint32_t)rlim.rlim_cur; + max_system_fd = (uint32_t)rlim.rlim_cur; - if (g_timerFdCtxMap == NULL) { - g_timerFdCtxMap = (TimerFdCtx *)malloc(sizeof(TimerFdCtx) * maxSystemFd); - if (UNLIKELY(!g_timerFdCtxMap)) { + if (g_timer_fd_ctx_map == NULL) { + g_timer_fd_ctx_map = (TimerFdCtx *)malloc(sizeof(TimerFdCtx) * max_system_fd); + if (UNLIKELY(!g_timer_fd_ctx_map)) { LOG(ERROR) << "Fail to malloc space for timer modules. errno=%d", errno; return HLC_ERR; } @@ -150,8 +150,8 @@ RETURN_CODE TimerInit(void) RETURN_CODE ret = TimerCtxMapCompletion(); if (ret != HLC_OK) { LOG(ERROR) << "Failed to init main data structure of Time Module. ret=" << ret; - free(g_timerFdCtxMap); - g_timerFdCtxMap = NULL; + free(g_timer_fd_ctx_map); + g_timer_fd_ctx_map = NULL; return HLC_ERR; } } @@ -159,54 +159,48 @@ RETURN_CODE TimerInit(void) RETURN_CODE ret = StartTimeEpoll(); if (ret != HLC_OK) { LOG(ERROR) << "Failed to start Timer Epoll. 
ret=" << ret; - if (LIKELY(g_timerFdCtxMap != NULL)) { - FREE_PTR(g_timerFdCtxMap); + if (LIKELY(g_timer_fd_ctx_map != NULL)) { + FREE_PTR(g_timer_fd_ctx_map); } return HLC_ERR; } - g_timerModuleInitialized = 1; + g_timer_module_initialized = 1; return HLC_OK; } void *UnifiedCallback(void *args) { TimerFdCtx *ctx = (TimerFdCtx *)args; - // Try to lock with a small delay if initial try fails - int retry = 0; - while (pthread_spin_trylock(&ctx->spinLock) != 0) { - if (retry >= 3) { - LOG_EVERY_SECOND(WARNING) << "Failed to acquire spin lock after multiple attempts, context status is " << ctx->status; + if (pthread_spin_trylock(&ctx->spin_lock) == 0) { + if (ctx->status == TIMER_CONTEXT_NOT_USING) { + pthread_spin_unlock(&ctx->spin_lock); return NULL; } - usleep(100); // Small delay before retry - retry++; - } - - if (ctx->status == TIMER_CONTEXT_NOT_USING) { - pthread_spin_unlock(&ctx->spinLock); + ctx->status = TIMER_CONTEXT_CALLBACK_ONGOING; + ctx->cb(ctx->args); + if (ctx->periodical != 1) { + DeleteTimerInner((uint32_t)ctx->fd); + } + pthread_spin_unlock(&ctx->spin_lock); + } else { + LOG_EVERY_SECOND(WARNING) << "The context status is " << ctx->status; return NULL; } - ctx->status = TIMER_CONTEXT_CALLBACK_ONGOING; - ctx->cb(ctx->args); - if (ctx->periodical != 1) { - DeleteTimerInner((uint32_t)ctx->fd); - } - pthread_spin_unlock(&ctx->spinLock); return NULL; } void *TimerEpoll(void *args) { UNREFERENCE_PARAM(args); - struct epoll_event readyEvents[MAX_TIMER]; + struct epoll_event ready_events[MAX_TIMER]; while (1) { - if (g_timerModuleInitialized <= 0) { + if (g_timer_module_initialized <= 0) { LOG(ERROR) << "The Timer module is not initialized."; break; } - int32_t readyNum = epoll_wait(g_epollFd, readyEvents, MAX_TIMER, TIMER_EPOLL_WAIT_TIMEOUT); - if (UNLIKELY(readyNum == -1)) { + int32_t ready_num = epoll_wait(g_epoll_fd, ready_events, MAX_TIMER, TIMER_EPOLL_WAIT_TIMEOUT); + if (UNLIKELY(ready_num == -1)) { error_t err = errno; if (err == EINTR) { 
LOG_EVERY_SECOND(WARNING) << "Epoll wait was interrupted. errno=" << err; @@ -219,23 +213,23 @@ void *TimerEpoll(void *args) break; } - for (int32_t i = 0; i < readyNum; i++) { - struct epoll_event *event = &readyEvents[i]; - int32_t timerFd = event->data.fd; + for (int32_t i = 0; i < ready_num; i++) { + struct epoll_event *event = &ready_events[i]; + int32_t timer_fd = event->data.fd; uint64_t exp = 0; - if (read(timerFd, &exp, sizeof(exp)) < 0) { - LOG(ERROR) << "Failed to read timerfd=" << timerFd << " errno=" << errno; + if (read(timer_fd, &exp, sizeof(exp)) < 0) { + LOG(ERROR) << "Failed to read timerfd=" << timer_fd << " errno=" << errno; continue; } - if (TimerFdCtxValidate((uint32_t)timerFd) != HLC_OK) { - LOG(ERROR) << "Timer ctx is not valid=" << timerFd; + if (TimerFdCtxValidate((uint32_t)timer_fd) != HLC_OK) { + LOG(ERROR) << "Timer ctx is not valid=" << timer_fd; continue; } - RETURN_CODE ret = ExecuteCallback(timerFd); + RETURN_CODE ret = ExecuteCallback(timer_fd); if (ret != HLC_OK) { LOG(ERROR) << "Failed execute callback ret=" << ret; - DeleteTimerInner((uint32_t)timerFd); + DeleteTimerInner((uint32_t)timer_fd); continue; } } @@ -245,103 +239,103 @@ void *TimerEpoll(void *args) void DeleteTimerSafe(uint32_t fd) { - if (g_timerFdCtxMap == NULL) { + if (g_timer_fd_ctx_map == NULL) { LOG(WARNING) << "The timer is not initialized."; return; } - if (pthread_spin_lock(&g_timerFdCtxMap[fd].spinLock) != 0) { + if (pthread_spin_lock(&g_timer_fd_ctx_map[fd].spin_lock) != 0) { LOG(ERROR) << "Failed to lock while deleting timer=" << fd << " errno=" << errno; return; } - if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { + if (g_timer_fd_ctx_map[fd].status == TIMER_CONTEXT_NOT_USING) { LOG(WARNING) << "The timer is not using, timerFd=" << fd; - pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); + pthread_spin_unlock(&g_timer_fd_ctx_map[fd].spin_lock); return; } - if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { + if 
(epoll_ctl(g_epoll_fd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { LOG(ERROR) << "Failed to delete the timer fd=" << fd << " with errno=" << errno; } CloseTimerFd(fd); - atomic_fetch_sub(&g_totalTimerNum, 1); + atomic_fetch_sub(&g_total_timer_num, 1); - pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); + pthread_spin_unlock(&g_timer_fd_ctx_map[fd].spin_lock); } void DeleteTimer(uint32_t fd) { - if (g_timerFdCtxMap == NULL) { + if (g_timer_fd_ctx_map == NULL) { LOG(WARNING) << "The timer is not initialized."; return; } - g_timerFdCtxMap[fd].periodical = 0; + g_timer_fd_ctx_map[fd].periodical = 0; } int32_t TimerStart(const struct itimerspec *time, void *(*cb)(void *), void *args) { - if (g_epollFd == -1) { + if (g_epoll_fd == -1) { LOG(ERROR) << "Timer epoll encountered internal error."; return -1; } - int timerFd = timerfd_create(CLOCK_MONOTONIC, 0); - if (UNLIKELY(timerFd >= (int)maxSystemFd || timerFd == -1)) { - LOG(ERROR) << "Failed to create timerfd=" << timerFd << " errno=" << errno; + int timer_fd = timerfd_create(CLOCK_MONOTONIC, 0); + if (UNLIKELY(timer_fd >= (int)max_system_fd || timer_fd == -1)) { + LOG(ERROR) << "Failed to create timerfd=" << timer_fd << " errno=" << errno; return -1; } - g_timerFdCtxMap[timerFd].status = TIMER_CONTEXT_EPOLL_WAITING; - g_timerFdCtxMap[timerFd].cb = cb; - g_timerFdCtxMap[timerFd].args = args; - g_timerFdCtxMap[timerFd].fd = (uint32_t)timerFd; + g_timer_fd_ctx_map[timer_fd].status = TIMER_CONTEXT_EPOLL_WAITING; + g_timer_fd_ctx_map[timer_fd].cb = cb; + g_timer_fd_ctx_map[timer_fd].args = args; + g_timer_fd_ctx_map[timer_fd].fd = (uint32_t)timer_fd; if (LIKELY(time->it_interval.tv_sec > 0 || time->it_interval.tv_nsec > 0)) { - g_timerFdCtxMap[timerFd].periodical = 1; + g_timer_fd_ctx_map[timer_fd].periodical = 1; } struct epoll_event event = { .events = EPOLLIN, - .data = {.fd = timerFd} + .data = {.fd = timer_fd} }; - int32_t ret = epoll_ctl(g_epollFd, EPOLL_CTL_ADD, timerFd, &event); + int32_t ret = epoll_ctl(g_epoll_fd, 
EPOLL_CTL_ADD, timer_fd, &event); if (UNLIKELY(ret != 0)) { - CloseTimerFd((uint32_t)timerFd); + CloseTimerFd((uint32_t)timer_fd); LOG(ERROR) << "Failed to add event to epoll. errno=" << errno; return -1; } - atomic_fetch_add(&g_totalTimerNum, 1); + atomic_fetch_add(&g_total_timer_num, 1); - ret = timerfd_settime(timerFd, 0, time, NULL); + ret = timerfd_settime(timer_fd, 0, time, NULL); if (UNLIKELY(ret != 0)) { - if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, timerFd, NULL) != 0) { - LOG(ERROR) << "Failed to delete the timer fd=" << timerFd << " with errno=" << errno; + if (epoll_ctl(g_epoll_fd, EPOLL_CTL_DEL, timer_fd, NULL) != 0) { + LOG(ERROR) << "Failed to delete the timer fd=" << timer_fd << " with errno=" << errno; } - CloseTimerFd((uint32_t)timerFd); - atomic_fetch_sub(&g_totalTimerNum, 1); + CloseTimerFd((uint32_t)timer_fd); + atomic_fetch_sub(&g_total_timer_num, 1); LOG(ERROR) << "Failed to set timer"; return -1; } - return timerFd; + return timer_fd; } uint32_t GetActiveTimerNum(void) { - return atomic_load(&g_totalTimerNum); + return atomic_load(&g_total_timer_num); } void CloseTimerFd(uint32_t fd) { - g_timerFdCtxMap[fd].cb = NULL; - g_timerFdCtxMap[fd].args = NULL; - g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; - g_timerFdCtxMap[fd].fd = 0; - g_timerFdCtxMap[fd].periodical = 0; + g_timer_fd_ctx_map[fd].cb = NULL; + g_timer_fd_ctx_map[fd].args = NULL; + g_timer_fd_ctx_map[fd].status = TIMER_CONTEXT_NOT_USING; + g_timer_fd_ctx_map[fd].fd = 0; + g_timer_fd_ctx_map[fd].periodical = 0; if (close((int)fd) != 0) { LOG(ERROR) << "Failed to close timer fd=" << fd << " errno=" << errno; return; @@ -350,19 +344,19 @@ void CloseTimerFd(uint32_t fd) void TimerModuleDestroy(void) { - uint32_t maxFd = maxSystemFd; - if (g_timerFdCtxMap) { - for (uint32_t fd = 0; fd < maxFd; fd++) { - if (g_timerFdCtxMap[fd].status != TIMER_CONTEXT_NOT_USING) { + uint32_t max_fd = max_system_fd; + if (g_timer_fd_ctx_map) { + for (uint32_t fd = 0; fd < max_fd; fd++) { + if 
(g_timer_fd_ctx_map[fd].status != TIMER_CONTEXT_NOT_USING) { DeleteTimerSafe(fd); } } } - close(g_epollFd); - g_epollFd = -1; - g_totalTimerNum = 0; - g_timerModuleInitialized = 0; - int32_t ret = pthread_join(g_epollExecuteThread, NULL); + close(g_epoll_fd); + g_epoll_fd = -1; + g_total_timer_num = 0; + g_timer_module_initialized = 0; + int32_t ret = pthread_join(g_epoll_execute_thread, NULL); if (ret != EOK) { LOG(ERROR) << "Failed to join pthread, during destroying timer module. ret=" << ret; return; @@ -371,15 +365,15 @@ void TimerModuleDestroy(void) RETURN_CODE TimerFdCtxValidate(uint32_t fd) { - if (fd >= maxSystemFd) { - LOG(ERROR) << "TimerFd=" << fd << " is out of range=" << maxSystemFd; + if (fd >= max_system_fd) { + LOG(ERROR) << "TimerFd=" << fd << " is out of range=" << max_system_fd; return HLC_ERR; } - if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { - LOG(ERROR) << "TimerFd=" << fd << " has wrong status=" << g_timerFdCtxMap[fd].status; + if (g_timer_fd_ctx_map[fd].status == TIMER_CONTEXT_NOT_USING) { + LOG(ERROR) << "TimerFd=" << fd << " has wrong status=" << g_timer_fd_ctx_map[fd].status; return HLC_ERR; } - if (g_timerFdCtxMap[fd].cb == NULL) { + if (g_timer_fd_ctx_map[fd].cb == NULL) { LOG(ERROR) << "The callback is not set."; return HLC_ERR; } diff --git a/src/brpc/ub/timer/timer_mgr.h b/src/brpc/ub/timer/timer_mgr.h index a26933bcb0..01e81c968a 100644 --- a/src/brpc/ub/timer/timer_mgr.h +++ b/src/brpc/ub/timer/timer_mgr.h @@ -39,7 +39,7 @@ typedef struct { uint32_t fd; TimerFdCtxStatus status; uint32_t periodical; - pthread_spinlock_t spinLock; + pthread_spinlock_t spin_lock; } TimerFdCtx; RETURN_CODE TimerInit(void); diff --git a/src/brpc/ub/ub_endpoint.cpp b/src/brpc/ub/ub_endpoint.cpp index c5d06aeefc..6b7872c74d 100644 --- a/src/brpc/ub/ub_endpoint.cpp +++ b/src/brpc/ub/ub_endpoint.cpp @@ -235,7 +235,7 @@ bool HelloNegotiationValid(HelloMessage& msg) { return false; } -static const int WAIT_TIMEOUT_MS = 50; +static const int 
wait_timeout_ms = 50; int UBShmEndpoint::ReadFromFd(void* data, size_t len) { CHECK(data != NULL); @@ -243,7 +243,7 @@ int UBShmEndpoint::ReadFromFd(void* data, size_t len) { size_t received = 0; do { const int expected_val = _read_butex->load(butil::memory_order_acquire); - const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); + const timespec duetime = butil::milliseconds_from_now(wait_timeout_ms); nr = read(_socket->fd(), (uint8_t*)data + received, len - received); if (nr < 0) { if (errno == EAGAIN) { @@ -270,7 +270,7 @@ int UBShmEndpoint::WriteToFd(void* data, size_t len) { int nw = 0; size_t written = 0; do { - const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); + const timespec duetime = butil::milliseconds_from_now(wait_timeout_ms); nw = write(_socket->fd(), (uint8_t*)data + written, len - written); if (nw < 0) { if (errno == EAGAIN) { @@ -491,15 +491,15 @@ void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) { size_t local_shm_len = (size_t)(FLAGS_data_queue_size) * MB_TO_BYTE; // server端共享内存名称 SHM local_trx_shm = {NULL, local_shm_len, 0, {0}, (uint8_t)ep->_socket->fd()}; - char clientName[SHM_MAX_NAME_BUFF_LEN]; - strncpy(clientName, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); + char client_name[SHM_MAX_NAME_BUFF_LEN]; + strncpy(client_name, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); - char *clientIpPort = strrchr(clientName, '_'); - if (clientIpPort != NULL) { - *clientIpPort = '\0'; + char *client_ip_port = strrchr(client_name, '_'); + if (client_ip_port != NULL) { + *client_ip_port = '\0'; } int result = snprintf(local_trx_shm.name, SHM_MAX_NAME_BUFF_LEN, "%s_%s", - clientName, SERVER_SHM_NAME_SUFFIX); + client_name, SERVER_SHM_NAME_SUFFIX); if (UNLIKELY(result < 0)) { LOG(WARNING) << "Copy client shared memory name failed, ret=" << result; ub_transport->_ub_state = UBShmTransport::UB_OFF; @@ -693,7 +693,7 @@ void UBShmEndpoint::DeallocateResources() { } } -void UBShmEndpoint::PollIn(UBShmEndpoint* ep, 
uint32_t epEvent) { +void UBShmEndpoint::PollIn(UBShmEndpoint* ep, uint32_t ep_event) { SocketUniquePtr s; if (Socket::Address(ep->_socket->id(), &s) < 0) { return; @@ -703,7 +703,7 @@ void UBShmEndpoint::PollIn(UBShmEndpoint* ep, uint32_t epEvent) { InputMessageClosure last_msg; while (true) { - int ret = ep->_ub_ring->IsUbrTrxReadable(epEvent); + int ret = ep->_ub_ring->IsUbrTrxReadable(ep_event); if (ret < 0) { return; } @@ -755,7 +755,7 @@ void UBShmEndpoint::PollIn(UBShmEndpoint* ep, uint32_t epEvent) { } } -void UBShmEndpoint::PollOut(UBShmEndpoint* ep, uint32_t epEvent) { +void UBShmEndpoint::PollOut(UBShmEndpoint* ep, uint32_t ep_event) { SocketUniquePtr s; if (Socket::Address(ep->_socket->id(), &s) < 0) { return; diff --git a/src/brpc/ub/ub_endpoint.h b/src/brpc/ub/ub_endpoint.h index 4dc7419390..0d5ea49003 100644 --- a/src/brpc/ub/ub_endpoint.h +++ b/src/brpc/ub/ub_endpoint.h @@ -148,16 +148,15 @@ friend class Socket; // return -1 if encounter other errno (including EOF) int ReadFromFd(void* data, size_t len); - // Write at most len bytes from data to fd in _socket // wait for _epollout_butex if encounter EAGAIN // return -1 if encounter other errno int WriteToFd(void* data, size_t len); // Poll CQ and get the work completion - static void PollIn(UBShmEndpoint* ep, uint32_t epEvent); + static void PollIn(UBShmEndpoint* ep, uint32_t ep_event); - static void PollOut(UBShmEndpoint* ep, uint32_t epEvent); + static void PollOut(UBShmEndpoint* ep, uint32_t ep_event); // Try to read data on TCP fd in _socket inline void TryReadOnTcp(); diff --git a/src/brpc/ub/ub_helper.cpp b/src/brpc/ub/ub_helper.cpp index c58f69617f..62bb4cdd12 100644 --- a/src/brpc/ub/ub_helper.cpp +++ b/src/brpc/ub/ub_helper.cpp @@ -78,10 +78,10 @@ static void GlobalUBInitializeOrDieImpl() { g_ub_available.store(true, butil::memory_order_relaxed); } -static pthread_once_t initialize_UB_once = PTHREAD_ONCE_INIT; +static pthread_once_t initialize_ub_once = PTHREAD_ONCE_INIT; void 
GlobalUBInitializeOrDie() { - if (pthread_once(&initialize_UB_once, + if (pthread_once(&initialize_ub_once, GlobalUBInitializeOrDieImpl) != 0) { LOG(FATAL) << "Fail to pthread_once GlobalUBInitializeOrDie"; exit(1); diff --git a/src/brpc/ub/ub_ring.cpp b/src/brpc/ub/ub_ring.cpp index 0ca2766d58..b3bdd34a33 100644 --- a/src/brpc/ub/ub_ring.cpp +++ b/src/brpc/ub/ub_ring.cpp @@ -24,7 +24,7 @@ namespace brpc { namespace ub { -uint32_t g_sleepTime[UBR_TASK_STEP_NUM] = {0}; +uint32_t g_sleep_time[UBR_TASK_STEP_NUM] = {0}; #define TIME_COVERSION 1000 DEFINE_int32(ub_disconnect_timeout, 1, "Ubshm disconnection timeout."); DEFINE_int32(ub_connect_timeout, 1, "Ubshm connection timeout."); @@ -37,14 +37,14 @@ UBRing::UBRing() UBRing::~UBRing() {} -RETURN_CODE UBRing::UbrTrxMapShm(SHM *localShm, SHM *remoteShm) +RETURN_CODE UBRing::UbrTrxMapShm(SHM *local_shm, SHM *remote_shm) { - RETURN_CODE rc = UbrTrxMapLocalShm(localShm); + RETURN_CODE rc = UbrTrxMapLocalShm(local_shm); if (UNLIKELY(rc != HLC_OK)) { LOG(ERROR) << "Trx map local shared memory failed."; return rc; } - rc = UbrTrxMapRemoteShm(remoteShm); + rc = UbrTrxMapRemoteShm(remote_shm); if (UNLIKELY(rc != HLC_OK)) { LOG(ERROR) << "Trx map remote shared memory failed."; return rc; @@ -56,35 +56,35 @@ RETURN_CODE UBRing::UbrTrxClose() { if (UNLIKELY(UbrTrxCloseCheck(_trx) != HLC_OK)) { return HLC_ERR; } - ((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = UBR_STATE_CLOSING; + ((UbrEventQMsg *)_trx->ubr_rx.remote_tx_event_q.addr)->flag = UBR_STATE_CLOSING; - uint32_t disconnectTimeout = FLAGS_ub_disconnect_timeout; - uint64_t startTime = GetCurNanoSeconds(); + uint32_t disconnect_timeout = FLAGS_ub_disconnect_timeout; + uint64_t start_time = GetCurNanoSeconds(); - if (((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr)->flag == UBR_STATE_CONNECTED) { - ((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr)->flag = UBR_STATE_CLOSED; - _trx->ubrTx.trxState = UBR_STATE_CLOSED; + if (((UbrEventQMsg 
*)_trx->ubr_tx.local_tx_event_q.addr)->flag == UBR_STATE_CONNECTED) { + ((UbrEventQMsg *)_trx->ubr_tx.local_tx_event_q.addr)->flag = UBR_STATE_CLOSED; + _trx->ubr_tx.trx_state = UBR_STATE_CLOSED; } - ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CLOSED; - while (((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag != UBR_STATE_CLOSED) { + ((UbrEventQMsg *)_trx->ubr_tx.remote_rx_event_q.addr)->flag = UBR_STATE_CLOSED; + while (((UbrEventQMsg *)_trx->ubr_rx.local_rx_event_q.addr)->flag != UBR_STATE_CLOSED) { UbrSetSleepTask(UBR_TASK_CLOSE); - if (HasTimedOut(startTime, disconnectTimeout) != HLC_OK) { - LOG(ERROR) << "Local shm " << _trx->localShm.name + if (HasTimedOut(start_time, disconnect_timeout) != HLC_OK) { + LOG(ERROR) << "Local shm " << _trx->local_shm.name << " wait for the peer to close the connection failed."; - _trx->ubrRx.trxState = UBR_STATE_CLOSED; - ClearTrxResource(_trx, startTime, UBR_SEND_CLOSE); + _trx->ubr_rx.trx_state = UBR_STATE_CLOSED; + ClearTrxResource(_trx, start_time, UBR_SEND_CLOSE); return HLC_ERR_TIMEOUT; } usleep(1); } - _trx->ubrRx.trxState = UBR_STATE_CLOSED; + _trx->ubr_rx.trx_state = UBR_STATE_CLOSED; RETURN_CODE rc; - if (UNLIKELY((rc = ClearTrxResource(_trx, startTime, UBR_SEND_CLOSE)) != HLC_OK)) { - LOG(ERROR) << "Trx close, clear trx resource failed, trx local name=" << _trx->localShm.name; + if (UNLIKELY((rc = ClearTrxResource(_trx, start_time, UBR_SEND_CLOSE)) != HLC_OK)) { + LOG(ERROR) << "Trx close, clear trx resource failed, trx local name=" << _trx->local_shm.name; return HLC_ERR; } - LOG(INFO) << "The peer is closed, local name=" << _trx->localShm.name; + LOG(INFO) << "The peer is closed, local name=" << _trx->local_shm.name; return HLC_OK; } @@ -94,29 +94,29 @@ RETURN_CODE UBRing::UbrAddCloseTimer() { return HLC_ERR; } - uint32_t eventQTimerInterval = FLAGS_ub_event_queue_timer_interval * TIME_COVERSION; - struct itimerspec timeSpec = { - .it_interval = {.tv_sec = 0, .tv_nsec = 
eventQTimerInterval}, + uint32_t event_q_timer_interval = FLAGS_ub_event_queue_timer_interval * TIME_COVERSION; + struct itimerspec time_spec = { + .it_interval = {.tv_sec = 0, .tv_nsec = event_q_timer_interval}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; - int timerFd = TimerStart(&timeSpec, UbrTrxCloseCallback, (void*)_trx); - if (UNLIKELY(timerFd == -1)) { - LOG(ERROR) << "Start ubr close timer failed, trx local name=" << _trx->localShm.name; + int timer_fd = TimerStart(&time_spec, UbrTrxCloseCallback, (void*)_trx); + if (UNLIKELY(timer_fd == -1)) { + LOG(ERROR) << "Start ubr close timer failed, trx local name=" << _trx->local_shm.name; return HLC_ERR; } - _trx->timerFd = timerFd; + _trx->timer_fd = timer_fd; return HLC_OK; } RETURN_CODE UBRing::UbrAddTimer() { if (UNLIKELY(UbrAddCloseTimer() != HLC_OK)) { - LOG(ERROR) << "Ubr " << _trx->localShm.name << " add closed timer failed."; + LOG(ERROR) << "Ubr " << _trx->local_shm.name << " add closed timer failed."; return HLC_ERR; } if (UNLIKELY(UbrAddHBTimer() != HLC_OK)) { - DeleteTimerSafe((uint32_t)_trx->timerFd); - LOG(ERROR) << "Ubr " << _trx->localShm.name << " add heartbeat timer failed."; + DeleteTimerSafe((uint32_t)_trx->timer_fd); + LOG(ERROR) << "Ubr " << _trx->local_shm.name << " add heartbeat timer failed."; return HLC_ERR; } return HLC_OK; @@ -128,34 +128,34 @@ void* UBRing::UbrTrxCloseCallback(void* args) { return nullptr; } - auto* localRxEventQ = (UbrEventQMsg *)trx->ubrRx.localRxEventQ.addr; - auto* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; - if (localRxEventQ->flag != UBR_STATE_CLOSED || localTxEventQ->flag == UBR_STATE_CLOSED) { + auto* local_rx_event_q = (UbrEventQMsg *)trx->ubr_rx.local_rx_event_q.addr; + auto* local_tx_event_q = (UbrEventQMsg *)trx->ubr_tx.local_tx_event_q.addr; + if (local_rx_event_q->flag != UBR_STATE_CLOSED || local_tx_event_q->flag == UBR_STATE_CLOSED) { return nullptr; } - trx->ubrRx.trxState = UBR_STATE_CLOSED; - int fd = (int)trx->localShm.fd; + 
trx->ubr_rx.trx_state = UBR_STATE_CLOSED; + int fd = (int)trx->local_shm.fd; do { - if (ATOMIC_LOAD(trx->closeCnt) == 0) { - LOG(ERROR) << "Trx close callback failed, exist other closing call, name=" << trx->localShm.name; + if (ATOMIC_LOAD(trx->close_cnt) == 0) { + LOG(ERROR) << "Trx close callback failed, exist other closing call, name=" << trx->local_shm.name; break; } - ATOMIC_SUB(trx->closeCnt, 1); + ATOMIC_SUB(trx->close_cnt, 1); - uint64_t startTime = GetCurNanoSeconds(); + uint64_t start_time = GetCurNanoSeconds(); - if (localTxEventQ->flag == UBR_STATE_CONNECTED || ATOMIC_LOAD(trx->closeCnt) == 1) { - localTxEventQ->flag = UBR_STATE_CLOSED; - trx->ubrTx.trxState = UBR_STATE_CLOSED; + if (local_tx_event_q->flag == UBR_STATE_CONNECTED || ATOMIC_LOAD(trx->close_cnt) == 1) { + local_tx_event_q->flag = UBR_STATE_CLOSED; + trx->ubr_tx.trx_state = UBR_STATE_CLOSED; } - UbrEventQMsg* remoteRxEventQ = (UbrEventQMsg *)trx->ubrTx.remoteRxEventQ.addr; - if (remoteRxEventQ == nullptr) { - LOG(ERROR) << "Trx close callback failed, " << trx->localShm.name << " remoteRxEventQ is NULL."; + UbrEventQMsg* remote_rx_event_q = (UbrEventQMsg *)trx->ubr_tx.remote_rx_event_q.addr; + if (remote_rx_event_q == nullptr) { + LOG(ERROR) << "Trx close callback failed, " << trx->local_shm.name << " remote_rx_event_q is NULL."; break; } - remoteRxEventQ->flag = UBR_STATE_CLOSED; - if (UNLIKELY(ClearTrxResource(trx, startTime, UBR_CALL_BACK_CLOSE, 1) != HLC_OK)) { - LOG(ERROR) << "Trx close callback failed, " << trx->localShm.name << " clear trx resource failed."; + remote_rx_event_q->flag = UBR_STATE_CLOSED; + if (UNLIKELY(ClearTrxResource(trx, start_time, UBR_CALL_BACK_CLOSE, 1) != HLC_OK)) { + LOG(ERROR) << "Trx close callback failed, " << trx->local_shm.name << " clear trx resource failed."; break; } } while (0); @@ -168,16 +168,16 @@ RETURN_CODE UBRing::UbrAddHBTimer() { return HLC_ERR; } - struct itimerspec timeSpec = { + struct itimerspec time_spec = { .it_interval = {.tv_sec = 
FLAGS_ub_hb_timer_interval, .tv_nsec = 0}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; - int timerFd = TimerStart(&timeSpec, UbrTrxHBCallback, (void*)_trx); - if (UNLIKELY(timerFd == -1)) { + int timer_fd = TimerStart(&time_spec, UbrTrxHBCallback, (void*)_trx); + if (UNLIKELY(timer_fd == -1)) { LOG(ERROR) << "Start ubr heartbeat timer failed."; return HLC_ERR; } - _trx->hbTimerFd = timerFd; + _trx->hb_timer_fd = timer_fd; return HLC_OK; } @@ -185,26 +185,26 @@ RETURN_CODE UBRing::UbrPassiveClearTrx(UbrTrx *trx, int fd, PASSIVE_DISC_TYPE ty if (UNLIKELY(UbrTrxCloseCheck(trx) != HLC_OK)) { return HLC_ERR; } - trx->ubrTx.trxState = UBR_STATE_CLOSED; - trx->ubrRx.trxState = UBR_STATE_CLOSED; - DeleteTimerSafe((uint32_t)trx->timerFd); - const char *typeName = NULL; + trx->ubr_tx.trx_state = UBR_STATE_CLOSED; + trx->ubr_rx.trx_state = UBR_STATE_CLOSED; + DeleteTimerSafe((uint32_t)trx->timer_fd); + const char *type_name = NULL; if (type == UBR_HEARTBEAT) { - DeleteTimer((uint32_t)trx->hbTimerFd); - typeName = "Trx heartbeat"; + DeleteTimer((uint32_t)trx->hb_timer_fd); + type_name = "Trx heartbeat"; } else if (type == UBR_UB_EVENT) { - DeleteTimerSafe((uint32_t)trx->hbTimerFd); - typeName = "Ub event callback"; + DeleteTimerSafe((uint32_t)trx->hb_timer_fd); + type_name = "Ub event callback"; } sleep(FLAGS_ub_flying_io_timeout); - int rc = ShmLocalFree(&trx->remoteShm); + int rc = ShmLocalFree(&trx->remote_shm); if (rc != HLC_OK) { - LOG(ERROR) << typeName << ", delete remote shm failed. ret=" << rc; + LOG(ERROR) << type_name << ", delete remote shm failed. ret=" << rc; } - rc = ShmLocalFree(&trx->localShm); + rc = ShmLocalFree(&trx->local_shm); if (rc != HLC_OK) { - LOG(ERROR) << typeName << ", delete local shm failed. ret=" << rc; + LOG(ERROR) << type_name << ", delete local shm failed. 
ret=" << rc; } UBRingManager::ReleaseUbrTrxFromMgr(trx); @@ -217,32 +217,32 @@ void* UBRing::UbrTrxHBCallback(void* args) { return NULL; } - auto* localDataStatus = (UbrDataStatusQMsg *)trx->ubrTx.localDataStatusQ.addr; - auto* remoteDataStatus = (UbrDataStatusQMsg *)trx->ubrRx.remoteDataStatusQ.addr; - if (UNLIKELY(localDataStatus == NULL || remoteDataStatus == NULL)) { + auto* local_data_status = (UbrDataStatusQMsg *)trx->ubr_tx.local_data_status_q.addr; + auto* remote_data_status = (UbrDataStatusQMsg *)trx->ubr_rx.remote_data_status_q.addr; + if (UNLIKELY(local_data_status == NULL || remote_data_status == NULL)) { LOG(ERROR) << "Heartbeat error, datastatus is NULL."; return NULL; } - if (trx->ubrTx.trxState != UBR_STATE_CONNECTED || trx->ubrRx.trxState != UBR_STATE_CONNECTED) { + if (trx->ubr_tx.trx_state != UBR_STATE_CONNECTED || trx->ubr_rx.trx_state != UBR_STATE_CONNECTED) { LOG_EVERY_SECOND(INFO) << "Heartbeat cannot be started, wait connected state."; return NULL; } - remoteDataStatus->heartBeat = 1; - if (localDataStatus->heartBeat == 1) { - localDataStatus->heartBeat = 0; - trx->ubrTx.hbRetryCnt = 0; + remote_data_status->heart_beat = 1; + if (local_data_status->heart_beat == 1) { + local_data_status->heart_beat = 0; + trx->ubr_tx.hb_retry_cnt = 0; return NULL; } - ++trx->ubrTx.hbRetryCnt; - if (trx->ubrTx.hbRetryCnt <= FLAGS_ub_hb_retry_cnt) { + ++trx->ubr_tx.hb_retry_cnt; + if (trx->ubr_tx.hb_retry_cnt <= FLAGS_ub_hb_retry_cnt) { return NULL; } - int fd = (int)trx->localShm.fd; - LOG(INFO) << "Hlc heartbeat, start to clear trx resource. hbTimerFd=" << fd << ", shmName=" << trx->localShm.name; + int fd = (int)trx->local_shm.fd; + LOG(INFO) << "Hlc heartbeat, start to clear trx resource. 
hbTimerFd=" << fd << ", shmName=" << trx->local_shm.name; UbrPassiveClearTrx(trx, fd, UBR_HEARTBEAT); LOG(INFO) << "Hlc heartbeat clear trx resource finish."; return NULL; @@ -254,17 +254,17 @@ RETURN_CODE UBRing::UbrAddAsynClearTimer(UbrTrx *trx) { return HLC_ERR; } - struct itimerspec timeSpec = { + struct itimerspec time_spec = { .it_interval = {.tv_sec = 0, .tv_nsec = 0}, .it_value = {.tv_sec = FLAGS_ub_flying_io_timeout, .tv_nsec = 0} }; - int timerFd = TimerStart(&timeSpec, UbrAsynClearCallback, (void*)trx); - if (UNLIKELY(timerFd == -1)) { - LOG(ERROR) << "Start ubr close timer failed, trx name=%s.", trx->localShm.name; + int timer_fd = TimerStart(&time_spec, UbrAsynClearCallback, (void*)trx); + if (UNLIKELY(timer_fd == -1)) { + LOG(ERROR) << "Start ubr close timer failed, trx name=%s.", trx->local_shm.name; return HLC_ERR; } - trx->clearTimerFd = timerFd; + trx->clear_timer_fd = timer_fd; return HLC_OK; } @@ -276,117 +276,117 @@ void *UBRing::UbrAsynClearCallback(void *args) return NULL; } - if (UNLIKELY(ShmRemoteFree(&trx->remoteShm) != HLC_OK)) { - LOG(ERROR) << "Trx close, remote shm " << trx->remoteShm.name << " free failed."; + if (UNLIKELY(ShmRemoteFree(&trx->remote_shm) != HLC_OK)) { + LOG(ERROR) << "Trx close, remote shm " << trx->remote_shm.name << " free failed."; } if (UNLIKELY(UbrTrxFreeShm(trx) != HLC_OK)) { - LOG(ERROR) << "Trx close, wait for local shm " << trx->localShm.name << " free fail."; + LOG(ERROR) << "Trx close, wait for local shm " << trx->local_shm.name << " free fail."; } if (UNLIKELY(UBRingManager::ReleaseUbrTrxFromMgr(trx) != HLC_OK)) { - LOG(ERROR) << "Trx close, release shm " << trx->localShm.name << " trx failed."; + LOG(ERROR) << "Trx close, release shm " << trx->local_shm.name << " trx failed."; } return NULL; } -int UBRing::UbrTrxSend(const void *buf, uint32_t bufLen) +int UBRing::UbrTrxSend(const void *buf, uint32_t buf_len) { if (UNLIKELY(CheckTrxSendPreCheck(_trx) != HLC_OK)) { return HLC_ERR; } // 1.2 计算空间 - auto 
*dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; - auto *dataMsg = (UbrMsgFormat *)_trx->ubrTx.remoteDataQ.addr; - uint32_t cap = _trx->ubrTx.capacity; - uint32_t tail = dataStatusMsg->tail; - uint32_t remainChunkNum = - (_trx->ubrTx.writePos > tail) ? (tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); - uint32_t needMsgChunkNum = CalcUbrMsgChunkCnt(bufLen); - if (remainChunkNum < needMsgChunkNum) { + auto *data_status_msg = (UbrDataStatusQMsg *)_trx->ubr_tx.local_data_status_q.addr; + auto *data_msg = (UbrMsgFormat *)_trx->ubr_tx.remote_data_q.addr; + uint32_t cap = _trx->ubr_tx.capacity; + uint32_t tail = data_status_msg->tail; + uint32_t remain_chunk_num = + (_trx->ubr_tx.write_pos > tail) ? (tail + cap - _trx->ubr_tx.write_pos) : (tail - _trx->ubr_tx.write_pos); + uint32_t need_msg_chunk_num = CalcUbrMsgChunkCnt(buf_len); + if (remain_chunk_num < need_msg_chunk_num) { return HLC_RETRY; } - UbrMsgFormat *msg = &(_trx->ubrTx.localMsgSpace); - uint32_t totalSendLen = 0; - uint32_t remainBufLen = bufLen; - uint8_t isLastPkt = 0; - _trx->ubrTx.outIoId++; - ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->ioId = _trx->ubrTx.outIoId; - while (remainBufLen > 0) { - isLastPkt = (uint8_t)(remainBufLen <= UBR_MSG_PAYLOAD_LEN); - msg->header[UBR_MSG_FLAG_INDEX] = isLastPkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; - msg->header[UBR_MSG_LEN_INDEX] = isLastPkt ? (uint8_t)remainBufLen : UBR_MSG_PAYLOAD_LEN; + UbrMsgFormat *msg = &(_trx->ubr_tx.local_msg_space); + uint32_t total_send_len = 0; + uint32_t remain_buf_len = buf_len; + uint8_t is_last_pkt = 0; + _trx->ubr_tx.out_io_id++; + ((UbrEventQMsg *)_trx->ubr_tx.remote_rx_event_q.addr)->io_id = _trx->ubr_tx.out_io_id; + while (remain_buf_len > 0) { + is_last_pkt = (uint8_t)(remain_buf_len <= UBR_MSG_PAYLOAD_LEN); + msg->header[UBR_MSG_FLAG_INDEX] = is_last_pkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; + msg->header[UBR_MSG_LEN_INDEX] = is_last_pkt ? 
(uint8_t)remain_buf_len : UBR_MSG_PAYLOAD_LEN; msg->header[UBR_MSG_CUR_INDEX] = 0; - memcpy(msg->payload.inner, (const uint8_t *)buf + totalSendLen, msg->header[UBR_MSG_LEN_INDEX]); - Copy64Byte((int8_t *)&dataMsg[_trx->ubrTx.writePos], (int8_t *)msg); - _trx->ubrTx.writePos = (_trx->ubrTx.writePos + 1) % cap; - totalSendLen += msg->header[UBR_MSG_LEN_INDEX]; - remainBufLen -= msg->header[UBR_MSG_LEN_INDEX]; + memcpy(msg->payload.inner, (const uint8_t *)buf + total_send_len, msg->header[UBR_MSG_LEN_INDEX]); + Copy64Byte((int8_t *)&data_msg[_trx->ubr_tx.write_pos], (int8_t *)msg); + _trx->ubr_tx.write_pos = (_trx->ubr_tx.write_pos + 1) % cap; + total_send_len += msg->header[UBR_MSG_LEN_INDEX]; + remain_buf_len -= msg->header[UBR_MSG_LEN_INDEX]; } - return (int)totalSendLen; + return (int)total_send_len; } -int UBRing::UbrTrxRecv(void *buf, uint32_t bufLen) +int UBRing::UbrTrxRecv(void *buf, uint32_t buf_len) { RETURN_CODE rc = HLC_OK; - if (UNLIKELY((rc = CheckTrxRecvParam(_trx, buf, bufLen)) != HLC_OK)) { + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, buf, buf_len)) != HLC_OK)) { return (rc == UBR_NOT_CONNECTED) ? 
0 : rc; } - UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr; - uint32_t readPosEnd = _trx->ubrRx.readPos; - uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX]; + UbrMsgFormat *data_msg = (UbrMsgFormat *)_trx->ubr_rx.local_data_q.addr; + uint32_t read_pos_end = _trx->ubr_rx.read_pos; + uint8_t flag = data_msg[read_pos_end].header[UBR_MSG_FLAG_INDEX]; if (flag == UBR_MSG_CHUNK_NONE) { return HLC_RETRY; } - return UbrTrxRecvBlockMode(static_cast(buf), bufLen); + return UbrTrxRecvBlockMode(static_cast(buf), buf_len); } -int UBRing::UbrTrxRecvBlockMode(uint8_t *dest, uint32_t bufLen) +int UBRing::UbrTrxRecvBlockMode(uint8_t *dest, uint32_t buf_len) { RETURN_CODE rc = HLC_OK; - if (UNLIKELY((rc = CheckTrxRecvParam(_trx, dest, bufLen)) != HLC_OK)) { + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, dest, buf_len)) != HLC_OK)) { return (rc == UBR_NOT_CONNECTED) ? 0 : rc; } - int32_t totalCopied = 0; - int32_t remainingLen = (int32_t)bufLen; - bool notEofEncountered = true; + int32_t total_copied = 0; + int32_t remaining_len = (int32_t)buf_len; + bool not_eof_encountered = true; - UbrRx *ubrRx = &_trx->ubrRx; - UbrMsgFormat *dataMsg = (UbrMsgFormat *)ubrRx->localDataQ.addr; - bool needUpdateEpollEofPos = ubrRx->readPos == ubrRx->epEofPos; + UbrRx *ubr_rx = &_trx->ubr_rx; + UbrMsgFormat *data_msg = (UbrMsgFormat *)ubr_rx->local_data_q.addr; + bool need_update_epoll_eof_pos = ubr_rx->read_pos == ubr_rx->ep_eof_pos; - while (notEofEncountered && remainingLen > 0) { + while (not_eof_encountered && remaining_len > 0) { if (UNLIKELY(CheckTrxRecvPreCheck(_trx) != HLC_OK)) { return HLC_ERR; } - UbrMsgFormat *currentChunk = &dataMsg[ubrRx->readPos]; - uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX]; + UbrMsgFormat *current_chunk = &data_msg[ubr_rx->read_pos]; + uint8_t flag = current_chunk->header[UBR_MSG_FLAG_INDEX]; if (flag == UBR_MSG_CHUNK_NONE) { continue; } if (flag == UBR_MSG_CHUNK_EOF) { - notEofEncountered = false; + not_eof_encountered = 
false; } - uint8_t chunkMsgLen = currentChunk->header[UBR_MSG_LEN_INDEX]; - uint8_t curIndex = currentChunk->header[UBR_MSG_CUR_INDEX]; - uint8_t availableData = chunkMsgLen - curIndex; - - int32_t copyLen = (remainingLen < availableData) ? remainingLen : availableData; - memcpy(dest + totalCopied, dataMsg[ubrRx->readPos].payload.inner + curIndex, (size_t)copyLen); - totalCopied += copyLen; - remainingLen -= copyLen; - currentChunk->header[UBR_MSG_CUR_INDEX] += (uint8_t)copyLen; - if (LIKELY(currentChunk->header[UBR_MSG_CUR_INDEX] == chunkMsgLen)) { - currentChunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; + uint8_t chunk_msg_len = current_chunk->header[UBR_MSG_LEN_INDEX]; + uint8_t cur_index = current_chunk->header[UBR_MSG_CUR_INDEX]; + uint8_t available_data = chunk_msg_len - cur_index; + + int32_t copy_len = (remaining_len < available_data) ? remaining_len : available_data; + memcpy(dest + total_copied, data_msg[ubr_rx->read_pos].payload.inner + cur_index, (size_t)copy_len); + total_copied += copy_len; + remaining_len -= copy_len; + current_chunk->header[UBR_MSG_CUR_INDEX] += (uint8_t)copy_len; + if (LIKELY(current_chunk->header[UBR_MSG_CUR_INDEX] == chunk_msg_len)) { + current_chunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; UpdateDataQTail(_trx); - ubrRx->readPos = (ubrRx->readPos + 1) % ubrRx->capacity; + ubr_rx->read_pos = (ubr_rx->read_pos + 1) % ubr_rx->capacity; } } - if (needUpdateEpollEofPos) { - ubrRx->epEofPos = ubrRx->readPos; + if (need_update_epoll_eof_pos) { + ubr_rx->ep_eof_pos = ubr_rx->read_pos; } - return (int)totalCopied; + return (int)total_copied; } ssize_t UBRing::UbrTrxWritev(const struct iovec *iov, int iovcnt) @@ -395,54 +395,54 @@ ssize_t UBRing::UbrTrxWritev(const struct iovec *iov, int iovcnt) return HLC_ERR; } - size_t bufLen = 0; + size_t buf_len = 0; for (int i = 0; i < iovcnt; i++) { - bufLen += iov[i].iov_len; + buf_len += iov[i].iov_len; } - RETURN_CODE rc = WritevHasEnoughSpace(bufLen); + RETURN_CODE rc = 
WritevHasEnoughSpace(buf_len); if (rc != HLC_OK) { return rc; } - UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrTx.remoteDataQ.addr; - UbrMsgFormat *msg = &(_trx->ubrTx.localMsgSpace); - int curIov = 0; - size_t curIovPos = 0; - ssize_t totalSendLen = 0; - size_t pktRemainN = 0; - size_t iovRemain = 0; + UbrMsgFormat *data_msg = (UbrMsgFormat *)_trx->ubr_tx.remote_data_q.addr; + UbrMsgFormat *msg = &(_trx->ubr_tx.local_msg_space); + int cur_iov = 0; + size_t cur_iov_pos = 0; + ssize_t total_send_len = 0; + size_t pkt_remain_n = 0; + size_t iov_remain = 0; size_t fulled = 0; - uint8_t isLastPkt = 0; - uint8_t curPktLen = 0; - _trx->ubrTx.outIoId++; - ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->ioId = _trx->ubrTx.outIoId; - while (bufLen > 0) { - isLastPkt = (uint8_t)(bufLen <= UBR_MSG_PAYLOAD_LEN); - curPktLen = isLastPkt ? (uint8_t)bufLen : UBR_MSG_PAYLOAD_LEN; - msg->header[UBR_MSG_FLAG_INDEX] = isLastPkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; - msg->header[UBR_MSG_LEN_INDEX] = curPktLen; + uint8_t is_last_pkt = 0; + uint8_t cur_pkt_len = 0; + _trx->ubr_tx.out_io_id++; + ((UbrEventQMsg *)_trx->ubr_tx.remote_rx_event_q.addr)->io_id = _trx->ubr_tx.out_io_id; + while (buf_len > 0) { + is_last_pkt = (uint8_t)(buf_len <= UBR_MSG_PAYLOAD_LEN); + cur_pkt_len = is_last_pkt ? (uint8_t)buf_len : UBR_MSG_PAYLOAD_LEN; + msg->header[UBR_MSG_FLAG_INDEX] = is_last_pkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; + msg->header[UBR_MSG_LEN_INDEX] = cur_pkt_len; msg->header[UBR_MSG_CUR_INDEX] = 0; - pktRemainN = curPktLen; - while (curIov < iovcnt && pktRemainN > 0) { - iovRemain = (iov[curIov].iov_len - curIovPos); - fulled = iovRemain > pktRemainN ? pktRemainN : iovRemain; - memcpy((msg->payload.inner + (curPktLen - (uint8_t)pktRemainN)), - (uint8_t *)(iov[curIov].iov_base) + curIovPos, + pkt_remain_n = cur_pkt_len; + while (cur_iov < iovcnt && pkt_remain_n > 0) { + iov_remain = (iov[cur_iov].iov_len - cur_iov_pos); + fulled = iov_remain > pkt_remain_n ? 
pkt_remain_n : iov_remain; + memcpy((msg->payload.inner + (cur_pkt_len - (uint8_t)pkt_remain_n)), + (uint8_t *)(iov[cur_iov].iov_base) + cur_iov_pos, fulled); - pktRemainN -= fulled; - curIovPos += fulled; - if (curIovPos == iov[curIov].iov_len) { - curIov++; - curIovPos = 0; + pkt_remain_n -= fulled; + cur_iov_pos += fulled; + if (cur_iov_pos == iov[cur_iov].iov_len) { + cur_iov++; + cur_iov_pos = 0; } } - Copy64Byte((int8_t *)&dataMsg[_trx->ubrTx.writePos], (int8_t *)msg); - _trx->ubrTx.writePos = (_trx->ubrTx.writePos + 1) % _trx->ubrTx.capacity; - totalSendLen += (ssize_t)curPktLen; - bufLen -= (int)curPktLen; + Copy64Byte((int8_t *)&data_msg[_trx->ubr_tx.write_pos], (int8_t *)msg); + _trx->ubr_tx.write_pos = (_trx->ubr_tx.write_pos + 1) % _trx->ubr_tx.capacity; + total_send_len += (ssize_t)cur_pkt_len; + buf_len -= (int)cur_pkt_len; } - return totalSendLen; + return total_send_len; } ssize_t UBRing::UbrTrxReadv(const struct iovec *iov, int iovcnt) @@ -451,9 +451,9 @@ ssize_t UBRing::UbrTrxReadv(const struct iovec *iov, int iovcnt) if (UNLIKELY((rc = CheckTrxRecvParam(_trx, iov, (uint32_t)iovcnt)) != HLC_OK)) { return (rc == UBR_NOT_CONNECTED) ? 0 : rc; } - UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr; - uint32_t readPosEnd = _trx->ubrRx.readPos; - uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX]; + UbrMsgFormat *data_msg = (UbrMsgFormat *)_trx->ubr_rx.local_data_q.addr; + uint32_t read_pos_end = _trx->ubr_rx.read_pos; + uint8_t flag = data_msg[read_pos_end].header[UBR_MSG_FLAG_INDEX]; if (flag == UBR_MSG_CHUNK_NONE) { errno = EAGAIN; return -1; @@ -474,108 +474,108 @@ ssize_t UBRing::UbrTrxReadvBlockMode(const struct iovec *iov, int iovcnt) return (rc == UBR_NOT_CONNECTED) ? 
0 : rc; } - size_t remainBufLen = 0; + size_t remain_buf_len = 0; for (int i = 0; i < iovcnt; i++) { - remainBufLen += iov[i].iov_len; + remain_buf_len += iov[i].iov_len; } - bool needUpdateEpollEofPos = _trx->ubrRx.readPos == _trx->ubrRx.epEofPos; - ssize_t totalRecvLen = StartReadv(_trx, iov, iovcnt, remainBufLen); + bool need_update_epoll_eof_pos = _trx->ubr_rx.read_pos == _trx->ubr_rx.ep_eof_pos; + ssize_t total_recv_len = StartReadv(_trx, iov, iovcnt, remain_buf_len); - if (needUpdateEpollEofPos) { - _trx->ubrRx.epEofPos = _trx->ubrRx.readPos; + if (need_update_epoll_eof_pos) { + _trx->ubr_rx.ep_eof_pos = _trx->ubr_rx.read_pos; } - return totalRecvLen; + return total_recv_len; } -RETURN_CODE UBRing::IsUbrTrxReadable(uint32_t epEvent) +RETURN_CODE UBRing::IsUbrTrxReadable(uint32_t ep_event) { if (UNLIKELY(_trx == NULL)) { LOG(ERROR) << "The trx to be checked is NULL."; return HLC_ERR; } - if (UNLIKELY(_trx->localShm.addr == NULL)) { - LOG(ERROR) << "The trx localShm to be checked is NULL."; + if (UNLIKELY(_trx->local_shm.addr == NULL)) { + LOG(ERROR) << "The trx local_shm to be checked is NULL."; return HLC_ERR; } - if (UNLIKELY(_trx->ubrTx.trxState != UBR_STATE_CONNECTED)) { + if (UNLIKELY(_trx->ubr_tx.trx_state != UBR_STATE_CONNECTED)) { // TODO mwj 这几块的日志是否需要删除 // LOG(ERROR) << "The trx is not connected state."; return HLC_ERR; } - uint64_t ioId = ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->ioId; - if ((epEvent & EPOLLET) && ioId == _trx->ubrRx.inIoId) { + uint64_t io_id = ((UbrEventQMsg *)_trx->ubr_rx.local_rx_event_q.addr)->io_id; + if ((ep_event & EPOLLET) && io_id == _trx->ubr_rx.in_io_id) { return MPA_MUXER_NOT_READY; } - uint32_t readPosEnd = _trx->ubrRx.readPos; - if (epEvent & EPOLLET) { - readPosEnd = _trx->ubrRx.epEofPos; + uint32_t read_pos_end = _trx->ubr_rx.read_pos; + if (ep_event & EPOLLET) { + read_pos_end = _trx->ubr_rx.ep_eof_pos; } - UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr; - uint8_t flag = 
dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX]; + UbrMsgFormat *data_msg = (UbrMsgFormat *)_trx->ubr_rx.local_data_q.addr; + uint8_t flag = data_msg[read_pos_end].header[UBR_MSG_FLAG_INDEX]; if (flag == UBR_MSG_CHUNK_NONE) { return MPA_MUXER_NOT_READY; } - if (epEvent & EPOLLET) { - _trx->ubrRx.inIoId = ioId; + if (ep_event & EPOLLET) { + _trx->ubr_rx.in_io_id = io_id; } return HLC_OK; } -RETURN_CODE UBRing::IsUbrTrxWriteable(uint32_t epEvent) +RETURN_CODE UBRing::IsUbrTrxWriteable(uint32_t ep_event) { if (UNLIKELY(_trx == NULL)) { LOG(ERROR) << "The trx to be checked is NULL."; return HLC_ERR; } - if (UNLIKELY(_trx->localShm.addr == NULL)) { - LOG(ERROR) << "The trx localShm to be checked is NULL."; + if (UNLIKELY(_trx->local_shm.addr == NULL)) { + LOG(ERROR) << "The trx local_shm to be checked is NULL."; return HLC_ERR; } - if (UNLIKELY((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr == NULL)) { - LOG(ERROR) << "The trx localTxEventQ addr is NULL."; + if (UNLIKELY((UbrEventQMsg *)_trx->ubr_tx.local_tx_event_q.addr == NULL)) { + LOG(ERROR) << "The trx local_tx_event_q addr is NULL."; return HLC_ERR; } - if (UNLIKELY((UbrEventQMsg *)_trx->ubrTx.localDataStatusQ.addr == NULL)) { - LOG(ERROR) << "The trx localDataStatusQ addr is NULL."; + if (UNLIKELY((UbrEventQMsg *)_trx->ubr_tx.local_data_status_q.addr == NULL)) { + LOG(ERROR) << "The trx local_data_status_q addr is NULL."; return HLC_ERR; } - if (UNLIKELY(_trx->ubrTx.trxState != UBR_STATE_CONNECTED)) { + if (UNLIKELY(_trx->ubr_tx.trx_state != UBR_STATE_CONNECTED)) { LOG(ERROR) << "The trx is not connected state."; return HLC_ERR; } - UbrDataStatusQMsg *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; - uint32_t cap = _trx->ubrTx.capacity; - uint32_t tail = dataStatusMsg->tail; - uint32_t remainChunkNum = - (_trx->ubrTx.writePos > tail) ? 
(tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); - if (remainChunkNum == 0) { - _trx->ubrTx.epLastCap = remainChunkNum; + UbrDataStatusQMsg *data_status_msg = (UbrDataStatusQMsg *)_trx->ubr_tx.local_data_status_q.addr; + uint32_t cap = _trx->ubr_tx.capacity; + uint32_t tail = data_status_msg->tail; + uint32_t remain_chunk_num = + (_trx->ubr_tx.write_pos > tail) ? (tail + cap - _trx->ubr_tx.write_pos) : (tail - _trx->ubr_tx.write_pos); + if (remain_chunk_num == 0) { + _trx->ubr_tx.ep_last_cap = remain_chunk_num; return MPA_MUXER_NOT_READY; } - if ((epEvent & EPOLLET) && (_trx->ubrTx.epLastCap >= remainChunkNum)) { - _trx->ubrTx.epLastCap = remainChunkNum; + if ((ep_event & EPOLLET) && (_trx->ubr_tx.ep_last_cap >= remain_chunk_num)) { + _trx->ubr_tx.ep_last_cap = remain_chunk_num; return MPA_MUXER_NOT_READY; } - _trx->ubrTx.epLastCap = remainChunkNum; + _trx->ubr_tx.ep_last_cap = remain_chunk_num; return HLC_OK; } -RETURN_CODE UBRing::UbrSetTimeout(UbrTaskStep taskType, int timeout) +RETURN_CODE UBRing::UbrSetTimeout(UbrTaskStep task_type, int timeout) { - if (taskType >= UBR_TASK_STEP_NUM || timeout < 0) { + if (task_type >= UBR_TASK_STEP_NUM || timeout < 0) { LOG(ERROR) << "Set timeout failed, invalid task type."; return HLC_ERR; } - g_sleepTime[taskType] = (uint32_t)timeout; - LOG(INFO) << "Set timeout success, taskType=" << taskType << ", timeout=" << timeout; + g_sleep_time[task_type] = (uint32_t)timeout; + LOG(INFO) << "Set timeout success, task_type=" << task_type << ", timeout=" << timeout; return HLC_OK; } @@ -587,32 +587,32 @@ RETURN_CODE UBRing::UbrTrxFreeShm(UbrTrx *trx) } RETURN_CODE rc = HLC_OK; - rc = ShmMunmap(&trx->localShm); + rc = ShmMunmap(&trx->local_shm); if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx close, local unmap " << trx->localShm.name << " shm fail."; + LOG(ERROR) << "Trx close, local unmap " << trx->local_shm.name << " shm fail."; return HLC_ERR; } - rc = ShmFree(&trx->localShm); + rc = ShmFree(&trx->local_shm); 
if (UNLIKELY(rc != HLC_OK)) { if (UNLIKELY(rc == SHM_ERR_RESOURCE_ATTACHED || rc == SHM_ERR_NOT_FOUND)) { - LOG(INFO) << "Wait for " << trx->remoteShm.name << " remote free shm."; + LOG(INFO) << "Wait for " << trx->remote_shm.name << " remote free shm."; return HLC_OK; } - LOG(ERROR) << "Wait for " << trx->localShm.name << " local shm free fail."; + LOG(ERROR) << "Wait for " << trx->local_shm.name << " local shm free fail."; return HLC_ERR; } - size_t nameLen = strlen(trx->remoteShm.name); - if (!(nameLen <= 0 || nameLen > SHM_MAX_NAME_LEN || trx->remoteShm.len <= 0)) { - rc = ShmFree(&trx->remoteShm); + size_t name_len = strlen(trx->remote_shm.name); + if (!(name_len <= 0 || name_len > SHM_MAX_NAME_LEN || trx->remote_shm.len <= 0)) { + rc = ShmFree(&trx->remote_shm); } if (rc != HLC_OK) { if (rc == SHM_ERR_RESOURCE_ATTACHED || rc == SHM_ERR_NOT_FOUND) { - LOG(INFO) << "Wait for " << trx->remoteShm.name << " remote free shm."; + LOG(INFO) << "Wait for " << trx->remote_shm.name << " remote free shm."; return HLC_OK; } - LOG(ERROR) << "Wait for " << trx->remoteShm.name << " remote shm free fail."; + LOG(ERROR) << "Wait for " << trx->remote_shm.name << " remote shm free fail."; return HLC_ERR; } @@ -648,7 +648,7 @@ void UBRing::PrewriteUbrTx(UbrTx *tx) if (tx == NULL) { return; } - PreWriteAddr(tx->remoteDataQ.addr, tx->capacity * sizeof(UbrMsgFormat)); + PreWriteAddr(tx->remote_data_q.addr, tx->capacity * sizeof(UbrMsgFormat)); } void UBRing::PrewriteUbrRx(UbrRx *rx) @@ -656,92 +656,92 @@ void UBRing::PrewriteUbrRx(UbrRx *rx) if (rx == NULL) { return; } - PreWriteAddr(rx->localDataQ.addr, rx->capacity * sizeof(UbrMsgFormat)); + PreWriteAddr(rx->local_data_q.addr, rx->capacity * sizeof(UbrMsgFormat)); } -RETURN_CODE UBRing::UbrTrxMapLocalShm(SHM *localShm) +RETURN_CODE UBRing::UbrTrxMapLocalShm(SHM *local_shm) { if (UNLIKELY(_trx == NULL)) { LOG(ERROR) << "Trx map Shared memory failed, trx is null."; return HLC_ERR; } - if (UNLIKELY(localShm == NULL)) { - LOG(ERROR) 
<< "Trx map Shared memory failed, localShm is null."; + if (UNLIKELY(local_shm == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, local_shm is null."; return HLC_ERR; } - _trx->localShm = *localShm; - _trx->ubrTx.localTxEventQ.addr = localShm->addr + TX_EVENTQ_ADDR_OFFSET; - _trx->ubrTx.localTxEventQ.len = UBR_EVENTQ_LEN; - _trx->ubrRx.localRxEventQ.addr = localShm->addr + RX_EVENTQ_ADDR_OFFSET; - _trx->ubrRx.localRxEventQ.len = UBR_EVENTQ_LEN; - _trx->ubrTx.localDataStatusQ.addr = localShm->addr + DATASTATUSQ_ADDR_OFFSET; - _trx->ubrTx.localDataStatusQ.len = UBR_DATASTATUSQ_LEN; - size_t addrAlignedOffset = Aligned64Offset(localShm->addr + DATAQ_ADDR_OFFSET); - LOG(DEBUG) << "UbrRx's localDataQ address will aligned with offset=" << addrAlignedOffset; - _trx->ubrRx.localDataQ.addr = localShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; - _trx->ubrRx.localDataQ.len = localShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; + _trx->local_shm = *local_shm; + _trx->ubr_tx.local_tx_event_q.addr = local_shm->addr + TX_EVENTQ_ADDR_OFFSET; + _trx->ubr_tx.local_tx_event_q.len = UBR_EVENTQ_LEN; + _trx->ubr_rx.local_rx_event_q.addr = local_shm->addr + RX_EVENTQ_ADDR_OFFSET; + _trx->ubr_rx.local_rx_event_q.len = UBR_EVENTQ_LEN; + _trx->ubr_tx.local_data_status_q.addr = local_shm->addr + DATASTATUSQ_ADDR_OFFSET; + _trx->ubr_tx.local_data_status_q.len = UBR_DATASTATUSQ_LEN; + size_t addr_aligned_offset = Aligned64Offset(local_shm->addr + DATAQ_ADDR_OFFSET); + LOG(DEBUG) << "UbrRx's local_data_q address will aligned with offset=" << addr_aligned_offset; + _trx->ubr_rx.local_data_q.addr = local_shm->addr + DATAQ_ADDR_OFFSET + addr_aligned_offset; + _trx->ubr_rx.local_data_q.len = local_shm->len - DATAQ_ADDR_OFFSET - addr_aligned_offset; return HLC_OK; } -RETURN_CODE UBRing::UbrTrxMapRemoteShm(SHM *remoteShm) +RETURN_CODE UBRing::UbrTrxMapRemoteShm(SHM *remote_shm) { if (UNLIKELY(_trx == NULL)) { LOG(ERROR) << "Trx map Shared memory failed, trx is null."; return HLC_ERR; } 
- if (UNLIKELY(remoteShm == NULL)) { - LOG(ERROR) << "Trx map Shared memory failed, remoteShm is null."; + if (UNLIKELY(remote_shm == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, remote_shm is null."; return HLC_ERR; } - _trx->remoteShm = *remoteShm; - _trx->ubrRx.remoteTxEventQ.addr = remoteShm->addr + TX_EVENTQ_ADDR_OFFSET; - _trx->ubrRx.remoteTxEventQ.len = UBR_EVENTQ_LEN; - _trx->ubrTx.remoteRxEventQ.addr = remoteShm->addr + RX_EVENTQ_ADDR_OFFSET; - _trx->ubrTx.remoteRxEventQ.len = UBR_EVENTQ_LEN; - _trx->ubrRx.remoteDataStatusQ.addr = remoteShm->addr + DATASTATUSQ_ADDR_OFFSET; - _trx->ubrRx.remoteDataStatusQ.len = UBR_DATASTATUSQ_LEN; - size_t addrAlignedOffset = Aligned64Offset(remoteShm->addr + DATAQ_ADDR_OFFSET); - LOG(DEBUG) << "UbrTx's remoteDataQ will aligned with offset=" << addrAlignedOffset; - _trx->ubrTx.remoteDataQ.addr = remoteShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; - _trx->ubrTx.remoteDataQ.len = remoteShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; + _trx->remote_shm = *remote_shm; + _trx->ubr_rx.remote_tx_event_q.addr = remote_shm->addr + TX_EVENTQ_ADDR_OFFSET; + _trx->ubr_rx.remote_tx_event_q.len = UBR_EVENTQ_LEN; + _trx->ubr_tx.remote_rx_event_q.addr = remote_shm->addr + RX_EVENTQ_ADDR_OFFSET; + _trx->ubr_tx.remote_rx_event_q.len = UBR_EVENTQ_LEN; + _trx->ubr_rx.remote_data_status_q.addr = remote_shm->addr + DATASTATUSQ_ADDR_OFFSET; + _trx->ubr_rx.remote_data_status_q.len = UBR_DATASTATUSQ_LEN; + size_t addr_aligned_offset = Aligned64Offset(remote_shm->addr + DATAQ_ADDR_OFFSET); + LOG(DEBUG) << "UbrTx's remote_data_q will aligned with offset=" << addr_aligned_offset; + _trx->ubr_tx.remote_data_q.addr = remote_shm->addr + DATAQ_ADDR_OFFSET + addr_aligned_offset; + _trx->ubr_tx.remote_data_q.len = remote_shm->len - DATAQ_ADDR_OFFSET - addr_aligned_offset; return HLC_OK; } -RETURN_CODE UBRing::UbrServerTrxInit(SHM *localShm, SHM *remoteShm) +RETURN_CODE UBRing::UbrServerTrxInit(SHM *local_shm, SHM *remote_shm) { - 
RETURN_CODE rc = UbrTrxMapShm(localShm, remoteShm); + RETURN_CODE rc = UbrTrxMapShm(local_shm, remote_shm); if (UNLIKELY(rc != HLC_OK)) { LOG(ERROR) <<"Trx map shared memory failed."; return rc; } - uint32_t localDataMsgCap = (uint32_t)(_trx->ubrRx.localDataQ.len / UBR_MSG_LEN); - uint32_t remoteDataMsgCap = (uint32_t)(_trx->ubrTx.remoteDataQ.len / UBR_MSG_LEN); - _trx->ubrRx.capacity = localDataMsgCap; - _trx->ubrTx.capacity = remoteDataMsgCap; - rc = UBRingManager::GetHlcDealMsgMaxCnt(_trx->ubrRx.capacity, &_trx->ubrRx.dealMsgMaxCnt); + uint32_t local_data_msg_cap = (uint32_t)(_trx->ubr_rx.local_data_q.len / UBR_MSG_LEN); + uint32_t remote_data_msg_cap = (uint32_t)(_trx->ubr_tx.remote_data_q.len / UBR_MSG_LEN); + _trx->ubr_rx.capacity = local_data_msg_cap; + _trx->ubr_tx.capacity = remote_data_msg_cap; + rc = UBRingManager::GetHlcDealMsgMaxCnt(_trx->ubr_rx.capacity, &_trx->ubr_rx.deal_msg_max_cnt); if (UNLIKELY(rc != HLC_OK)) { LOG(ERROR) << "Get hlc deal msg max cnt."; return rc; } - PrewriteUbrRx(&_trx->ubrRx); - PrewriteUbrTx(&_trx->ubrTx); + PrewriteUbrRx(&_trx->ubr_rx); + PrewriteUbrTx(&_trx->ubr_tx); - ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->tail = remoteDataMsgCap - 1; - ((UbrDataStatusQMsg *)(_trx->ubrRx.remoteDataStatusQ.addr))->tail = localDataMsgCap - 1; + ((UbrDataStatusQMsg *)(_trx->ubr_tx.local_data_status_q.addr))->tail = remote_data_msg_cap - 1; + ((UbrDataStatusQMsg *)(_trx->ubr_rx.remote_data_status_q.addr))->tail = local_data_msg_cap - 1; if (UNLIKELY(UbrAddTimer() != HLC_OK)) { - LOG(ERROR) << "Ubr add timer failed, localName=" << localShm->name; + LOG(ERROR) << "Ubr add timer failed, localName=" << local_shm->name; return HLC_ERR; } - ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->timeout = FLAGS_ub_connect_timeout; - ((UbrDataStatusQMsg *)(_trx->ubrRx.remoteDataStatusQ.addr))->timeout = FLAGS_ub_connect_timeout; + ((UbrDataStatusQMsg *)(_trx->ubr_tx.local_data_status_q.addr))->timeout = 
FLAGS_ub_connect_timeout; + ((UbrDataStatusQMsg *)(_trx->ubr_rx.remote_data_status_q.addr))->timeout = FLAGS_ub_connect_timeout; - ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CONNECTED; - ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag = UBR_STATE_CONNECTED; - _trx->ubrTx.trxState = UBR_STATE_CONNECTED; - _trx->ubrRx.trxState = UBR_STATE_CONNECTED; + ((UbrEventQMsg *)_trx->ubr_tx.remote_rx_event_q.addr)->flag = UBR_STATE_CONNECTED; + ((UbrEventQMsg *)_trx->ubr_rx.local_rx_event_q.addr)->flag = UBR_STATE_CONNECTED; + _trx->ubr_tx.trx_state = UBR_STATE_CONNECTED; + _trx->ubr_rx.trx_state = UBR_STATE_CONNECTED; return HLC_OK; } @@ -757,8 +757,8 @@ int UBRing::UbrAllocateServerShm(SHM* remote_trx_shm, SHM* local_trx_shm) { return -1; } - UbrTrx **ubrTrxPtr = &_trx; - if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(ubrTrxPtr)) != HLC_OK)) { + UbrTrx **ubr_trx_ptr = &_trx; + if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(ubr_trx_ptr)) != HLC_OK)) { LOG(ERROR) << "Acquire ubrtrx failed."; ShmRemoteFree(remote_trx_shm); ShmLocalFree(local_trx_shm); @@ -797,182 +797,182 @@ int UBRing::UbrMapRemoteShm(SHM *local_trx_shm, const char *local_name) LOG(ERROR) << "Connect Trx failed, local shm name=" << local_trx_shm->name; return -1; } - PrewriteUbrRx(&_trx->ubrRx); - PrewriteUbrTx(&_trx->ubrTx); - ((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = UBR_STATE_CONNECTED; - ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag = UBR_STATE_CONNECTED; - _trx->ubrTx.trxState = UBR_STATE_CONNECTED; - _trx->ubrRx.trxState = UBR_STATE_CONNECTED; + PrewriteUbrRx(&_trx->ubr_rx); + PrewriteUbrTx(&_trx->ubr_tx); + ((UbrEventQMsg *)_trx->ubr_rx.remote_tx_event_q.addr)->flag = UBR_STATE_CONNECTED; + ((UbrEventQMsg *)_trx->ubr_rx.local_rx_event_q.addr)->flag = UBR_STATE_CONNECTED; + _trx->ubr_tx.trx_state = UBR_STATE_CONNECTED; + _trx->ubr_rx.trx_state = UBR_STATE_CONNECTED; return 0; } -RETURN_CODE UBRing::UbrMapRemoteShmAddTimer(SHM *localTrxShm, 
const char *localName) +RETURN_CODE UBRing::UbrMapRemoteShmAddTimer(SHM *local_trx_shm, const char *local_name) { - uint64_t startTime = GetCurNanoSeconds(); + uint64_t start_time = GetCurNanoSeconds(); - size_t remoteServerLen = UBR_MSG_LEN * (((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->tail + 1) + + size_t remote_server_len = UBR_MSG_LEN * (((UbrDataStatusQMsg *)(_trx->ubr_tx.local_data_status_q.addr))->tail + 1) + UBR_MSG_LEN * ((DATAQ_ADDR_OFFSET / UBR_MSG_LEN) + 1); - SHM remoteTrxShm = {NULL, remoteServerLen, 0, {0}, localTrxShm->fd}; - int result = snprintf(remoteTrxShm.name, + SHM remote_trx_shm = {NULL, remote_server_len, 0, {0}, local_trx_shm->fd}; + int result = snprintf(remote_trx_shm.name, SHM_MAX_NAME_BUFF_LEN, "%s_%s_%s", SHM_NAME_PREFIX, - localName, + local_name, SERVER_SHM_NAME_SUFFIX); if (UNLIKELY(result < 0)) { - LOG(ERROR) << "Copy server shared memory name failed, localName=%s, ret=%d.", localName, result; + LOG(ERROR) << "Copy server shared memory name failed, localName=%s, ret=%d.", local_name, result; return HLC_ERR; } UbrSetSleepTask(UBR_TASK_CONNECT_MAP_FRONT); - RETURN_CODE rc = ApplyAndMapRemoteShm(&remoteTrxShm); + RETURN_CODE rc = ApplyAndMapRemoteShm(&remote_trx_shm); if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Connect Trx map shared memory failed, remote shm=" << remoteTrxShm.name; + LOG(ERROR) << "Connect Trx map shared memory failed, remote shm=" << remote_trx_shm.name; return rc; } if (UNLIKELY(UbrAddTimer() != HLC_OK)) { - LOG(ERROR) << "Ubr add timer failed, localName=" << localName; - ShmRemoteFree(&remoteTrxShm); + LOG(ERROR) << "Ubr add timer failed, localName=" << local_name; + ShmRemoteFree(&remote_trx_shm); return HLC_ERR; } UbrSetSleepTask(UBR_TASK_CONNECT_MAP_AFTER); - uint32_t timeout = ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->timeout; - if (HasTimedOut(startTime, timeout) != HLC_OK) { - LOG(ERROR) << "Local shm " << localTrxShm->name << " wait for connect remote map timeout."; - 
DeleteTimerSafe((uint32_t)_trx->hbTimerFd); - DeleteTimerSafe((uint32_t)_trx->timerFd); - ShmRemoteFree(&remoteTrxShm); + uint32_t timeout = ((UbrDataStatusQMsg *)(_trx->ubr_tx.local_data_status_q.addr))->timeout; + if (HasTimedOut(start_time, timeout) != HLC_OK) { + LOG(ERROR) << "Local shm " << local_trx_shm->name << " wait for connect remote map timeout."; + DeleteTimerSafe((uint32_t)_trx->hb_timer_fd); + DeleteTimerSafe((uint32_t)_trx->timer_fd); + ShmRemoteFree(&remote_trx_shm); return HLC_ERR_TIMEOUT; } return HLC_OK; } -RETURN_CODE UBRing::ApplyAndMapLocalShm(SHM *localTrxShm, const char *localName) +RETURN_CODE UBRing::ApplyAndMapLocalShm(SHM *local_trx_shm, const char *local_name) { - if (UNLIKELY(_trx == NULL || localTrxShm == NULL)) { - LOG(ERROR) << "Trx map Shared memory failed, trx is null, localName=" << localName; + if (UNLIKELY(_trx == NULL || local_trx_shm == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, trx is null, localName=" << local_name; return HLC_ERR; } - int result = snprintf(localTrxShm->name, + int result = snprintf(local_trx_shm->name, SHM_MAX_NAME_BUFF_LEN, "%s_%s_%s", SHM_NAME_PREFIX, - localName, + local_name, CLIENT_SHM_NAME_SUFFIX); if (UNLIKELY(result < 0)) { - LOG(ERROR) << "Copy client localTrx shared memory name failed, localName=" << localName << ", ret=" << result; + LOG(ERROR) << "Copy client localTrx shared memory name failed, localName=" << local_name << ", ret=" << result; return HLC_ERR; } - RETURN_CODE rc = ShmLocalCalloc(localTrxShm); + RETURN_CODE rc = ShmLocalCalloc(local_trx_shm); if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx apply local shared memory failed, local shm name=" << localTrxShm->name; + LOG(ERROR) << "Trx apply local shared memory failed, local shm name=" << local_trx_shm->name; if (rc == SHM_ERR_EXIST || rc == SHM_ERR_NOT_FOUND) { rc = UBR_ERR_ADDR_IN_USE; } UBRingManager::ReleaseUbrTrxFromMgr(_trx); return rc; } - rc = UbrTrxMapLocalShm(localTrxShm); + rc = 
UbrTrxMapLocalShm(local_trx_shm); if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx map local shared memory failed, local shm name=" << localTrxShm->name; - ShmLocalFree(localTrxShm); + LOG(ERROR) << "Trx map local shared memory failed, local shm name=" << local_trx_shm->name; + ShmLocalFree(local_trx_shm); UBRingManager::ReleaseUbrTrxFromMgr(_trx); return rc; } - ((UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr)->timeout = FLAGS_ub_connect_timeout; - _trx->ubrRx.capacity = (uint32_t)(_trx->ubrRx.localDataQ.len / UBR_MSG_LEN); - rc = UBRingManager::GetHlcDealMsgMaxCnt(_trx->ubrRx.capacity, &_trx->ubrRx.dealMsgMaxCnt); + ((UbrDataStatusQMsg *)_trx->ubr_tx.local_data_status_q.addr)->timeout = FLAGS_ub_connect_timeout; + _trx->ubr_rx.capacity = (uint32_t)(_trx->ubr_rx.local_data_q.len / UBR_MSG_LEN); + rc = UBRingManager::GetHlcDealMsgMaxCnt(_trx->ubr_rx.capacity, &_trx->ubr_rx.deal_msg_max_cnt); if (rc != HLC_OK) { - LOG(ERROR) << "Get hlc deal msg max cnt, local shm name=" << localTrxShm->name; - ShmLocalFree(localTrxShm); + LOG(ERROR) << "Get hlc deal msg max cnt, local shm name=" << local_trx_shm->name; + ShmLocalFree(local_trx_shm); UBRingManager::ReleaseUbrTrxFromMgr(_trx); return rc; } return HLC_OK; } -RETURN_CODE UBRing::ApplyAndMapRemoteShm(SHM *remoteTrxShm) +RETURN_CODE UBRing::ApplyAndMapRemoteShm(SHM *remote_trx_shm) { - RETURN_CODE rc = ShmRemoteMalloc(remoteTrxShm); + RETURN_CODE rc = ShmRemoteMalloc(remote_trx_shm); if (UNLIKELY(rc != HLC_OK)) { LOG(ERROR) << "Trx apply remote shared memory failed."; return rc; } - rc = UbrTrxMapRemoteShm(remoteTrxShm); + rc = UbrTrxMapRemoteShm(remote_trx_shm); if (UNLIKELY(rc != HLC_OK)) { LOG(ERROR) << "Trx map shared memory failed."; - ShmRemoteFree(remoteTrxShm); + ShmRemoteFree(remote_trx_shm); return rc; } - _trx->ubrTx.capacity = (uint32_t)(_trx->ubrTx.remoteDataQ.len / UBR_MSG_LEN); + _trx->ubr_tx.capacity = (uint32_t)(_trx->ubr_tx.remote_data_q.len / UBR_MSG_LEN); return HLC_OK; } -RETURN_CODE 
UBRing::WritevHasEnoughSpace(size_t bufLen) +RETURN_CODE UBRing::WritevHasEnoughSpace(size_t buf_len) { - UbrDataStatusQMsg *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; - uint32_t cap = _trx->ubrTx.capacity; - uint32_t tail = dataStatusMsg->tail; - uint32_t remainChunkNum = - (_trx->ubrTx.writePos > tail) ? (tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); - uint32_t needMsgChunkNum = CalcUbrMsgChunkCnt((uint32_t)bufLen); - if (remainChunkNum < needMsgChunkNum) { + UbrDataStatusQMsg *data_status_msg = (UbrDataStatusQMsg *)_trx->ubr_tx.local_data_status_q.addr; + uint32_t cap = _trx->ubr_tx.capacity; + uint32_t tail = data_status_msg->tail; + uint32_t remain_chunk_num = + (_trx->ubr_tx.write_pos > tail) ? (tail + cap - _trx->ubr_tx.write_pos) : (tail - _trx->ubr_tx.write_pos); + uint32_t need_msg_chunk_num = CalcUbrMsgChunkCnt((uint32_t)buf_len); + if (remain_chunk_num < need_msg_chunk_num) { return HLC_RETRY; } return HLC_OK; } -RETURN_CODE UBRing::UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType) +RETURN_CODE UBRing::UbrClearResourceCheck(UbrTrx *trx, uint64_t start_time, UbrCloseType close_type) { if (UNLIKELY(trx == NULL)) { LOG(ERROR) << "Trx close failed, trx is null."; return HLC_ERR; } - UbrEventQMsg* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; - while (ATOMIC_LOAD(trx->closeCnt) == 1 && localTxEventQ->flag == UBR_STATE_CLOSING) { - if (HasTimedOut(startTime, FLAGS_ub_disconnect_timeout) != HLC_OK) { + UbrEventQMsg *local_tx_event_q = (UbrEventQMsg *)trx->ubr_tx.local_tx_event_q.addr; + while (ATOMIC_LOAD(trx->close_cnt) == 1 && local_tx_event_q->flag == UBR_STATE_CLOSING) { + if (HasTimedOut(start_time, FLAGS_ub_disconnect_timeout) != HLC_OK) { LOG(ERROR) << "Trx close failed, wait close time out."; break; } usleep(1); } - int firstClearExpected = UBR_CLOSE_FIRST; - int secondClearExpected = UBR_CLOSE_SECOND; - if (localTxEventQ->flag == UBR_STATE_CLOSING) { - if 
(ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeState, firstClearExpected, UBR_CLOSE_SECOND)) { - LOG(ERROR) << "Trx close, exist process is closing, name=" << trx->localShm.name; + int first_clear_expected = UBR_CLOSE_FIRST; + int second_clear_expected = UBR_CLOSE_SECOND; + if (local_tx_event_q->flag == UBR_STATE_CLOSING) { + if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->close_state, first_clear_expected, UBR_CLOSE_SECOND)) { + LOG(ERROR) << "Trx close, exist process is closing, name=" << trx->local_shm.name; return HLC_REENTRY; - } else if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeState, secondClearExpected, UBR_CLOSE_END)) { - localTxEventQ->flag = UBR_STATE_CLOSED; - trx->ubrTx.trxState = UBR_STATE_CLOSED; + } else if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->close_state, second_clear_expected, UBR_CLOSE_END)) { + local_tx_event_q->flag = UBR_STATE_CLOSED; + trx->ubr_tx.trx_state = UBR_STATE_CLOSED; } } - if (closeType == UBR_SEND_CLOSE) { - DeleteTimerSafe((uint32_t)trx->timerFd); + if (close_type == UBR_SEND_CLOSE) { + DeleteTimerSafe((uint32_t)trx->timer_fd); } else { - DeleteTimer((uint32_t)trx->timerFd); + DeleteTimer((uint32_t)trx->timer_fd); } - DeleteTimerSafe((uint32_t)trx->hbTimerFd); + DeleteTimerSafe((uint32_t)trx->hb_timer_fd); return HLC_OK; } -RETURN_CODE UBRing::ClearTrxResource(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType, int op) +RETURN_CODE UBRing::ClearTrxResource(UbrTrx *trx, uint64_t start_time, UbrCloseType close_type, int op) { - UbrEventQMsg* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; - RETURN_CODE rc = UbrClearResourceCheck(trx, startTime, closeType); + UbrEventQMsg *local_tx_event_q = (UbrEventQMsg *)trx->ubr_tx.local_tx_event_q.addr; + RETURN_CODE rc = UbrClearResourceCheck(trx, start_time, close_type); if (rc != HLC_OK) { return rc; } rc = UbrAddAsynClearTimer(trx); if (rc != HLC_OK) { - LOG(ERROR) << "Trx close, add " << trx->localShm.name << " close clear timer failed."; + LOG(ERROR) << "Trx close, add " << 
trx->local_shm.name << " close clear timer failed."; return HLC_ERR; } @@ -986,63 +986,63 @@ RETURN_CODE UBRing::UbrTrxCloseCheck(UbrTrx *trx) return HLC_ERR; } int expected = MAX_CLOSE_COUNT; - if (!ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeCnt, expected, MAX_CLOSE_COUNT - 1)) { - LOG(ERROR) << "Trx close failed, exist other close acquire, trx local name=" << trx->localShm.name; + if (!ATOMIC_COMPARE_EXCHANGE_STRONG(trx->close_cnt, expected, MAX_CLOSE_COUNT - 1)) { + LOG(ERROR) << "Trx close failed, exist other close acquire, trx local name=" << trx->local_shm.name; return HLC_ERR; } - if (UNLIKELY(trx->ubrTx.localTxEventQ.addr == NULL)) { - LOG(ERROR) << "Trx close failed, localTxEventQ addr is NULL, trx local name=" << trx->localShm.name; + if (UNLIKELY(trx->ubr_tx.local_tx_event_q.addr == NULL)) { + LOG(ERROR) << "Trx close failed, local_tx_event_q addr is NULL, trx local name=" << trx->local_shm.name; return HLC_ERR; } return HLC_OK; } -ssize_t UBRing::StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remainBufLen) +ssize_t UBRing::StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remain_buf_len) { - ssize_t totalRecvLen = 0; - int iovIndex = 0; - size_t iovPos = 0; - UbrMsgFormat *dataMsg = (UbrMsgFormat *)trx->ubrRx.localDataQ.addr; - bool notEofEncountered = true; - while (notEofEncountered && remainBufLen > 0) { + ssize_t total_recv_len = 0; + int iov_index = 0; + size_t iov_pos = 0; + UbrMsgFormat *data_msg = (UbrMsgFormat *)trx->ubr_rx.local_data_q.addr; + bool not_eof_encountered = true; + while (not_eof_encountered && remain_buf_len > 0) { if (UNLIKELY(CheckTrxRecvPreCheck(trx) != HLC_OK)) { return HLC_ERR; } - UbrMsgFormat *currentChunk = &dataMsg[trx->ubrRx.readPos]; - uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX]; + UbrMsgFormat *current_chunk = &data_msg[trx->ubr_rx.read_pos]; + uint8_t flag = current_chunk->header[UBR_MSG_FLAG_INDEX]; if (flag == UBR_MSG_CHUNK_NONE) { continue; } if (flag == 
UBR_MSG_CHUNK_EOF) { - notEofEncountered = false; + not_eof_encountered = false; } - uint8_t chunkMsgLen = currentChunk->header[UBR_MSG_LEN_INDEX]; - uint8_t curIndex = currentChunk->header[UBR_MSG_CUR_INDEX]; - uint8_t recvLen = - remainBufLen > (size_t)(chunkMsgLen - curIndex) ? (chunkMsgLen - curIndex) : (uint8_t)remainBufLen; - while (iovIndex < iovcnt && recvLen > 0) { - size_t copyLen = - recvLen > (iov[iovIndex].iov_len - iovPos) ? iov[iovIndex].iov_len - iovPos : (size_t)recvLen; - memcpy((uint8_t *)iov[iovIndex].iov_base + iovPos, currentChunk->payload.inner + curIndex, copyLen); - recvLen -= (uint8_t)copyLen; - iovPos += copyLen; - curIndex += (uint8_t)copyLen; - if (iovPos == iov[iovIndex].iov_len) { - iovIndex++; - iovPos = 0; + uint8_t chunk_msg_len = current_chunk->header[UBR_MSG_LEN_INDEX]; + uint8_t cur_index = current_chunk->header[UBR_MSG_CUR_INDEX]; + uint8_t recv_len = + remain_buf_len > (size_t)(chunk_msg_len - cur_index) ? (chunk_msg_len - cur_index) : (uint8_t)remain_buf_len; + while (iov_index < iovcnt && recv_len > 0) { + size_t copy_len = + recv_len > (iov[iov_index].iov_len - iov_pos) ? 
iov[iov_index].iov_len - iov_pos : (size_t)recv_len; + memcpy((uint8_t *)iov[iov_index].iov_base + iov_pos, current_chunk->payload.inner + cur_index, copy_len); + recv_len -= (uint8_t)copy_len; + iov_pos += copy_len; + cur_index += (uint8_t)copy_len; + if (iov_pos == iov[iov_index].iov_len) { + iov_index++; + iov_pos = 0; } - remainBufLen -= copyLen; - totalRecvLen += (ssize_t)copyLen; + remain_buf_len -= copy_len; + total_recv_len += (ssize_t)copy_len; } - currentChunk->header[UBR_MSG_CUR_INDEX] = curIndex; - if (currentChunk->header[UBR_MSG_CUR_INDEX] == chunkMsgLen) { - currentChunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; + current_chunk->header[UBR_MSG_CUR_INDEX] = cur_index; + if (current_chunk->header[UBR_MSG_CUR_INDEX] == chunk_msg_len) { + current_chunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; UpdateDataQTail(trx); - trx->ubrRx.readPos = (trx->ubrRx.readPos + 1) % trx->ubrRx.capacity; + trx->ubr_rx.read_pos = (trx->ubr_rx.read_pos + 1) % trx->ubr_rx.capacity; } } - return totalRecvLen; + return total_recv_len; } } // namespace ub } // namespace brpc \ No newline at end of file diff --git a/src/brpc/ub/ub_ring.h b/src/brpc/ub/ub_ring.h index 9c9e635de9..d1afd204fc 100644 --- a/src/brpc/ub/ub_ring.h +++ b/src/brpc/ub/ub_ring.h @@ -29,7 +29,7 @@ namespace brpc { namespace ub { DECLARE_int32(ub_flying_io_timeout); -extern uint32_t g_sleepTime[UBR_TASK_STEP_NUM]; +extern uint32_t g_sleep_time[UBR_TASK_STEP_NUM]; class UBRing { public: @@ -37,7 +37,7 @@ class UBRing { ~UBRing(); DISALLOW_COPY_AND_ASSIGN(UBRing); - RETURN_CODE UbrTrxMapShm(SHM *localShm, SHM *remoteShm); + RETURN_CODE UbrTrxMapShm(SHM *local_shm, SHM *remote_shm); RETURN_CODE UbrTrxClose(); @@ -57,44 +57,44 @@ class UBRing { static void *UbrAsynClearCallback(void *args); - int UbrTrxSend(const void *buf, uint32_t bufLen); + int UbrTrxSend(const void *buf, uint32_t buf_len); - int UbrTrxRecv(void *buf, uint32_t bufLen); + int UbrTrxRecv(void *buf, uint32_t buf_len); - int 
UbrTrxRecvBlockMode(uint8_t *dest, uint32_t bufLen); + int UbrTrxRecvBlockMode(uint8_t *dest, uint32_t buf_len); ssize_t UbrTrxWritev(const struct iovec *iov, int iovcnt); ssize_t UbrTrxReadv(const struct iovec *iov, int iovcnt); ssize_t UbrTrxReadvBlockMode(const struct iovec *iov, int iovcnt); - RETURN_CODE IsUbrTrxReadable(uint32_t epEvent); + RETURN_CODE IsUbrTrxReadable(uint32_t ep_event); - RETURN_CODE IsUbrTrxWriteable(uint32_t epEvent); + RETURN_CODE IsUbrTrxWriteable(uint32_t ep_event); - RETURN_CODE UbrSetTimeout(UbrTaskStep taskType, int timeout); + RETURN_CODE UbrSetTimeout(UbrTaskStep task_type, int timeout); static RETURN_CODE UbrTrxFreeShm(UbrTrx *trx); void PrewriteUbrTx(UbrTx *tx); void PrewriteUbrRx(UbrRx *rx); - static inline void UbrSetSleepTask(UbrTaskStep taskType) + static inline void UbrSetSleepTask(UbrTaskStep task_type) { - if (taskType >= UBR_TASK_STEP_NUM || taskType < 0) { + if (task_type >= UBR_TASK_STEP_NUM || task_type < 0) { return; } - uint32_t type = (uint32_t)taskType; - sleep(g_sleepTime[type]); + uint32_t type = (uint32_t)task_type; + sleep(g_sleep_time[type]); return; } - static inline RETURN_CODE CheckTrxConnectParam(const char *listenerName, const char *localName) + static inline RETURN_CODE CheckTrxConnectParam(const char *listener_name, const char *local_name) { - if (UNLIKELY(listenerName == NULL)) { + if (UNLIKELY(listener_name == NULL)) { LOG(ERROR) << "The request listener name is null."; return HLC_ERR; } - if (UNLIKELY(localName == NULL)) { + if (UNLIKELY(local_name == NULL)) { LOG(ERROR) << "The request trx shared memory name is null."; return HLC_ERR; } @@ -107,39 +107,39 @@ class UBRing { int UbrAllocateLocalShm(SHM *local_trx_shm, const char *shm_name); - RETURN_CODE UbrMapRemoteShmAddTimer(SHM *localTrxShm, const char *localName); + RETURN_CODE UbrMapRemoteShmAddTimer(SHM *local_trx_shm, const char *local_name); static inline RETURN_CODE CheckTrxSendPreCheck(UbrTrx *trx) { - if (UNLIKELY(trx->ubrTx.trxState != 
UBR_STATE_CONNECTED)) { + if (UNLIKELY(trx->ubr_tx.trx_state != UBR_STATE_CONNECTED)) { LOG(ERROR) << "Trx send failed, trx is not connected state."; return HLC_ERR; } return HLC_OK; } - static RETURN_CODE CheckTrxRecvParam(UbrTrx *trx, const void *buf, uint32_t bufLen) + static RETURN_CODE CheckTrxRecvParam(UbrTrx *trx, const void *buf, uint32_t buf_len) { if (UNLIKELY(trx == NULL)) { LOG(ERROR) << "Trx recv failed, trx is null."; return HLC_ERR; } - if (UNLIKELY((UbrEventQMsg *)trx->ubrRx.localRxEventQ.addr == NULL)) { - LOG(ERROR) << "Trx send failed, localTxEventQ addr is NULL."; + if (UNLIKELY((UbrEventQMsg *)trx->ubr_rx.local_rx_event_q.addr == NULL)) { + LOG(ERROR) << "Trx send failed, local_tx_event_q addr is NULL."; return HLC_ERR; } - if (UNLIKELY(trx->ubrRx.trxState != UBR_STATE_CONNECTED)) { - LOG(ERROR) << "Trx recv failed, trx is not connected statep=" << trx->ubrRx.trxState; + if (UNLIKELY(trx->ubr_rx.trx_state != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "Trx recv failed, trx is not connected statep=" << trx->ubr_rx.trx_state; return UBR_NOT_CONNECTED; } if (UNLIKELY(buf == NULL)) { LOG(ERROR) << "Trx recv failed, buf is null."; return HLC_ERR; } - if (UNLIKELY(bufLen == 0)) { - LOG(ERROR) << "Trx recv failed, bufLen is 0."; + if (UNLIKELY(buf_len == 0)) { + LOG(ERROR) << "Trx recv failed, buf_len is 0."; return HLC_ERR; } return HLC_OK; @@ -147,7 +147,7 @@ class UBRing { static inline RETURN_CODE CheckTrxRecvPreCheck(UbrTrx *trx) { - if (UNLIKELY(trx->ubrRx.trxState != UBR_STATE_CONNECTED)) { + if (UNLIKELY(trx->ubr_rx.trx_state != UBR_STATE_CONNECTED)) { LOG(ERROR) << "Trx recv failed, trx is not connected state."; return HLC_ERR; } @@ -156,7 +156,7 @@ class UBRing { static inline void UpdateDataQTail(UbrTrx *trx) { - ((UbrDataStatusQMsg *)trx->ubrRx.remoteDataStatusQ.addr)->tail = trx->ubrRx.readPos; + ((UbrDataStatusQMsg *)trx->ubr_rx.remote_data_status_q.addr)->tail = trx->ubr_rx.read_pos; } static RETURN_CODE UbrTrxCallbackCheck(UbrTrx *trx) @@ 
-165,34 +165,34 @@ class UBRing { LOG(ERROR) << "Trx close callback failed, trx is null."; return HLC_ERR; } - if (UNLIKELY(trx->localShm.addr == NULL)) { - LOG(ERROR) << "Trx close failed, localShm addr is NULL."; + if (UNLIKELY(trx->local_shm.addr == NULL)) { + LOG(ERROR) << "Trx close failed, local_shm addr is NULL."; return HLC_ERR; } - if (UNLIKELY(trx->ubrRx.localRxEventQ.addr == NULL)) { - LOG(ERROR) << "Trx close failed, localRxEventQ addr is NULL."; + if (UNLIKELY(trx->ubr_rx.local_rx_event_q.addr == NULL)) { + LOG(ERROR) << "Trx close failed, local_rx_event_q addr is NULL."; return HLC_ERR; } - if (UNLIKELY(trx->ubrTx.localTxEventQ.addr == NULL)) { - LOG(ERROR) << "Trx close failed, localTxEventQ addr is NULL."; + if (UNLIKELY(trx->ubr_tx.local_tx_event_q.addr == NULL)) { + LOG(ERROR) << "Trx close failed, local_tx_event_q addr is NULL."; return HLC_ERR; } return HLC_OK; } private: - RETURN_CODE UbrTrxMapLocalShm(SHM *localShm); - RETURN_CODE UbrTrxMapRemoteShm(SHM *remoteShm); - RETURN_CODE ApplyAndMapLocalShm(SHM *localTrxShm, const char *localName); - RETURN_CODE ApplyAndMapRemoteShm(SHM *remoteTrxShm); + RETURN_CODE UbrTrxMapLocalShm(SHM *local_shm); + RETURN_CODE UbrTrxMapRemoteShm(SHM *remote_shm); + RETURN_CODE ApplyAndMapLocalShm(SHM *local_trx_shm, const char *local_name); + RETURN_CODE ApplyAndMapRemoteShm(SHM *remote_trx_shm); static RETURN_CODE UbrTrxCloseCheck(UbrTrx *trx); - void ReleaseFileLock(int lockFd); - ssize_t StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remainBufLen); + void ReleaseFileLock(int lock_fd); + ssize_t StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remain_buf_len); void PreWriteAddr(uint8_t *addr, size_t len); - RETURN_CODE WritevHasEnoughSpace(size_t bufLen); - RETURN_CODE UbrServerTrxInit(SHM *localShm, SHM *remoteShm); - static RETURN_CODE UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType); - static RETURN_CODE ClearTrxResource(UbrTrx *trx, 
uint64_t startTime, UbrCloseType closeType, int op=0); + RETURN_CODE WritevHasEnoughSpace(size_t buf_len); + RETURN_CODE UbrServerTrxInit(SHM *local_shm, SHM *remote_shm); + static RETURN_CODE UbrClearResourceCheck(UbrTrx *trx, uint64_t start_time, UbrCloseType close_type); + static RETURN_CODE ClearTrxResource(UbrTrx *trx, uint64_t start_time, UbrCloseType close_type, int op=0); UbrTrx* _trx{nullptr}; }; diff --git a/src/brpc/ub/ub_ring_manager.cpp b/src/brpc/ub/ub_ring_manager.cpp index dfb99188f4..9ef3d25b60 100644 --- a/src/brpc/ub/ub_ring_manager.cpp +++ b/src/brpc/ub/ub_ring_manager.cpp @@ -24,18 +24,18 @@ namespace ub { DEFINE_int32(ubr_max_managed_num, 1024, "maximum number of managed ubring"); DEFINE_int32(tail_update_after_read, 8, "Position of the tail update after the read"); -UbrMgr UBRingManager::g_ubrMgr; -UbrLinkInfoMgr UBRingManager::g_linkInfoMgr; -pthread_mutex_t UBRingManager::g_ubrTrxMgrMtx = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t UBRingManager::g_ubrListenerMgrMtx = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t UBRingManager::g_linkInfoMgrMtx = PTHREAD_MUTEX_INITIALIZER; +UbrMgr UBRingManager::g_ubr_mgr; +UbrLinkInfoMgr UBRingManager::g_link_info_mgr; +pthread_mutex_t UBRingManager::g_ubr_trx_mgr_mtx = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t UBRingManager::g_ubr_listener_mgr_mtx = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t UBRingManager::g_link_info_mgr_mtx = PTHREAD_MUTEX_INITIALIZER; -uint64_t g_ubrTrxNum = 0; -uint64_t g_ubEventCnt = 0; -uint64_t g_ubrListenerNum = 0; +uint64_t g_ubr_trx_num = 0; +uint64_t g_ub_event_cnt = 0; +uint64_t g_ubr_listener_num = 0; -RETURN_CODE UBRingManager::GetHlcDealMsgMaxCnt(const uint32_t capacity, uint32_t *dealMsgMaxCnt) { - if (UNLIKELY(dealMsgMaxCnt == NULL)) { +RETURN_CODE UBRingManager::GetHlcDealMsgMaxCnt(const uint32_t capacity, uint32_t *deal_msg_max_cnt) { + if (UNLIKELY(deal_msg_max_cnt == NULL)) { LOG(ERROR) << "Get update factor failed, dealMsgMaxCnt is null."; return HLC_ERR; } @@ 
-43,16 +43,16 @@ RETURN_CODE UBRingManager::GetHlcDealMsgMaxCnt(const uint32_t capacity, uint32_t LOG(ERROR) << "Get update factor failed, factor is 0."; return HLC_ERR; } - *dealMsgMaxCnt = capacity / FLAGS_tail_update_after_read; + *deal_msg_max_cnt = capacity / FLAGS_tail_update_after_read; return HLC_OK; } RETURN_CODE UBRingManager::UbrMgrDefault() { - g_ubrMgr.trxNum = 0; - g_ubrMgr.trxCap = FLAGS_ubr_max_managed_num; - g_ubrMgr.trxMgrUnitStatus = NULL; - g_ubrMgr.trxMgr = NULL; + g_ubr_mgr.trx_num = 0; + g_ubr_mgr.trx_cap = FLAGS_ubr_max_managed_num; + g_ubr_mgr.trx_mgr_unit_status = NULL; + g_ubr_mgr.trx_mgr = NULL; return HLC_OK; } @@ -63,19 +63,19 @@ RETURN_CODE UBRingManager::UbrMgrInit() { return rc; } - size_t trxMgrSize = g_ubrMgr.trxCap * sizeof(UbrTrx); - g_ubrMgr.trxMgr = (UbrTrx *)malloc(trxMgrSize); - size_t trxMgrStatusSize = g_ubrMgr.trxCap * sizeof(UbrMgrUnitStatus); - g_ubrMgr.trxMgrUnitStatus = (UbrMgrUnitStatus *)malloc(trxMgrStatusSize); - if (UNLIKELY(g_ubrMgr.trxMgr == NULL || - g_ubrMgr.trxMgrUnitStatus == NULL)) { + size_t trx_mgr_size = g_ubr_mgr.trx_cap * sizeof(UbrTrx); + g_ubr_mgr.trx_mgr = (UbrTrx *)malloc(trx_mgr_size); + size_t trx_mgr_status_size = g_ubr_mgr.trx_cap * sizeof(UbrMgrUnitStatus); + g_ubr_mgr.trx_mgr_unit_status = (UbrMgrUnitStatus *)malloc(trx_mgr_status_size); + if (UNLIKELY(g_ubr_mgr.trx_mgr == NULL || + g_ubr_mgr.trx_mgr_unit_status == NULL)) { LOG(ERROR) << "Ubr manager memory allocation failed."; UbrMgrFini(); return HLC_ERR; } - memset(g_ubrMgr.trxMgr, 0, trxMgrSize); - memset(g_ubrMgr.trxMgrUnitStatus, UBR_MGR_UNIT_FREE, trxMgrStatusSize); + memset(g_ubr_mgr.trx_mgr, 0, trx_mgr_size); + memset(g_ubr_mgr.trx_mgr_unit_status, UBR_MGR_UNIT_FREE, trx_mgr_status_size); LinkInfoInit(); return HLC_OK; return UBR_NOT_CONNECTED; @@ -83,15 +83,15 @@ RETURN_CODE UBRingManager::UbrMgrInit() { void UBRingManager::UbrMgrFini() { { - LOCK_GUARD(g_ubrTrxMgrMtx); - FREE_PTR(g_ubrMgr.trxMgr); - 
FREE_PTR(g_ubrMgr.trxMgrUnitStatus); + LOCK_GUARD(g_ubr_trx_mgr_mtx); + FREE_PTR(g_ubr_mgr.trx_mgr); + FREE_PTR(g_ubr_mgr.trx_mgr_unit_status); } { - LOCK_GUARD(g_ubrListenerMgrMtx); + LOCK_GUARD(g_ubr_listener_mgr_mtx); } - g_ubrMgr.trxNum = 0; - g_ubrMgr.trxCap = 0; + g_ubr_mgr.trx_num = 0; + g_ubr_mgr.trx_cap = 0; LinkInfoFini(); } @@ -101,28 +101,28 @@ RETURN_CODE UBRingManager::AcquireUbrTrxFromMgr(UbrTrx **trx) { return HLC_ERR; } - if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + if (UNLIKELY(g_ubr_mgr.trx_mgr == NULL)) { LOG(ERROR) << "Acquire trx failed, trxMgr is null."; return HLC_ERR; } - LOCK_GUARD(g_ubrTrxMgrMtx); - if (g_ubrMgr.trxNum >= g_ubrMgr.trxCap) { + LOCK_GUARD(g_ubr_trx_mgr_mtx); + if (g_ubr_mgr.trx_num >= g_ubr_mgr.trx_cap) { LOG(ERROR) << "Acquire trx failed, trx number is full."; return HLC_ERR; } - for (uint32_t i = 0; i < g_ubrMgr.trxCap; ++i) { - if (g_ubrMgr.trxMgrUnitStatus[i] == UBR_MGR_UNIT_FREE) { - memset(&g_ubrMgr.trxMgr[i], 0, sizeof(UbrTrx)); - g_ubrMgr.trxMgrUnitStatus[i] = UBR_MGR_UNIT_USED; - *trx = &g_ubrMgr.trxMgr[i]; - (*trx)->trxMgrIndex = i; - (*trx)->ubrId = g_ubrTrxNum; - (*trx)->closeState = UBR_CLOSE_FIRST; - (*trx)->closeCnt = MAX_CLOSE_COUNT; - ++g_ubrMgr.trxNum; - ++g_ubrTrxNum; + for (uint32_t i = 0; i < g_ubr_mgr.trx_cap; ++i) { + if (g_ubr_mgr.trx_mgr_unit_status[i] == UBR_MGR_UNIT_FREE) { + memset(&g_ubr_mgr.trx_mgr[i], 0, sizeof(UbrTrx)); + g_ubr_mgr.trx_mgr_unit_status[i] = UBR_MGR_UNIT_USED; + *trx = &g_ubr_mgr.trx_mgr[i]; + (*trx)->trx_mgr_index = i; + (*trx)->ubr_id = g_ubr_trx_num; + (*trx)->close_state = UBR_CLOSE_FIRST; + (*trx)->close_cnt = MAX_CLOSE_COUNT; + ++g_ubr_mgr.trx_num; + ++g_ubr_trx_num; return HLC_OK; } } @@ -136,125 +136,125 @@ RETURN_CODE UBRingManager::ReleaseUbrTrxFromMgr(UbrTrx *trx) { return HLC_ERR; } - trx->localShm.addr = NULL; - trx->ubrTx.localTxEventQ.addr = NULL; - trx->ubrTx.localDataStatusQ.addr = NULL; - trx->ubrRx.localRxEventQ.addr = NULL; - trx->ubrRx.remoteDataStatusQ.addr 
= NULL; - if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + trx->local_shm.addr = NULL; + trx->ubr_tx.local_tx_event_q.addr = NULL; + trx->ubr_tx.local_data_status_q.addr = NULL; + trx->ubr_rx.local_rx_event_q.addr = NULL; + trx->ubr_rx.remote_data_status_q.addr = NULL; + if (UNLIKELY(g_ubr_mgr.trx_mgr == NULL)) { LOG(ERROR) << "Release trx failed, trxMgr is null."; return HLC_ERR; } - LOCK_GUARD(g_ubrTrxMgrMtx); - if (g_ubrMgr.trxNum == 0) { + LOCK_GUARD(g_ubr_trx_mgr_mtx); + if (g_ubr_mgr.trx_num == 0) { LOG(ERROR) << "Release trx failed, trx number is 0."; return HLC_ERR; } - uint32_t idx = trx->trxMgrIndex; - if (g_ubrMgr.trxMgrUnitStatus[idx] == UBR_MGR_UNIT_FREE) { + uint32_t idx = trx->trx_mgr_index; + if (g_ubr_mgr.trx_mgr_unit_status[idx] == UBR_MGR_UNIT_FREE) { LOG(ERROR) << "Release trx failed, trx is not in manager."; return HLC_ERR; } - g_ubrMgr.trxMgrUnitStatus[idx] = UBR_MGR_UNIT_FREE; - --g_ubrMgr.trxNum; + g_ubr_mgr.trx_mgr_unit_status[idx] = UBR_MGR_UNIT_FREE; + --g_ubr_mgr.trx_num; return HLC_OK; } void UBRingManager::LinkInfoInit(void) { - size_t linkInfoMgrSize = FLAGS_ubr_max_managed_num * sizeof(UbrLinkInfo); - g_linkInfoMgr.allLinkInfo = (UbrLinkInfo*) malloc(linkInfoMgrSize); - if (g_linkInfoMgr.allLinkInfo == NULL) { + size_t link_info_mgr_size = FLAGS_ubr_max_managed_num * sizeof(UbrLinkInfo); + g_link_info_mgr.all_link_info = (UbrLinkInfo*) malloc(link_info_mgr_size); + if (g_link_info_mgr.all_link_info == NULL) { LOG(ERROR) << "allLinkInfo is NULL"; LinkInfoFini(); return; } - g_linkInfoMgr.linkMgrUnitStatus = (UbrMgrUnitStatus*) malloc(linkInfoMgrSize); - if (g_linkInfoMgr.linkMgrUnitStatus == NULL) { + g_link_info_mgr.link_mgr_unit_status = (UbrMgrUnitStatus*) malloc(link_info_mgr_size); + if (g_link_info_mgr.link_mgr_unit_status == NULL) { LinkInfoFini(); return; } - memset(g_linkInfoMgr.allLinkInfo, 0, linkInfoMgrSize); - memset(g_linkInfoMgr.linkMgrUnitStatus, 0, linkInfoMgrSize); + memset(g_link_info_mgr.all_link_info, 0, 
link_info_mgr_size); + memset(g_link_info_mgr.link_mgr_unit_status, 0, link_info_mgr_size); } void UBRingManager::LinkInfoFini(void) { - if (g_linkInfoMgr.linkMgrUnitStatus == NULL || g_linkInfoMgr.allLinkInfo == NULL) { + if (g_link_info_mgr.link_mgr_unit_status == NULL || g_link_info_mgr.all_link_info == NULL) { LOG(ERROR) << "LinkInfo is NULL"; return; } { - LOCK_GUARD(g_linkInfoMgrMtx); - FREE_PTR(g_linkInfoMgr.allLinkInfo); - FREE_PTR(g_linkInfoMgr.linkMgrUnitStatus); + LOCK_GUARD(g_link_info_mgr_mtx); + FREE_PTR(g_link_info_mgr.all_link_info); + FREE_PTR(g_link_info_mgr.link_mgr_unit_status); } - g_linkInfoMgr.linkNum = 0; + g_link_info_mgr.link_num = 0; } -void UBRingManager::AcquireLinkInfoToMgr(const char *listenerName, UbrTrx *trx) { - if (listenerName == NULL || trx == NULL) { +void UBRingManager::AcquireLinkInfoToMgr(const char *listener_name, UbrTrx *trx) { + if (listener_name == NULL || trx == NULL) { LOG(ERROR) << "LinkInfo acquire fail."; return; } - if (g_linkInfoMgr.linkMgrUnitStatus == NULL || g_linkInfoMgr.allLinkInfo == NULL) { + if (g_link_info_mgr.link_mgr_unit_status == NULL || g_link_info_mgr.all_link_info == NULL) { LOG(ERROR) << "LinkInfo is NULL."; return; } - uint32_t ubrIndex = trx->trxMgrIndex; - char* connectName = trx->localShm.name; - if (g_linkInfoMgr.linkMgrUnitStatus[ubrIndex] == UBR_MGR_UNIT_FREE) { - strncpy(g_linkInfoMgr.allLinkInfo[ubrIndex].connectName, - connectName, SHM_MAX_NAME_BUFF_LEN); - strncpy(g_linkInfoMgr.allLinkInfo[ubrIndex].listenerName, - listenerName, SHM_MAX_NAME_BUFF_LEN); - g_linkInfoMgr.linkMgrUnitStatus[ubrIndex] = UBR_MGR_UNIT_USED; - g_linkInfoMgr.linkNum++; + uint32_t ubr_index = trx->trx_mgr_index; + char* connect_name = trx->local_shm.name; + if (g_link_info_mgr.link_mgr_unit_status[ubr_index] == UBR_MGR_UNIT_FREE) { + strncpy(g_link_info_mgr.all_link_info[ubr_index].connect_name, + connect_name, SHM_MAX_NAME_BUFF_LEN); + strncpy(g_link_info_mgr.all_link_info[ubr_index].listener_name, + 
listener_name, SHM_MAX_NAME_BUFF_LEN); + g_link_info_mgr.link_mgr_unit_status[ubr_index] = UBR_MGR_UNIT_USED; + g_link_info_mgr.link_num++; } } void UBRingManager::ReleaseLinkInfoFromMgr(UbrTrx *trx) { - if (trx == NULL || g_linkInfoMgr.linkMgrUnitStatus == NULL) { + if (trx == NULL || g_link_info_mgr.link_mgr_unit_status == NULL) { LOG(ERROR) << "LinkInfo release fail."; return; } - if (g_linkInfoMgr.linkMgrUnitStatus[trx->trxMgrIndex] == UBR_MGR_UNIT_FREE) { + if (g_link_info_mgr.link_mgr_unit_status[trx->trx_mgr_index] == UBR_MGR_UNIT_FREE) { LOG(ERROR) << "Release linkInfo failed, trx is not in manager."; return; } - g_linkInfoMgr.linkMgrUnitStatus[trx->trxMgrIndex] = UBR_MGR_UNIT_FREE; - g_linkInfoMgr.linkNum--; + g_link_info_mgr.link_mgr_unit_status[trx->trx_mgr_index] = UBR_MGR_UNIT_FREE; + g_link_info_mgr.link_num--; } -int32_t UBRingManager::UbEventCallback(const char *shmName) +int32_t UBRingManager::UbEventCallback(const char *shm_name) { - if (UNLIKELY(shmName == NULL)) { + if (UNLIKELY(shm_name == NULL)) { LOG(ERROR) << "Ub event callback failed, shm name is null."; return HLC_ERR; } - if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + if (UNLIKELY(g_ubr_mgr.trx_mgr == NULL)) { LOG(ERROR) << "Ub event callback failed, trx mgr is null."; return HLC_ERR; } - LOG(DEBUG) << "Ub event callback is processing. shm_name=" << shmName; + LOG(DEBUG) << "Ub event callback is processing. 
shm_name=" << shm_name; - for (uint32_t i = 0; i < g_ubrMgr.trxCap; ++i) { - if (g_ubrMgr.trxMgrUnitStatus[i] == UBR_MGR_UNIT_FREE) { + for (uint32_t i = 0; i < g_ubr_mgr.trx_cap; ++i) { + if (g_ubr_mgr.trx_mgr_unit_status[i] == UBR_MGR_UNIT_FREE) { continue; } - if (strcmp(g_ubrMgr.trxMgr[i].localShm.name, shmName) == 0 || // 故障链路为该trx的本端shm - strcmp(g_ubrMgr.trxMgr[i].remoteShm.name, shmName) == 0) { // 故障链路为该trx的对端shm - ++g_ubEventCnt; - int fd = (int)g_ubrMgr.trxMgr[i].localShm.fd; + if (strcmp(g_ubr_mgr.trx_mgr[i].local_shm.name, shm_name) == 0 || + strcmp(g_ubr_mgr.trx_mgr[i].remote_shm.name, shm_name) == 0) { + ++g_ub_event_cnt; + int fd = (int)g_ubr_mgr.trx_mgr[i].local_shm.fd; LOG(INFO) << "Ub event callback, the fd of the faulty link is " << fd; - return UBRing::UbrPassiveClearTrx(&g_ubrMgr.trxMgr[i], fd, UBR_UB_EVENT); + return UBRing::UbrPassiveClearTrx(&g_ubr_mgr.trx_mgr[i], fd, UBR_UB_EVENT); } } return HLC_ERR; diff --git a/src/brpc/ub/ub_ring_manager.h b/src/brpc/ub/ub_ring_manager.h index 14bc5d27ac..9e5f848596 100644 --- a/src/brpc/ub/ub_ring_manager.h +++ b/src/brpc/ub/ub_ring_manager.h @@ -31,21 +31,21 @@ typedef enum { } UbrMgrUnitStatus; typedef struct TagUbrMgr { - uint32_t trxNum; - uint32_t trxCap; - UbrTrx *trxMgr; - UbrMgrUnitStatus *trxMgrUnitStatus; + uint32_t trx_num; + uint32_t trx_cap; + UbrTrx *trx_mgr; + UbrMgrUnitStatus *trx_mgr_unit_status; } UbrMgr; typedef struct TagUbrLinkInfo { - char connectName[SHM_MAX_NAME_BUFF_LEN]; - char listenerName[SHM_MAX_NAME_BUFF_LEN]; + char connect_name[SHM_MAX_NAME_BUFF_LEN]; + char listener_name[SHM_MAX_NAME_BUFF_LEN]; } UbrLinkInfo; typedef struct TagUbrLinkInfoMgr { - uint32_t linkNum; - UbrLinkInfo* allLinkInfo; - UbrMgrUnitStatus *linkMgrUnitStatus; + uint32_t link_num; + UbrLinkInfo* all_link_info; + UbrMgrUnitStatus *link_mgr_unit_status; } UbrLinkInfoMgr; class UBRingManager { @@ -54,7 +54,7 @@ class UBRingManager { UbrMgrFini(); } - static RETURN_CODE GetHlcDealMsgMaxCnt(const uint32_t 
capacity, uint32_t *dealMsgMaxCnt); + static RETURN_CODE GetHlcDealMsgMaxCnt(const uint32_t capacity, uint32_t *deal_msg_max_cnt); static RETURN_CODE UbrMgrDefault(); @@ -68,19 +68,19 @@ class UBRingManager { static void LinkInfoInit(void); static void LinkInfoFini(void); - static void AcquireLinkInfoToMgr(const char* listenerName, UbrTrx *trx); + static void AcquireLinkInfoToMgr(const char* listener_name, UbrTrx *trx); static void ReleaseLinkInfoFromMgr(UbrTrx* trx); - static int32_t UbEventCallback(const char *shmName); + static int32_t UbEventCallback(const char *shm_name); private: UBRingManager() { } - static UbrMgr g_ubrMgr; - static UbrLinkInfoMgr g_linkInfoMgr; - static pthread_mutex_t g_ubrTrxMgrMtx; - static pthread_mutex_t g_ubrListenerMgrMtx; - static pthread_mutex_t g_linkInfoMgrMtx; + static UbrMgr g_ubr_mgr; + static UbrLinkInfoMgr g_link_info_mgr; + static pthread_mutex_t g_ubr_trx_mgr_mtx; + static pthread_mutex_t g_ubr_listener_mgr_mtx; + static pthread_mutex_t g_link_info_mgr_mtx; }; } } diff --git a/src/brpc/ub/ubr_msg.h b/src/brpc/ub/ubr_msg.h index 69d7aeec45..a205230bf4 100644 --- a/src/brpc/ub/ubr_msg.h +++ b/src/brpc/ub/ubr_msg.h @@ -43,10 +43,10 @@ typedef struct __attribute__((aligned(64))) TagUbrMsgFormat { uint8_t header[UBR_MSG_HEADER_LEN]; } UbrMsgFormat; -static inline uint32_t CalcUbrMsgChunkCnt(uint32_t bufLen) +static inline uint32_t CalcUbrMsgChunkCnt(uint32_t buf_len) { - uint32_t msgChunkNum = (bufLen + UBR_MSG_PAYLOAD_LEN - 1) / UBR_MSG_PAYLOAD_LEN; - return msgChunkNum; + uint32_t msg_chunk_num = (buf_len + UBR_MSG_PAYLOAD_LEN - 1) / UBR_MSG_PAYLOAD_LEN; + return msg_chunk_num; } } } diff --git a/src/brpc/ub/ubr_trx.h b/src/brpc/ub/ubr_trx.h index b3702496d1..ccba5f0c95 100644 --- a/src/brpc/ub/ubr_trx.h +++ b/src/brpc/ub/ubr_trx.h @@ -47,8 +47,8 @@ namespace brpc { namespace ub { -extern RETURN_CODE(*g_BeforeTcpClose)(int); -extern RETURN_CODE(*g_AfterTcpClose)(int); +extern RETURN_CODE(*g_before_tcp_close)(int); +extern 
RETURN_CODE(*g_after_tcp_close)(int); typedef enum { UBR_STATE_NONE, @@ -86,11 +86,11 @@ typedef enum { typedef struct TagUbrDataStatusQMsg { uint32_t tail; uint32_t timeout; - uint8_t heartBeat; + uint8_t heart_beat; } UbrDataStatusQMsg; typedef struct TagUbrEventQMsg { - uint64_t ioId; + uint64_t io_id; EventQState flag; } UbrEventQMsg; @@ -100,62 +100,62 @@ typedef struct TagUbrAddrInfo { } UbrAddrInfo; typedef struct TagUbrTx { - UbrAddrInfo remoteDataQ; - UbrAddrInfo remoteRxEventQ; - UbrAddrInfo localDataStatusQ; - UbrAddrInfo localTxEventQ; - uint64_t outIoId; - uint32_t writePos; + UbrAddrInfo remote_data_q; + UbrAddrInfo remote_rx_event_q; + UbrAddrInfo local_data_status_q; + UbrAddrInfo local_tx_event_q; + uint64_t out_io_id; + uint32_t write_pos; uint32_t capacity; - UbrMsgFormat localMsgSpace; - uint32_t hbRetryCnt; - uint32_t epLastCap; - volatile EventQState trxState; + UbrMsgFormat local_msg_space; + uint32_t hb_retry_cnt; + uint32_t ep_last_cap; + volatile EventQState trx_state; } UbrTx; typedef struct TagUbrRx { - UbrAddrInfo localDataQ; - UbrAddrInfo localRxEventQ; - UbrAddrInfo remoteDataStatusQ; - UbrAddrInfo remoteTxEventQ; - uint64_t inIoId; - uint32_t readPos; + UbrAddrInfo local_data_q; + UbrAddrInfo local_rx_event_q; + UbrAddrInfo remote_data_status_q; + UbrAddrInfo remote_tx_event_q; + uint64_t in_io_id; + uint32_t read_pos; uint32_t capacity; - uint32_t dealMsgNum; - uint32_t dealMsgMaxCnt; - uint32_t epEofPos; - volatile EventQState trxState; + uint32_t deal_msg_num; + uint32_t deal_msg_max_cnt; + uint32_t ep_eof_pos; + volatile EventQState trx_state; } UbrRx; typedef struct TagUbrTrx { - UbrTx ubrTx; - UbrRx ubrRx; - uint64_t ubrId; - uint32_t trxMgrIndex; + UbrTx ubr_tx; + UbrRx ubr_rx; + uint64_t ubr_id; + uint32_t trx_mgr_index; UbrTrxType type; - SHM localShm; - SHM remoteShm; - int timerFd; - int hbTimerFd; - int clearTimerFd; - AtomicInt closeCnt; - AtomicInt closeState; + SHM local_shm; + SHM remote_shm; + int timer_fd; + int 
hb_timer_fd; + int clear_timer_fd; + AtomicInt close_cnt; + AtomicInt close_state; } UbrTrx; typedef struct TagFileLock { - int lockFd; - char* lockPath; + int lock_fd; + char* lock_path; } FileLock; typedef struct TagUbrLinkLock { - int fileLockNum; - FileLock* fileLock; + int file_lock_num; + FileLock* file_lock; } UbrLinkLock; typedef enum { UBR_UB_EVENT, UBR_HEARTBEAT, -}PASSIVE_DISC_TYPE; +} PASSIVE_DISC_TYPE; } } From 643b80f8e7b37af4fc6f2dbd43c6656eb50fe86c Mon Sep 17 00:00:00 2001 From: zchuango Date: Sun, 19 Apr 2026 10:55:37 +0000 Subject: [PATCH 59/84] optimize the log message and some field name --- example/ubring_performance/CMakeLists.txt | 10 +- example/ubring_performance/client.cpp | 3 + src/brpc/input_messenger.h | 5 +- src/brpc/socket.h | 6 +- src/brpc/ub/common/thread_lock.h | 118 -- src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl | 40 - src/brpc/ub/ub_ring.cpp | 1048 ----------------- src/brpc/ub/ub_ring_manager.cpp | 263 ----- src/brpc/ub_transport.cpp | 24 +- src/brpc/ub_transport.h | 8 +- src/brpc/{ub => ubring}/common/common.h | 25 +- src/brpc/ubring/common/thread_lock.h | 118 ++ .../ubring/rack_mem/declare_shm_ubs.h.tmpl | 40 + src/brpc/{ub => ubring}/rack_mem/ubs_mem.h | 4 +- .../{ub => ubring}/rack_mem/ubs_mem_def.h | 0 .../{ub => ubring}/rack_mem/ubshmem_stub.cpp | 4 +- src/brpc/{ub => ubring}/shm/shm_def.h | 6 +- src/brpc/{ub => ubring}/shm/shm_ipc.cpp | 32 +- src/brpc/{ub => ubring}/shm/shm_ipc.h | 2 +- src/brpc/{ub => ubring}/shm/shm_mgr.cpp | 38 +- src/brpc/{ub => ubring}/shm/shm_mgr.h | 6 +- src/brpc/{ub => ubring}/shm/shm_ubs.cpp | 307 ++--- src/brpc/{ub => ubring}/shm/shm_ubs.h | 2 +- src/brpc/{ub => ubring}/timer/timer_mgr.cpp | 278 ++--- src/brpc/{ub => ubring}/timer/timer_mgr.h | 6 +- src/brpc/{ub => ubring}/ub_endpoint.cpp | 76 +- src/brpc/{ub => ubring}/ub_endpoint.h | 18 +- src/brpc/{ub => ubring}/ub_helper.cpp | 22 +- src/brpc/{ub => ubring}/ub_helper.h | 8 +- src/brpc/ubring/ub_ring.cpp | 1048 +++++++++++++++++ src/brpc/{ub => 
ubring}/ub_ring.h | 128 +- src/brpc/ubring/ub_ring_manager.cpp | 263 +++++ src/brpc/{ub => ubring}/ub_ring_manager.h | 40 +- src/brpc/{ub => ubring}/ubr_msg.h | 8 +- src/brpc/{ub => ubring}/ubr_trx.h | 92 +- src/butil/iobuf.cpp | 2 +- src/butil/iobuf.h | 4 +- 37 files changed, 2059 insertions(+), 2043 deletions(-) delete mode 100644 src/brpc/ub/common/thread_lock.h delete mode 100644 src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl delete mode 100644 src/brpc/ub/ub_ring.cpp delete mode 100644 src/brpc/ub/ub_ring_manager.cpp rename src/brpc/{ub => ubring}/common/common.h (91%) create mode 100644 src/brpc/ubring/common/thread_lock.h create mode 100644 src/brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl rename src/brpc/{ub => ubring}/rack_mem/ubs_mem.h (98%) rename src/brpc/{ub => ubring}/rack_mem/ubs_mem_def.h (100%) rename src/brpc/{ub => ubring}/rack_mem/ubshmem_stub.cpp (95%) rename src/brpc/{ub => ubring}/shm/shm_def.h (96%) rename src/brpc/{ub => ubring}/shm/shm_ipc.cpp (92%) rename src/brpc/{ub => ubring}/shm/shm_ipc.h (98%) rename src/brpc/{ub => ubring}/shm/shm_mgr.cpp (90%) rename src/brpc/{ub => ubring}/shm/shm_mgr.h (93%) rename src/brpc/{ub => ubring}/shm/shm_ubs.cpp (65%) rename src/brpc/{ub => ubring}/shm/shm_ubs.h (98%) rename src/brpc/{ub => ubring}/timer/timer_mgr.cpp (50%) rename src/brpc/{ub => ubring}/timer/timer_mgr.h (94%) rename src/brpc/{ub => ubring}/ub_endpoint.cpp (92%) rename src/brpc/{ub => ubring}/ub_endpoint.h (94%) rename src/brpc/{ub => ubring}/ub_helper.cpp (88%) rename src/brpc/{ub => ubring}/ub_helper.h (94%) create mode 100644 src/brpc/ubring/ub_ring.cpp rename src/brpc/{ub => ubring}/ub_ring.h (54%) create mode 100644 src/brpc/ubring/ub_ring_manager.cpp rename src/brpc/{ub => ubring}/ub_ring_manager.h (66%) rename src/brpc/{ub => ubring}/ubr_msg.h (88%) rename src/brpc/{ub => ubring}/ubr_trx.h (69%) diff --git a/example/ubring_performance/CMakeLists.txt b/example/ubring_performance/CMakeLists.txt index cbccdbc983..ba4b1bf333 100644 --- 
a/example/ubring_performance/CMakeLists.txt +++ b/example/ubring_performance/CMakeLists.txt @@ -126,10 +126,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") ) endif() -if(BRPC_WITH_UBRING) - add_executable(ubring_performance_client client.cpp ${PROTO_SRC} ${PROTO_HEADER}) - add_executable(ubring_performance_server server.cpp ${PROTO_SRC} ${PROTO_HEADER}) +add_executable(ubring_performance_client client.cpp ${PROTO_SRC} ${PROTO_HEADER}) +add_executable(ubring_performance_server server.cpp ${PROTO_SRC} ${PROTO_HEADER}) - target_link_libraries(ubring_performance_client ${BRPC_LIB} ${DYNAMIC_LIB}) - target_link_libraries(ubring_performance_server ${BRPC_LIB} ${DYNAMIC_LIB}) -endif() \ No newline at end of file +target_link_libraries(ubring_performance_client ${BRPC_LIB} ${DYNAMIC_LIB}) +target_link_libraries(ubring_performance_server ${BRPC_LIB} ${DYNAMIC_LIB}) \ No newline at end of file diff --git a/example/ubring_performance/client.cpp b/example/ubring_performance/client.cpp index d9e7b8403b..492596b664 100644 --- a/example/ubring_performance/client.cpp +++ b/example/ubring_performance/client.cpp @@ -275,6 +275,9 @@ void Test(int thread_num, int attachment_size) { for (int k = 0; k < thread_num; ++k) { bthread_start_background(&tid[k], &BTHREAD_ATTR_NORMAL, DeleteTest, tests[k]); } + for (int k = 0; k < thread_num; ++k) { + bthread_join(tid[k], NULL); + } } int main(int argc, char* argv[]) { diff --git a/src/brpc/input_messenger.h b/src/brpc/input_messenger.h index 2982996239..5203c02505 100644 --- a/src/brpc/input_messenger.h +++ b/src/brpc/input_messenger.h @@ -29,6 +29,9 @@ namespace brpc { namespace rdma { class RdmaEndpoint; } +namespace ubring { +class UBShmEndpoint; +} class TcpTransport; struct InputMessageHandler { // The callback to cut a message from `source'. 
@@ -93,7 +96,7 @@ class InputMessenger : public SocketUser { friend class Socket; friend class TcpTransport; friend class rdma::RdmaEndpoint; -friend class ub::UBShmEndpoint; +friend class ubring::UBShmEndpoint; public: explicit InputMessenger(size_t capacity = 128); ~InputMessenger(); diff --git a/src/brpc/socket.h b/src/brpc/socket.h index 467db7c202..167cc8f418 100644 --- a/src/brpc/socket.h +++ b/src/brpc/socket.h @@ -57,7 +57,7 @@ namespace rdma { class RdmaEndpoint; class RdmaConnect; } -namespace ub { +namespace ubring { class UBShmEndpoint; class UBConnect; } @@ -320,8 +320,8 @@ friend class policy::RtmpContext; friend class schan::ChannelBalancer; friend class rdma::RdmaEndpoint; friend class rdma::RdmaConnect; -friend class ub::UBShmEndpoint; -friend class ub::UBConnect; +friend class ubring::UBShmEndpoint; +friend class ubring::UBConnect; friend class UBShmTransport; friend class HealthCheckTask; friend class OnAppHealthCheckDone; diff --git a/src/brpc/ub/common/thread_lock.h b/src/brpc/ub/common/thread_lock.h deleted file mode 100644 index 42713460d8..0000000000 --- a/src/brpc/ub/common/thread_lock.h +++ /dev/null @@ -1,118 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#ifndef BRPC_THREAD_LOCK_H -#define BRPC_THREAD_LOCK_H -#include -#include -#include -#include -#include -#include "brpc/ub/common/common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static inline void UnlockMutex(pthread_mutex_t **mtx) -{ - if (LIKELY(mtx != NULL && *mtx != NULL)) { - pthread_mutex_unlock(*mtx); - } else { - LOG(ERROR) << "Invalid input for mtx."; - } -} - -#define LOCK_GUARD(mtx_ptr) \ - pthread_mutex_t *__attribute__((cleanup(UnlockMutex))) _mtx_ptr = ({ \ - pthread_mutex_lock(&(mtx_ptr)); \ - &(mtx_ptr); \ - }) - -static inline void UnlockSpinLock(pthread_spinlock_t **spin_lock) -{ - if (LIKELY(spin_lock != NULL && *spin_lock != NULL)) { - pthread_spin_unlock(*spin_lock); - } else { - LOG(ERROR) << "Invalid input for spin_lock."; - } -} - -#define SPIN_LOCK_GUARD(spin_lock_ptr) \ - pthread_spinlock_t *__attribute__((cleanup(UnlockSpinLock))) _spin_lock_ptr = ({ \ - pthread_spin_lock(&(spin_lock_ptr)); \ - &(spin_lock_ptr); \ - }) - -static inline void UnlockRWLock(pthread_rwlock_t **rw_lock) -{ - if (LIKELY(rw_lock != NULL && *rw_lock != NULL)) { - pthread_rwlock_unlock(*rw_lock); - } else { - LOG(ERROR) << "Invalid input for rw_lock."; - } -} - -#define R_LOCK_GUARD(read_lock_ptr) \ - pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _read_lock_ptr = ({ \ - pthread_rwlock_rdlock(&(read_lock_ptr)); \ - &(read_lock_ptr); \ - }) - -#define W_LOCK_GUARD(write_lock_ptr) \ - pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _write_lock_ptr = ({ \ - pthread_rwlock_wrlock(&(write_lock_ptr)); \ - &(write_lock_ptr); \ - }) - -static inline void PostSemWithClose(sem_t **sem) -{ - if (LIKELY(sem != NULL && *sem != NULL)) { - sem_post(*sem); - sem_close(*sem); - *sem = NULL; - sem = NULL; - } else { - LOG(ERROR) << "Invalid input for semaphore."; - } -} - -static inline void PostSem(sem_t **sem) -{ - if (LIKELY(sem != NULL && *sem != NULL)) { - sem_post(*sem); - } else { - LOG(ERROR) << "Invalid input for semaphore."; - } -} - 
-#define SEMAPHORE_WAIT_GUARD_WITH_CLOSE(sem_ptr) \ - sem_t *__attribute__((cleanup(PostSemWithClose))) _sem_ptr = ({ \ - sem_wait(sem_ptr); \ - sem_ptr; \ - }) - -#define SEMAPHORE_WAIT_GUARD(sem_ptr) \ - sem_t *__attribute__((cleanup(PostSem))) _sem_ptr = ({ \ - sem_wait(sem_ptr); \ - sem_ptr; \ - }) - -#ifdef __cplusplus -} -#endif -#endif //BRPC_THREAD_LOCK_H \ No newline at end of file diff --git a/src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl b/src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl deleted file mode 100644 index fc1d3d9835..0000000000 --- a/src/brpc/ub/rack_mem/declare_shm_ubs.h.tmpl +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef HLC_MK_UBSM -#error Do not include this file unless you know what you are doing. -#endif - -#ifndef HLC_MK_UBSM_OPTIONAL -#define HLC_MK_UBSM_OPTIONAL HLC_MK_UBSM -#endif - -HLC_MK_UBSM(int, ubsmem_init_attributes, (ubsmem_options_t *ubsm_shmem_opts)); - -HLC_MK_UBSM(int, ubsmem_initialize, (const ubsmem_options_t *ubsm_shmem_opts)); - -HLC_MK_UBSM(int, ubsmem_finalize, (void)); - -HLC_MK_UBSM(int, ubsmem_set_logger_level, (int level)); - -HLC_MK_UBSM(int, ubsmem_set_extern_logger, (void (*func)(int level, const char *msg))); - -HLC_MK_UBSM(int, ubsmem_lookup_regions, (ubsmem_regions_t* regions)); - -HLC_MK_UBSM(int, ubsmem_create_region, (const char *region_name, size_t size, const ubsmem_region_attributes_t *reg_attr)); - -HLC_MK_UBSM(int, ubsmem_destroy_region, (const char *region_name)); - -HLC_MK_UBSM(int, ubsmem_shmem_allocate,(const char *region_name, const char *name, size_t size, mode_t mode, - uint64_t flags)); - -HLC_MK_UBSM(int, ubsmem_shmem_deallocate, (const char *name)); - -HLC_MK_UBSM(int, ubsmem_shmem_map, (void *addr, size_t length, int prot, int flags, const char *name, off_t offset, - void **local_ptr)); - -HLC_MK_UBSM(int, ubsmem_shmem_unmap, (void *local_ptr, size_t length)); - -HLC_MK_UBSM(int, ubsmem_shmem_faults_register, (shmem_faults_func registerFunc)); - -HLC_MK_UBSM(int, ubsmem_local_nid_query, (uint32_t 
*nid)); - -#undef HLC_MK_UBSM_OPTIONAL -#undef HLC_MK_UBSM \ No newline at end of file diff --git a/src/brpc/ub/ub_ring.cpp b/src/brpc/ub/ub_ring.cpp deleted file mode 100644 index b3bdd34a33..0000000000 --- a/src/brpc/ub/ub_ring.cpp +++ /dev/null @@ -1,1048 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include -#include -#include -#include -#include "butil/logging.h" -#include "brpc/ub/ub_ring.h" - -namespace brpc { -namespace ub { -uint32_t g_sleep_time[UBR_TASK_STEP_NUM] = {0}; -#define TIME_COVERSION 1000 -DEFINE_int32(ub_disconnect_timeout, 1, "Ubshm disconnection timeout."); -DEFINE_int32(ub_connect_timeout, 1, "Ubshm connection timeout."); -DEFINE_int32(ub_hb_timer_interval, 1, "Heartbeat timer interval."); -DEFINE_int32(ub_hb_retry_cnt, 3, "Heartbeat retry times."); -DEFINE_int32(ub_event_queue_timer_interval, 100, "Interval of the disconnection timer."); - -UBRing::UBRing() -{} -UBRing::~UBRing() -{} - -RETURN_CODE UBRing::UbrTrxMapShm(SHM *local_shm, SHM *remote_shm) -{ - RETURN_CODE rc = UbrTrxMapLocalShm(local_shm); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx map local shared memory failed."; - return rc; - } - rc = UbrTrxMapRemoteShm(remote_shm); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx map remote shared memory failed."; - return rc; - } - return HLC_OK; -} - -RETURN_CODE UBRing::UbrTrxClose() { - if (UNLIKELY(UbrTrxCloseCheck(_trx) != HLC_OK)) { - return HLC_ERR; - } - ((UbrEventQMsg *)_trx->ubr_rx.remote_tx_event_q.addr)->flag = UBR_STATE_CLOSING; - - uint32_t disconnect_timeout = FLAGS_ub_disconnect_timeout; - uint64_t start_time = GetCurNanoSeconds(); - - if (((UbrEventQMsg *)_trx->ubr_tx.local_tx_event_q.addr)->flag == UBR_STATE_CONNECTED) { - ((UbrEventQMsg *)_trx->ubr_tx.local_tx_event_q.addr)->flag = UBR_STATE_CLOSED; - _trx->ubr_tx.trx_state = UBR_STATE_CLOSED; - } - - ((UbrEventQMsg *)_trx->ubr_tx.remote_rx_event_q.addr)->flag = UBR_STATE_CLOSED; - while (((UbrEventQMsg *)_trx->ubr_rx.local_rx_event_q.addr)->flag != UBR_STATE_CLOSED) { - UbrSetSleepTask(UBR_TASK_CLOSE); - if (HasTimedOut(start_time, disconnect_timeout) != HLC_OK) { - LOG(ERROR) << "Local shm " << _trx->local_shm.name - << " wait for the peer to close the connection failed."; - _trx->ubr_rx.trx_state = UBR_STATE_CLOSED; - ClearTrxResource(_trx, 
start_time, UBR_SEND_CLOSE); - return HLC_ERR_TIMEOUT; - } - usleep(1); - } - _trx->ubr_rx.trx_state = UBR_STATE_CLOSED; - RETURN_CODE rc; - if (UNLIKELY((rc = ClearTrxResource(_trx, start_time, UBR_SEND_CLOSE)) != HLC_OK)) { - LOG(ERROR) << "Trx close, clear trx resource failed, trx local name=" << _trx->local_shm.name; - return HLC_ERR; - } - LOG(INFO) << "The peer is closed, local name=" << _trx->local_shm.name; - return HLC_OK; -} - -RETURN_CODE UBRing::UbrAddCloseTimer() { - if (UNLIKELY(_trx == NULL)) { - LOG(ERROR) << "Trx add close timer failed, trx is null."; - return HLC_ERR; - } - - uint32_t event_q_timer_interval = FLAGS_ub_event_queue_timer_interval * TIME_COVERSION; - struct itimerspec time_spec = { - .it_interval = {.tv_sec = 0, .tv_nsec = event_q_timer_interval}, - .it_value = {.tv_sec = 0, .tv_nsec = 1} - }; - int timer_fd = TimerStart(&time_spec, UbrTrxCloseCallback, (void*)_trx); - if (UNLIKELY(timer_fd == -1)) { - LOG(ERROR) << "Start ubr close timer failed, trx local name=" << _trx->local_shm.name; - return HLC_ERR; - } - _trx->timer_fd = timer_fd; - return HLC_OK; -} - -RETURN_CODE UBRing::UbrAddTimer() { - if (UNLIKELY(UbrAddCloseTimer() != HLC_OK)) { - LOG(ERROR) << "Ubr " << _trx->local_shm.name << " add closed timer failed."; - return HLC_ERR; - } - - if (UNLIKELY(UbrAddHBTimer() != HLC_OK)) { - DeleteTimerSafe((uint32_t)_trx->timer_fd); - LOG(ERROR) << "Ubr " << _trx->local_shm.name << " add heartbeat timer failed."; - return HLC_ERR; - } - return HLC_OK; -} - -void* UBRing::UbrTrxCloseCallback(void* args) { - auto* trx = (UbrTrx*) args; - if (UNLIKELY(UBRing::UbrTrxCallbackCheck(trx) != HLC_OK)) { - return nullptr; - } - - auto* local_rx_event_q = (UbrEventQMsg *)trx->ubr_rx.local_rx_event_q.addr; - auto* local_tx_event_q = (UbrEventQMsg *)trx->ubr_tx.local_tx_event_q.addr; - if (local_rx_event_q->flag != UBR_STATE_CLOSED || local_tx_event_q->flag == UBR_STATE_CLOSED) { - return nullptr; - } - trx->ubr_rx.trx_state = UBR_STATE_CLOSED; - 
int fd = (int)trx->local_shm.fd; - do { - if (ATOMIC_LOAD(trx->close_cnt) == 0) { - LOG(ERROR) << "Trx close callback failed, exist other closing call, name=" << trx->local_shm.name; - break; - } - ATOMIC_SUB(trx->close_cnt, 1); - - uint64_t start_time = GetCurNanoSeconds(); - - if (local_tx_event_q->flag == UBR_STATE_CONNECTED || ATOMIC_LOAD(trx->close_cnt) == 1) { - local_tx_event_q->flag = UBR_STATE_CLOSED; - trx->ubr_tx.trx_state = UBR_STATE_CLOSED; - } - UbrEventQMsg* remote_rx_event_q = (UbrEventQMsg *)trx->ubr_tx.remote_rx_event_q.addr; - if (remote_rx_event_q == nullptr) { - LOG(ERROR) << "Trx close callback failed, " << trx->local_shm.name << " remote_rx_event_q is NULL."; - break; - } - remote_rx_event_q->flag = UBR_STATE_CLOSED; - if (UNLIKELY(ClearTrxResource(trx, start_time, UBR_CALL_BACK_CLOSE, 1) != HLC_OK)) { - LOG(ERROR) << "Trx close callback failed, " << trx->local_shm.name << " clear trx resource failed."; - break; - } - } while (0); - return nullptr; -} - -RETURN_CODE UBRing::UbrAddHBTimer() { - if (UNLIKELY(_trx == NULL)) { - LOG(ERROR) << "Trx add heartbeat timer failed, trx is null."; - return HLC_ERR; - } - - struct itimerspec time_spec = { - .it_interval = {.tv_sec = FLAGS_ub_hb_timer_interval, .tv_nsec = 0}, - .it_value = {.tv_sec = 0, .tv_nsec = 1} - }; - int timer_fd = TimerStart(&time_spec, UbrTrxHBCallback, (void*)_trx); - if (UNLIKELY(timer_fd == -1)) { - LOG(ERROR) << "Start ubr heartbeat timer failed."; - return HLC_ERR; - } - _trx->hb_timer_fd = timer_fd; - return HLC_OK; -} - -RETURN_CODE UBRing::UbrPassiveClearTrx(UbrTrx *trx, int fd, PASSIVE_DISC_TYPE type) { - if (UNLIKELY(UbrTrxCloseCheck(trx) != HLC_OK)) { - return HLC_ERR; - } - trx->ubr_tx.trx_state = UBR_STATE_CLOSED; - trx->ubr_rx.trx_state = UBR_STATE_CLOSED; - DeleteTimerSafe((uint32_t)trx->timer_fd); - const char *type_name = NULL; - if (type == UBR_HEARTBEAT) { - DeleteTimer((uint32_t)trx->hb_timer_fd); - type_name = "Trx heartbeat"; - } else if (type == 
UBR_UB_EVENT) { - DeleteTimerSafe((uint32_t)trx->hb_timer_fd); - type_name = "Ub event callback"; - } - sleep(FLAGS_ub_flying_io_timeout); - - int rc = ShmLocalFree(&trx->remote_shm); - if (rc != HLC_OK) { - LOG(ERROR) << type_name << ", delete remote shm failed. ret=" << rc; - } - rc = ShmLocalFree(&trx->local_shm); - if (rc != HLC_OK) { - LOG(ERROR) << type_name << ", delete local shm failed. ret=" << rc; - } - - UBRingManager::ReleaseUbrTrxFromMgr(trx); - return HLC_OK; -} - -void* UBRing::UbrTrxHBCallback(void* args) { - auto* trx = (UbrTrx*) args; - if (UNLIKELY(UbrTrxCallbackCheck(trx) != HLC_OK)) { - return NULL; - } - - auto* local_data_status = (UbrDataStatusQMsg *)trx->ubr_tx.local_data_status_q.addr; - auto* remote_data_status = (UbrDataStatusQMsg *)trx->ubr_rx.remote_data_status_q.addr; - if (UNLIKELY(local_data_status == NULL || remote_data_status == NULL)) { - LOG(ERROR) << "Heartbeat error, datastatus is NULL."; - return NULL; - } - - if (trx->ubr_tx.trx_state != UBR_STATE_CONNECTED || trx->ubr_rx.trx_state != UBR_STATE_CONNECTED) { - LOG_EVERY_SECOND(INFO) << "Heartbeat cannot be started, wait connected state."; - return NULL; - } - - remote_data_status->heart_beat = 1; - if (local_data_status->heart_beat == 1) { - local_data_status->heart_beat = 0; - trx->ubr_tx.hb_retry_cnt = 0; - return NULL; - } - - ++trx->ubr_tx.hb_retry_cnt; - if (trx->ubr_tx.hb_retry_cnt <= FLAGS_ub_hb_retry_cnt) { - return NULL; - } - - int fd = (int)trx->local_shm.fd; - LOG(INFO) << "Hlc heartbeat, start to clear trx resource. 
hbTimerFd=" << fd << ", shmName=" << trx->local_shm.name; - UbrPassiveClearTrx(trx, fd, UBR_HEARTBEAT); - LOG(INFO) << "Hlc heartbeat clear trx resource finish."; - return NULL; -} - -RETURN_CODE UBRing::UbrAddAsynClearTimer(UbrTrx *trx) { - if (UNLIKELY(trx == NULL)) { - LOG(ERROR) << "Trx add close timer failed, trx is null."; - return HLC_ERR; - } - - struct itimerspec time_spec = { - .it_interval = {.tv_sec = 0, .tv_nsec = 0}, - .it_value = {.tv_sec = FLAGS_ub_flying_io_timeout, .tv_nsec = 0} - }; - - int timer_fd = TimerStart(&time_spec, UbrAsynClearCallback, (void*)trx); - if (UNLIKELY(timer_fd == -1)) { - LOG(ERROR) << "Start ubr close timer failed, trx name=%s.", trx->local_shm.name; - return HLC_ERR; - } - trx->clear_timer_fd = timer_fd; - return HLC_OK; -} - -void *UBRing::UbrAsynClearCallback(void *args) -{ - auto* trx = (UbrTrx*) args; - if (UNLIKELY(trx == NULL)) { - LOG(ERROR) << "Trx close, trx is null."; - return NULL; - } - - if (UNLIKELY(ShmRemoteFree(&trx->remote_shm) != HLC_OK)) { - LOG(ERROR) << "Trx close, remote shm " << trx->remote_shm.name << " free failed."; - } - - if (UNLIKELY(UbrTrxFreeShm(trx) != HLC_OK)) { - LOG(ERROR) << "Trx close, wait for local shm " << trx->local_shm.name << " free fail."; - } - - if (UNLIKELY(UBRingManager::ReleaseUbrTrxFromMgr(trx) != HLC_OK)) { - LOG(ERROR) << "Trx close, release shm " << trx->local_shm.name << " trx failed."; - } - return NULL; -} - -int UBRing::UbrTrxSend(const void *buf, uint32_t buf_len) -{ - if (UNLIKELY(CheckTrxSendPreCheck(_trx) != HLC_OK)) { - return HLC_ERR; - } - // 1.2 计算空间 - auto *data_status_msg = (UbrDataStatusQMsg *)_trx->ubr_tx.local_data_status_q.addr; - auto *data_msg = (UbrMsgFormat *)_trx->ubr_tx.remote_data_q.addr; - uint32_t cap = _trx->ubr_tx.capacity; - uint32_t tail = data_status_msg->tail; - uint32_t remain_chunk_num = - (_trx->ubr_tx.write_pos > tail) ? 
(tail + cap - _trx->ubr_tx.write_pos) : (tail - _trx->ubr_tx.write_pos); - uint32_t need_msg_chunk_num = CalcUbrMsgChunkCnt(buf_len); - if (remain_chunk_num < need_msg_chunk_num) { - return HLC_RETRY; - } - UbrMsgFormat *msg = &(_trx->ubr_tx.local_msg_space); - uint32_t total_send_len = 0; - uint32_t remain_buf_len = buf_len; - uint8_t is_last_pkt = 0; - _trx->ubr_tx.out_io_id++; - ((UbrEventQMsg *)_trx->ubr_tx.remote_rx_event_q.addr)->io_id = _trx->ubr_tx.out_io_id; - while (remain_buf_len > 0) { - is_last_pkt = (uint8_t)(remain_buf_len <= UBR_MSG_PAYLOAD_LEN); - msg->header[UBR_MSG_FLAG_INDEX] = is_last_pkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; - msg->header[UBR_MSG_LEN_INDEX] = is_last_pkt ? (uint8_t)remain_buf_len : UBR_MSG_PAYLOAD_LEN; - msg->header[UBR_MSG_CUR_INDEX] = 0; - memcpy(msg->payload.inner, (const uint8_t *)buf + total_send_len, msg->header[UBR_MSG_LEN_INDEX]); - Copy64Byte((int8_t *)&data_msg[_trx->ubr_tx.write_pos], (int8_t *)msg); - _trx->ubr_tx.write_pos = (_trx->ubr_tx.write_pos + 1) % cap; - total_send_len += msg->header[UBR_MSG_LEN_INDEX]; - remain_buf_len -= msg->header[UBR_MSG_LEN_INDEX]; - } - return (int)total_send_len; -} - -int UBRing::UbrTrxRecv(void *buf, uint32_t buf_len) -{ - RETURN_CODE rc = HLC_OK; - if (UNLIKELY((rc = CheckTrxRecvParam(_trx, buf, buf_len)) != HLC_OK)) { - return (rc == UBR_NOT_CONNECTED) ? 0 : rc; - } - UbrMsgFormat *data_msg = (UbrMsgFormat *)_trx->ubr_rx.local_data_q.addr; - uint32_t read_pos_end = _trx->ubr_rx.read_pos; - uint8_t flag = data_msg[read_pos_end].header[UBR_MSG_FLAG_INDEX]; - if (flag == UBR_MSG_CHUNK_NONE) { - return HLC_RETRY; - } - return UbrTrxRecvBlockMode(static_cast(buf), buf_len); -} - -int UBRing::UbrTrxRecvBlockMode(uint8_t *dest, uint32_t buf_len) -{ - RETURN_CODE rc = HLC_OK; - if (UNLIKELY((rc = CheckTrxRecvParam(_trx, dest, buf_len)) != HLC_OK)) { - return (rc == UBR_NOT_CONNECTED) ? 
0 : rc; - } - - int32_t total_copied = 0; - int32_t remaining_len = (int32_t)buf_len; - bool not_eof_encountered = true; - - UbrRx *ubr_rx = &_trx->ubr_rx; - UbrMsgFormat *data_msg = (UbrMsgFormat *)ubr_rx->local_data_q.addr; - bool need_update_epoll_eof_pos = ubr_rx->read_pos == ubr_rx->ep_eof_pos; - - while (not_eof_encountered && remaining_len > 0) { - if (UNLIKELY(CheckTrxRecvPreCheck(_trx) != HLC_OK)) { - return HLC_ERR; - } - UbrMsgFormat *current_chunk = &data_msg[ubr_rx->read_pos]; - uint8_t flag = current_chunk->header[UBR_MSG_FLAG_INDEX]; - if (flag == UBR_MSG_CHUNK_NONE) { - continue; - } - if (flag == UBR_MSG_CHUNK_EOF) { - not_eof_encountered = false; - } - uint8_t chunk_msg_len = current_chunk->header[UBR_MSG_LEN_INDEX]; - uint8_t cur_index = current_chunk->header[UBR_MSG_CUR_INDEX]; - uint8_t available_data = chunk_msg_len - cur_index; - - int32_t copy_len = (remaining_len < available_data) ? remaining_len : available_data; - memcpy(dest + total_copied, data_msg[ubr_rx->read_pos].payload.inner + cur_index, (size_t)copy_len); - total_copied += copy_len; - remaining_len -= copy_len; - current_chunk->header[UBR_MSG_CUR_INDEX] += (uint8_t)copy_len; - if (LIKELY(current_chunk->header[UBR_MSG_CUR_INDEX] == chunk_msg_len)) { - current_chunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; - UpdateDataQTail(_trx); - ubr_rx->read_pos = (ubr_rx->read_pos + 1) % ubr_rx->capacity; - } - } - if (need_update_epoll_eof_pos) { - ubr_rx->ep_eof_pos = ubr_rx->read_pos; - } - return (int)total_copied; -} - -ssize_t UBRing::UbrTrxWritev(const struct iovec *iov, int iovcnt) -{ - if (UNLIKELY(CheckTrxSendPreCheck(_trx) != HLC_OK)) { - return HLC_ERR; - } - - size_t buf_len = 0; - for (int i = 0; i < iovcnt; i++) { - buf_len += iov[i].iov_len; - } - RETURN_CODE rc = WritevHasEnoughSpace(buf_len); - if (rc != HLC_OK) { - return rc; - } - - UbrMsgFormat *data_msg = (UbrMsgFormat *)_trx->ubr_tx.remote_data_q.addr; - UbrMsgFormat *msg = &(_trx->ubr_tx.local_msg_space); - int 
cur_iov = 0; - size_t cur_iov_pos = 0; - ssize_t total_send_len = 0; - size_t pkt_remain_n = 0; - size_t iov_remain = 0; - size_t fulled = 0; - uint8_t is_last_pkt = 0; - uint8_t cur_pkt_len = 0; - _trx->ubr_tx.out_io_id++; - ((UbrEventQMsg *)_trx->ubr_tx.remote_rx_event_q.addr)->io_id = _trx->ubr_tx.out_io_id; - while (buf_len > 0) { - is_last_pkt = (uint8_t)(buf_len <= UBR_MSG_PAYLOAD_LEN); - cur_pkt_len = is_last_pkt ? (uint8_t)buf_len : UBR_MSG_PAYLOAD_LEN; - msg->header[UBR_MSG_FLAG_INDEX] = is_last_pkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; - msg->header[UBR_MSG_LEN_INDEX] = cur_pkt_len; - msg->header[UBR_MSG_CUR_INDEX] = 0; - pkt_remain_n = cur_pkt_len; - while (cur_iov < iovcnt && pkt_remain_n > 0) { - iov_remain = (iov[cur_iov].iov_len - cur_iov_pos); - fulled = iov_remain > pkt_remain_n ? pkt_remain_n : iov_remain; - memcpy((msg->payload.inner + (cur_pkt_len - (uint8_t)pkt_remain_n)), - (uint8_t *)(iov[cur_iov].iov_base) + cur_iov_pos, - fulled); - pkt_remain_n -= fulled; - cur_iov_pos += fulled; - if (cur_iov_pos == iov[cur_iov].iov_len) { - cur_iov++; - cur_iov_pos = 0; - } - } - - Copy64Byte((int8_t *)&data_msg[_trx->ubr_tx.write_pos], (int8_t *)msg); - _trx->ubr_tx.write_pos = (_trx->ubr_tx.write_pos + 1) % _trx->ubr_tx.capacity; - total_send_len += (ssize_t)cur_pkt_len; - buf_len -= (int)cur_pkt_len; - } - return total_send_len; -} - -ssize_t UBRing::UbrTrxReadv(const struct iovec *iov, int iovcnt) -{ - RETURN_CODE rc = HLC_OK; - if (UNLIKELY((rc = CheckTrxRecvParam(_trx, iov, (uint32_t)iovcnt)) != HLC_OK)) { - return (rc == UBR_NOT_CONNECTED) ? 
0 : rc; - } - UbrMsgFormat *data_msg = (UbrMsgFormat *)_trx->ubr_rx.local_data_q.addr; - uint32_t read_pos_end = _trx->ubr_rx.read_pos; - uint8_t flag = data_msg[read_pos_end].header[UBR_MSG_FLAG_INDEX]; - if (flag == UBR_MSG_CHUNK_NONE) { - errno = EAGAIN; - return -1; - } - ssize_t nr = UbrTrxReadvBlockMode(iov, iovcnt); - if (UNLIKELY(nr == -1)) { - LOG(ERROR) << "Non-blocking readv msg in failed, connection has been closed."; - errno = EPIPE; - return -1; - } - return nr; -} - -ssize_t UBRing::UbrTrxReadvBlockMode(const struct iovec *iov, int iovcnt) -{ - RETURN_CODE rc = HLC_OK; - if (UNLIKELY((rc = CheckTrxRecvParam(_trx, iov, (uint32_t)iovcnt)) != HLC_OK)) { - return (rc == UBR_NOT_CONNECTED) ? 0 : rc; - } - - size_t remain_buf_len = 0; - for (int i = 0; i < iovcnt; i++) { - remain_buf_len += iov[i].iov_len; - } - - bool need_update_epoll_eof_pos = _trx->ubr_rx.read_pos == _trx->ubr_rx.ep_eof_pos; - ssize_t total_recv_len = StartReadv(_trx, iov, iovcnt, remain_buf_len); - - if (need_update_epoll_eof_pos) { - _trx->ubr_rx.ep_eof_pos = _trx->ubr_rx.read_pos; - } - return total_recv_len; -} - -RETURN_CODE UBRing::IsUbrTrxReadable(uint32_t ep_event) -{ - if (UNLIKELY(_trx == NULL)) { - LOG(ERROR) << "The trx to be checked is NULL."; - return HLC_ERR; - } - if (UNLIKELY(_trx->local_shm.addr == NULL)) { - LOG(ERROR) << "The trx local_shm to be checked is NULL."; - return HLC_ERR; - } - if (UNLIKELY(_trx->ubr_tx.trx_state != UBR_STATE_CONNECTED)) { - // TODO mwj 这几块的日志是否需要删除 - // LOG(ERROR) << "The trx is not connected state."; - return HLC_ERR; - } - - uint64_t io_id = ((UbrEventQMsg *)_trx->ubr_rx.local_rx_event_q.addr)->io_id; - if ((ep_event & EPOLLET) && io_id == _trx->ubr_rx.in_io_id) { - return MPA_MUXER_NOT_READY; - } - - uint32_t read_pos_end = _trx->ubr_rx.read_pos; - if (ep_event & EPOLLET) { - read_pos_end = _trx->ubr_rx.ep_eof_pos; - } - - UbrMsgFormat *data_msg = (UbrMsgFormat *)_trx->ubr_rx.local_data_q.addr; - uint8_t flag = 
data_msg[read_pos_end].header[UBR_MSG_FLAG_INDEX]; - if (flag == UBR_MSG_CHUNK_NONE) { - return MPA_MUXER_NOT_READY; - } - if (ep_event & EPOLLET) { - _trx->ubr_rx.in_io_id = io_id; - } - return HLC_OK; -} - -RETURN_CODE UBRing::IsUbrTrxWriteable(uint32_t ep_event) -{ - if (UNLIKELY(_trx == NULL)) { - LOG(ERROR) << "The trx to be checked is NULL."; - return HLC_ERR; - } - if (UNLIKELY(_trx->local_shm.addr == NULL)) { - LOG(ERROR) << "The trx local_shm to be checked is NULL."; - return HLC_ERR; - } - if (UNLIKELY((UbrEventQMsg *)_trx->ubr_tx.local_tx_event_q.addr == NULL)) { - LOG(ERROR) << "The trx local_tx_event_q addr is NULL."; - return HLC_ERR; - } - if (UNLIKELY((UbrEventQMsg *)_trx->ubr_tx.local_data_status_q.addr == NULL)) { - LOG(ERROR) << "The trx local_data_status_q addr is NULL."; - return HLC_ERR; - } - - if (UNLIKELY(_trx->ubr_tx.trx_state != UBR_STATE_CONNECTED)) { - LOG(ERROR) << "The trx is not connected state."; - return HLC_ERR; - } - - UbrDataStatusQMsg *data_status_msg = (UbrDataStatusQMsg *)_trx->ubr_tx.local_data_status_q.addr; - uint32_t cap = _trx->ubr_tx.capacity; - uint32_t tail = data_status_msg->tail; - uint32_t remain_chunk_num = - (_trx->ubr_tx.write_pos > tail) ? 
(tail + cap - _trx->ubr_tx.write_pos) : (tail - _trx->ubr_tx.write_pos); - if (remain_chunk_num == 0) { - _trx->ubr_tx.ep_last_cap = remain_chunk_num; - return MPA_MUXER_NOT_READY; - } - - if ((ep_event & EPOLLET) && (_trx->ubr_tx.ep_last_cap >= remain_chunk_num)) { - _trx->ubr_tx.ep_last_cap = remain_chunk_num; - return MPA_MUXER_NOT_READY; - } - _trx->ubr_tx.ep_last_cap = remain_chunk_num; - return HLC_OK; -} - -RETURN_CODE UBRing::UbrSetTimeout(UbrTaskStep task_type, int timeout) -{ - if (task_type >= UBR_TASK_STEP_NUM || timeout < 0) { - LOG(ERROR) << "Set timeout failed, invalid task type."; - return HLC_ERR; - } - - g_sleep_time[task_type] = (uint32_t)timeout; - LOG(INFO) << "Set timeout success, task_type=" << task_type << ", timeout=" << timeout; - return HLC_OK; -} - -RETURN_CODE UBRing::UbrTrxFreeShm(UbrTrx *trx) -{ - if (trx == NULL) { - LOG(ERROR) << "Trx is NULL."; - return HLC_ERR; - } - - RETURN_CODE rc = HLC_OK; - rc = ShmMunmap(&trx->local_shm); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx close, local unmap " << trx->local_shm.name << " shm fail."; - return HLC_ERR; - } - - rc = ShmFree(&trx->local_shm); - if (UNLIKELY(rc != HLC_OK)) { - if (UNLIKELY(rc == SHM_ERR_RESOURCE_ATTACHED || rc == SHM_ERR_NOT_FOUND)) { - LOG(INFO) << "Wait for " << trx->remote_shm.name << " remote free shm."; - return HLC_OK; - } - LOG(ERROR) << "Wait for " << trx->local_shm.name << " local shm free fail."; - return HLC_ERR; - } - - size_t name_len = strlen(trx->remote_shm.name); - if (!(name_len <= 0 || name_len > SHM_MAX_NAME_LEN || trx->remote_shm.len <= 0)) { - rc = ShmFree(&trx->remote_shm); - } - if (rc != HLC_OK) { - if (rc == SHM_ERR_RESOURCE_ATTACHED || rc == SHM_ERR_NOT_FOUND) { - LOG(INFO) << "Wait for " << trx->remote_shm.name << " remote free shm."; - return HLC_OK; - } - LOG(ERROR) << "Wait for " << trx->remote_shm.name << " remote shm free fail."; - return HLC_ERR; - } - - return HLC_OK; -} - -void UBRing::PreWriteAddr(uint8_t *addr, size_t len) -{ 
- if (addr == NULL) { - return; - } - - size_t i = 0; - while (i < len) { - if (i + sizeof(uint64_t) <= len) { - *(uint64_t *)(addr + i) = (uint64_t)0; - i += sizeof(uint64_t); - } else if (i + sizeof(uint32_t) < len) { - *(uint32_t *)(addr + i) = (uint32_t)0; - i += sizeof(uint32_t); - } else if (i + sizeof(uint16_t) < len) { - *(uint16_t *)(addr + i) = (uint16_t)0; - i += sizeof(uint16_t); - } else { - *(addr + i) = (uint8_t)0; - i += sizeof(uint8_t); - } - } -} - -void UBRing::PrewriteUbrTx(UbrTx *tx) -{ - if (tx == NULL) { - return; - } - PreWriteAddr(tx->remote_data_q.addr, tx->capacity * sizeof(UbrMsgFormat)); -} - -void UBRing::PrewriteUbrRx(UbrRx *rx) -{ - if (rx == NULL) { - return; - } - PreWriteAddr(rx->local_data_q.addr, rx->capacity * sizeof(UbrMsgFormat)); -} - -RETURN_CODE UBRing::UbrTrxMapLocalShm(SHM *local_shm) -{ - if (UNLIKELY(_trx == NULL)) { - LOG(ERROR) << "Trx map Shared memory failed, trx is null."; - return HLC_ERR; - } - if (UNLIKELY(local_shm == NULL)) { - LOG(ERROR) << "Trx map Shared memory failed, local_shm is null."; - return HLC_ERR; - } - _trx->local_shm = *local_shm; - _trx->ubr_tx.local_tx_event_q.addr = local_shm->addr + TX_EVENTQ_ADDR_OFFSET; - _trx->ubr_tx.local_tx_event_q.len = UBR_EVENTQ_LEN; - _trx->ubr_rx.local_rx_event_q.addr = local_shm->addr + RX_EVENTQ_ADDR_OFFSET; - _trx->ubr_rx.local_rx_event_q.len = UBR_EVENTQ_LEN; - _trx->ubr_tx.local_data_status_q.addr = local_shm->addr + DATASTATUSQ_ADDR_OFFSET; - _trx->ubr_tx.local_data_status_q.len = UBR_DATASTATUSQ_LEN; - size_t addr_aligned_offset = Aligned64Offset(local_shm->addr + DATAQ_ADDR_OFFSET); - LOG(DEBUG) << "UbrRx's local_data_q address will aligned with offset=" << addr_aligned_offset; - _trx->ubr_rx.local_data_q.addr = local_shm->addr + DATAQ_ADDR_OFFSET + addr_aligned_offset; - _trx->ubr_rx.local_data_q.len = local_shm->len - DATAQ_ADDR_OFFSET - addr_aligned_offset; - return HLC_OK; -} - -RETURN_CODE UBRing::UbrTrxMapRemoteShm(SHM *remote_shm) -{ - if 
(UNLIKELY(_trx == NULL)) { - LOG(ERROR) << "Trx map Shared memory failed, trx is null."; - return HLC_ERR; - } - if (UNLIKELY(remote_shm == NULL)) { - LOG(ERROR) << "Trx map Shared memory failed, remote_shm is null."; - return HLC_ERR; - } - _trx->remote_shm = *remote_shm; - _trx->ubr_rx.remote_tx_event_q.addr = remote_shm->addr + TX_EVENTQ_ADDR_OFFSET; - _trx->ubr_rx.remote_tx_event_q.len = UBR_EVENTQ_LEN; - _trx->ubr_tx.remote_rx_event_q.addr = remote_shm->addr + RX_EVENTQ_ADDR_OFFSET; - _trx->ubr_tx.remote_rx_event_q.len = UBR_EVENTQ_LEN; - _trx->ubr_rx.remote_data_status_q.addr = remote_shm->addr + DATASTATUSQ_ADDR_OFFSET; - _trx->ubr_rx.remote_data_status_q.len = UBR_DATASTATUSQ_LEN; - size_t addr_aligned_offset = Aligned64Offset(remote_shm->addr + DATAQ_ADDR_OFFSET); - LOG(DEBUG) << "UbrTx's remote_data_q will aligned with offset=" << addr_aligned_offset; - _trx->ubr_tx.remote_data_q.addr = remote_shm->addr + DATAQ_ADDR_OFFSET + addr_aligned_offset; - _trx->ubr_tx.remote_data_q.len = remote_shm->len - DATAQ_ADDR_OFFSET - addr_aligned_offset; - return HLC_OK; -} - -RETURN_CODE UBRing::UbrServerTrxInit(SHM *local_shm, SHM *remote_shm) -{ - RETURN_CODE rc = UbrTrxMapShm(local_shm, remote_shm); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) <<"Trx map shared memory failed."; - return rc; - } - - uint32_t local_data_msg_cap = (uint32_t)(_trx->ubr_rx.local_data_q.len / UBR_MSG_LEN); - uint32_t remote_data_msg_cap = (uint32_t)(_trx->ubr_tx.remote_data_q.len / UBR_MSG_LEN); - _trx->ubr_rx.capacity = local_data_msg_cap; - _trx->ubr_tx.capacity = remote_data_msg_cap; - rc = UBRingManager::GetHlcDealMsgMaxCnt(_trx->ubr_rx.capacity, &_trx->ubr_rx.deal_msg_max_cnt); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Get hlc deal msg max cnt."; - return rc; - } - PrewriteUbrRx(&_trx->ubr_rx); - PrewriteUbrTx(&_trx->ubr_tx); - - ((UbrDataStatusQMsg *)(_trx->ubr_tx.local_data_status_q.addr))->tail = remote_data_msg_cap - 1; - ((UbrDataStatusQMsg 
*)(_trx->ubr_rx.remote_data_status_q.addr))->tail = local_data_msg_cap - 1; - - if (UNLIKELY(UbrAddTimer() != HLC_OK)) { - LOG(ERROR) << "Ubr add timer failed, localName=" << local_shm->name; - return HLC_ERR; - } - - ((UbrDataStatusQMsg *)(_trx->ubr_tx.local_data_status_q.addr))->timeout = FLAGS_ub_connect_timeout; - ((UbrDataStatusQMsg *)(_trx->ubr_rx.remote_data_status_q.addr))->timeout = FLAGS_ub_connect_timeout; - - ((UbrEventQMsg *)_trx->ubr_tx.remote_rx_event_q.addr)->flag = UBR_STATE_CONNECTED; - ((UbrEventQMsg *)_trx->ubr_rx.local_rx_event_q.addr)->flag = UBR_STATE_CONNECTED; - _trx->ubr_tx.trx_state = UBR_STATE_CONNECTED; - _trx->ubr_rx.trx_state = UBR_STATE_CONNECTED; - return HLC_OK; -} - -int UBRing::UbrAllocateServerShm(SHM* remote_trx_shm, SHM* local_trx_shm) { - UbrSetSleepTask(UBR_TASK_ACCEPT_MAP_FRONT); - if (UNLIKELY((ShmRemoteMalloc(remote_trx_shm)) != HLC_OK)) { - LOG(ERROR) << "Trx apply remote shared memory failed."; - return -1; - } - - if (UNLIKELY((ShmLocalCalloc(local_trx_shm)) != HLC_OK)) { - LOG(ERROR) << "Trx apply local shared memory failed."; - return -1; - } - - UbrTrx **ubr_trx_ptr = &_trx; - if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(ubr_trx_ptr)) != HLC_OK)) { - LOG(ERROR) << "Acquire ubrtrx failed."; - ShmRemoteFree(remote_trx_shm); - ShmLocalFree(local_trx_shm); - return -1; - } - _trx->type = TCP_TRX; - if (UNLIKELY((UbrServerTrxInit(local_trx_shm, remote_trx_shm)) != HLC_OK)) { - LOG(ERROR) << "Server trx init failed."; - ShmRemoteFree(remote_trx_shm); - UbrTrxFreeShm(_trx); - UBRingManager::ReleaseUbrTrxFromMgr(_trx); - return -1; - } - return 0; -} - -int UBRing::UbrAllocateLocalShm(SHM *local_trx_shm, const char *shm_name) -{ - if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(&(_trx))) != HLC_OK)) { - LOG(ERROR) << "Acquire ubrtrx failed, localName=" << shm_name; - return -1; - } - - _trx->type = TCP_TRX; - if (UNLIKELY((ApplyAndMapLocalShm(local_trx_shm, shm_name)) != HLC_OK)) { - LOG(ERROR) << "Trx apply or map 
local shared memory failed, localName=" << shm_name; - return -1; - } - return 0; -} - -int UBRing::UbrMapRemoteShm(SHM *local_trx_shm, const char *local_name) -{ - RETURN_CODE rc = UbrMapRemoteShmAddTimer(local_trx_shm, local_name); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Connect Trx failed, local shm name=" << local_trx_shm->name; - return -1; - } - PrewriteUbrRx(&_trx->ubr_rx); - PrewriteUbrTx(&_trx->ubr_tx); - ((UbrEventQMsg *)_trx->ubr_rx.remote_tx_event_q.addr)->flag = UBR_STATE_CONNECTED; - ((UbrEventQMsg *)_trx->ubr_rx.local_rx_event_q.addr)->flag = UBR_STATE_CONNECTED; - _trx->ubr_tx.trx_state = UBR_STATE_CONNECTED; - _trx->ubr_rx.trx_state = UBR_STATE_CONNECTED; - return 0; -} - -RETURN_CODE UBRing::UbrMapRemoteShmAddTimer(SHM *local_trx_shm, const char *local_name) -{ - uint64_t start_time = GetCurNanoSeconds(); - - size_t remote_server_len = UBR_MSG_LEN * (((UbrDataStatusQMsg *)(_trx->ubr_tx.local_data_status_q.addr))->tail + 1) + - UBR_MSG_LEN * ((DATAQ_ADDR_OFFSET / UBR_MSG_LEN) + 1); - SHM remote_trx_shm = {NULL, remote_server_len, 0, {0}, local_trx_shm->fd}; - int result = snprintf(remote_trx_shm.name, - SHM_MAX_NAME_BUFF_LEN, - "%s_%s_%s", - SHM_NAME_PREFIX, - local_name, - SERVER_SHM_NAME_SUFFIX); - if (UNLIKELY(result < 0)) { - LOG(ERROR) << "Copy server shared memory name failed, localName=%s, ret=%d.", local_name, result; - return HLC_ERR; - } - UbrSetSleepTask(UBR_TASK_CONNECT_MAP_FRONT); - RETURN_CODE rc = ApplyAndMapRemoteShm(&remote_trx_shm); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Connect Trx map shared memory failed, remote shm=" << remote_trx_shm.name; - return rc; - } - - if (UNLIKELY(UbrAddTimer() != HLC_OK)) { - LOG(ERROR) << "Ubr add timer failed, localName=" << local_name; - ShmRemoteFree(&remote_trx_shm); - return HLC_ERR; - } - - UbrSetSleepTask(UBR_TASK_CONNECT_MAP_AFTER); - - uint32_t timeout = ((UbrDataStatusQMsg *)(_trx->ubr_tx.local_data_status_q.addr))->timeout; - if (HasTimedOut(start_time, timeout) != 
HLC_OK) { - LOG(ERROR) << "Local shm " << local_trx_shm->name << " wait for connect remote map timeout."; - DeleteTimerSafe((uint32_t)_trx->hb_timer_fd); - DeleteTimerSafe((uint32_t)_trx->timer_fd); - ShmRemoteFree(&remote_trx_shm); - return HLC_ERR_TIMEOUT; - } - - return HLC_OK; -} - -RETURN_CODE UBRing::ApplyAndMapLocalShm(SHM *local_trx_shm, const char *local_name) -{ - if (UNLIKELY(_trx == NULL || local_trx_shm == NULL)) { - LOG(ERROR) << "Trx map Shared memory failed, trx is null, localName=" << local_name; - return HLC_ERR; - } - int result = snprintf(local_trx_shm->name, - SHM_MAX_NAME_BUFF_LEN, - "%s_%s_%s", - SHM_NAME_PREFIX, - local_name, - CLIENT_SHM_NAME_SUFFIX); - if (UNLIKELY(result < 0)) { - LOG(ERROR) << "Copy client localTrx shared memory name failed, localName=" << local_name << ", ret=" << result; - return HLC_ERR; - } - - RETURN_CODE rc = ShmLocalCalloc(local_trx_shm); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx apply local shared memory failed, local shm name=" << local_trx_shm->name; - if (rc == SHM_ERR_EXIST || rc == SHM_ERR_NOT_FOUND) { - rc = UBR_ERR_ADDR_IN_USE; - } - UBRingManager::ReleaseUbrTrxFromMgr(_trx); - return rc; - } - rc = UbrTrxMapLocalShm(local_trx_shm); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx map local shared memory failed, local shm name=" << local_trx_shm->name; - ShmLocalFree(local_trx_shm); - UBRingManager::ReleaseUbrTrxFromMgr(_trx); - return rc; - } - ((UbrDataStatusQMsg *)_trx->ubr_tx.local_data_status_q.addr)->timeout = FLAGS_ub_connect_timeout; - _trx->ubr_rx.capacity = (uint32_t)(_trx->ubr_rx.local_data_q.len / UBR_MSG_LEN); - rc = UBRingManager::GetHlcDealMsgMaxCnt(_trx->ubr_rx.capacity, &_trx->ubr_rx.deal_msg_max_cnt); - if (rc != HLC_OK) { - LOG(ERROR) << "Get hlc deal msg max cnt, local shm name=" << local_trx_shm->name; - ShmLocalFree(local_trx_shm); - UBRingManager::ReleaseUbrTrxFromMgr(_trx); - return rc; - } - return HLC_OK; -} - -RETURN_CODE UBRing::ApplyAndMapRemoteShm(SHM 
*remote_trx_shm) -{ - RETURN_CODE rc = ShmRemoteMalloc(remote_trx_shm); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx apply remote shared memory failed."; - return rc; - } - rc = UbrTrxMapRemoteShm(remote_trx_shm); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Trx map shared memory failed."; - ShmRemoteFree(remote_trx_shm); - return rc; - } - _trx->ubr_tx.capacity = (uint32_t)(_trx->ubr_tx.remote_data_q.len / UBR_MSG_LEN); - return HLC_OK; -} - -RETURN_CODE UBRing::WritevHasEnoughSpace(size_t buf_len) -{ - UbrDataStatusQMsg *data_status_msg = (UbrDataStatusQMsg *)_trx->ubr_tx.local_data_status_q.addr; - uint32_t cap = _trx->ubr_tx.capacity; - uint32_t tail = data_status_msg->tail; - uint32_t remain_chunk_num = - (_trx->ubr_tx.write_pos > tail) ? (tail + cap - _trx->ubr_tx.write_pos) : (tail - _trx->ubr_tx.write_pos); - uint32_t need_msg_chunk_num = CalcUbrMsgChunkCnt((uint32_t)buf_len); - if (remain_chunk_num < need_msg_chunk_num) { - return HLC_RETRY; - } - return HLC_OK; -} - -RETURN_CODE UBRing::UbrClearResourceCheck(UbrTrx *trx, uint64_t start_time, UbrCloseType close_type) -{ - if (UNLIKELY(trx == NULL)) { - LOG(ERROR) << "Trx close failed, trx is null."; - return HLC_ERR; - } - - UbrEventQMsg *local_tx_event_q = (UbrEventQMsg *)trx->ubr_tx.local_tx_event_q.addr; - while (ATOMIC_LOAD(trx->close_cnt) == 1 && local_tx_event_q->flag == UBR_STATE_CLOSING) { - if (HasTimedOut(start_time, FLAGS_ub_disconnect_timeout) != HLC_OK) { - LOG(ERROR) << "Trx close failed, wait close time out."; - break; - } - usleep(1); - } - int first_clear_expected = UBR_CLOSE_FIRST; - int second_clear_expected = UBR_CLOSE_SECOND; - if (local_tx_event_q->flag == UBR_STATE_CLOSING) { - if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->close_state, first_clear_expected, UBR_CLOSE_SECOND)) { - LOG(ERROR) << "Trx close, exist process is closing, name=" << trx->local_shm.name; - return HLC_REENTRY; - } else if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->close_state, second_clear_expected, 
UBR_CLOSE_END)) { - local_tx_event_q->flag = UBR_STATE_CLOSED; - trx->ubr_tx.trx_state = UBR_STATE_CLOSED; - } - } - - if (close_type == UBR_SEND_CLOSE) { - DeleteTimerSafe((uint32_t)trx->timer_fd); - } else { - DeleteTimer((uint32_t)trx->timer_fd); - } - DeleteTimerSafe((uint32_t)trx->hb_timer_fd); - return HLC_OK; -} - -RETURN_CODE UBRing::ClearTrxResource(UbrTrx *trx, uint64_t start_time, UbrCloseType close_type, int op) -{ - UbrEventQMsg *local_tx_event_q = (UbrEventQMsg *)trx->ubr_tx.local_tx_event_q.addr; - RETURN_CODE rc = UbrClearResourceCheck(trx, start_time, close_type); - if (rc != HLC_OK) { - return rc; - } - - rc = UbrAddAsynClearTimer(trx); - if (rc != HLC_OK) { - LOG(ERROR) << "Trx close, add " << trx->local_shm.name << " close clear timer failed."; - return HLC_ERR; - } - - return HLC_OK; -} - -RETURN_CODE UBRing::UbrTrxCloseCheck(UbrTrx *trx) -{ - if (UNLIKELY(trx == NULL)) { - LOG(ERROR) << "Trx close failed, client trx is null."; - return HLC_ERR; - } - int expected = MAX_CLOSE_COUNT; - if (!ATOMIC_COMPARE_EXCHANGE_STRONG(trx->close_cnt, expected, MAX_CLOSE_COUNT - 1)) { - LOG(ERROR) << "Trx close failed, exist other close acquire, trx local name=" << trx->local_shm.name; - return HLC_ERR; - } - - if (UNLIKELY(trx->ubr_tx.local_tx_event_q.addr == NULL)) { - LOG(ERROR) << "Trx close failed, local_tx_event_q addr is NULL, trx local name=" << trx->local_shm.name; - return HLC_ERR; - } - return HLC_OK; -} - -ssize_t UBRing::StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remain_buf_len) -{ - ssize_t total_recv_len = 0; - int iov_index = 0; - size_t iov_pos = 0; - UbrMsgFormat *data_msg = (UbrMsgFormat *)trx->ubr_rx.local_data_q.addr; - bool not_eof_encountered = true; - while (not_eof_encountered && remain_buf_len > 0) { - if (UNLIKELY(CheckTrxRecvPreCheck(trx) != HLC_OK)) { - return HLC_ERR; - } - UbrMsgFormat *current_chunk = &data_msg[trx->ubr_rx.read_pos]; - uint8_t flag = current_chunk->header[UBR_MSG_FLAG_INDEX]; - if (flag 
== UBR_MSG_CHUNK_NONE) { - continue; - } - if (flag == UBR_MSG_CHUNK_EOF) { - not_eof_encountered = false; - } - uint8_t chunk_msg_len = current_chunk->header[UBR_MSG_LEN_INDEX]; - uint8_t cur_index = current_chunk->header[UBR_MSG_CUR_INDEX]; - uint8_t recv_len = - remain_buf_len > (size_t)(chunk_msg_len - cur_index) ? (chunk_msg_len - cur_index) : (uint8_t)remain_buf_len; - while (iov_index < iovcnt && recv_len > 0) { - size_t copy_len = - recv_len > (iov[iov_index].iov_len - iov_pos) ? iov[iov_index].iov_len - iov_pos : (size_t)recv_len; - memcpy((uint8_t *)iov[iov_index].iov_base + iov_pos, current_chunk->payload.inner + cur_index, copy_len); - recv_len -= (uint8_t)copy_len; - iov_pos += copy_len; - cur_index += (uint8_t)copy_len; - if (iov_pos == iov[iov_index].iov_len) { - iov_index++; - iov_pos = 0; - } - remain_buf_len -= copy_len; - total_recv_len += (ssize_t)copy_len; - } - current_chunk->header[UBR_MSG_CUR_INDEX] = cur_index; - if (current_chunk->header[UBR_MSG_CUR_INDEX] == chunk_msg_len) { - current_chunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; - UpdateDataQTail(trx); - trx->ubr_rx.read_pos = (trx->ubr_rx.read_pos + 1) % trx->ubr_rx.capacity; - } - } - return total_recv_len; -} -} // namespace ub -} // namespace brpc \ No newline at end of file diff --git a/src/brpc/ub/ub_ring_manager.cpp b/src/brpc/ub/ub_ring_manager.cpp deleted file mode 100644 index 9ef3d25b60..0000000000 --- a/src/brpc/ub/ub_ring_manager.cpp +++ /dev/null @@ -1,263 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include "brpc/ub/ub_ring_manager.h" -#include "butil/logging.h" - -namespace brpc { -namespace ub { -DEFINE_int32(ubr_max_managed_num, 1024, "maximum number of managed ubring"); -DEFINE_int32(tail_update_after_read, 8, "Position of the tail update after the read"); - -UbrMgr UBRingManager::g_ubr_mgr; -UbrLinkInfoMgr UBRingManager::g_link_info_mgr; -pthread_mutex_t UBRingManager::g_ubr_trx_mgr_mtx = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t UBRingManager::g_ubr_listener_mgr_mtx = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t UBRingManager::g_link_info_mgr_mtx = PTHREAD_MUTEX_INITIALIZER; - -uint64_t g_ubr_trx_num = 0; -uint64_t g_ub_event_cnt = 0; -uint64_t g_ubr_listener_num = 0; - -RETURN_CODE UBRingManager::GetHlcDealMsgMaxCnt(const uint32_t capacity, uint32_t *deal_msg_max_cnt) { - if (UNLIKELY(deal_msg_max_cnt == NULL)) { - LOG(ERROR) << "Get update factor failed, dealMsgMaxCnt is null."; - return HLC_ERR; - } - if (UNLIKELY(FLAGS_tail_update_after_read == 0)) { - LOG(ERROR) << "Get update factor failed, factor is 0."; - return HLC_ERR; - } - *deal_msg_max_cnt = capacity / FLAGS_tail_update_after_read; - return HLC_OK; -} - -RETURN_CODE UBRingManager::UbrMgrDefault() -{ - g_ubr_mgr.trx_num = 0; - g_ubr_mgr.trx_cap = FLAGS_ubr_max_managed_num; - g_ubr_mgr.trx_mgr_unit_status = NULL; - g_ubr_mgr.trx_mgr = NULL; - return HLC_OK; -} - -RETURN_CODE UBRingManager::UbrMgrInit() { - RETURN_CODE rc = UbrMgrDefault(); - if (UNLIKELY(rc != HLC_OK)) { - LOG(ERROR) << "Ubr manager set default values failed."; - return rc; - } - - size_t 
trx_mgr_size = g_ubr_mgr.trx_cap * sizeof(UbrTrx); - g_ubr_mgr.trx_mgr = (UbrTrx *)malloc(trx_mgr_size); - size_t trx_mgr_status_size = g_ubr_mgr.trx_cap * sizeof(UbrMgrUnitStatus); - g_ubr_mgr.trx_mgr_unit_status = (UbrMgrUnitStatus *)malloc(trx_mgr_status_size); - if (UNLIKELY(g_ubr_mgr.trx_mgr == NULL || - g_ubr_mgr.trx_mgr_unit_status == NULL)) { - LOG(ERROR) << "Ubr manager memory allocation failed."; - UbrMgrFini(); - return HLC_ERR; - } - - memset(g_ubr_mgr.trx_mgr, 0, trx_mgr_size); - memset(g_ubr_mgr.trx_mgr_unit_status, UBR_MGR_UNIT_FREE, trx_mgr_status_size); - LinkInfoInit(); - return HLC_OK; - return UBR_NOT_CONNECTED; -} - -void UBRingManager::UbrMgrFini() { - { - LOCK_GUARD(g_ubr_trx_mgr_mtx); - FREE_PTR(g_ubr_mgr.trx_mgr); - FREE_PTR(g_ubr_mgr.trx_mgr_unit_status); - } - { - LOCK_GUARD(g_ubr_listener_mgr_mtx); - } - g_ubr_mgr.trx_num = 0; - g_ubr_mgr.trx_cap = 0; - LinkInfoFini(); -} - -RETURN_CODE UBRingManager::AcquireUbrTrxFromMgr(UbrTrx **trx) { - if (UNLIKELY(trx == NULL)) { - LOG(ERROR) << "Acquire trx failed, trx is null."; - return HLC_ERR; - } - - if (UNLIKELY(g_ubr_mgr.trx_mgr == NULL)) { - LOG(ERROR) << "Acquire trx failed, trxMgr is null."; - return HLC_ERR; - } - - LOCK_GUARD(g_ubr_trx_mgr_mtx); - if (g_ubr_mgr.trx_num >= g_ubr_mgr.trx_cap) { - LOG(ERROR) << "Acquire trx failed, trx number is full."; - return HLC_ERR; - } - - for (uint32_t i = 0; i < g_ubr_mgr.trx_cap; ++i) { - if (g_ubr_mgr.trx_mgr_unit_status[i] == UBR_MGR_UNIT_FREE) { - memset(&g_ubr_mgr.trx_mgr[i], 0, sizeof(UbrTrx)); - g_ubr_mgr.trx_mgr_unit_status[i] = UBR_MGR_UNIT_USED; - *trx = &g_ubr_mgr.trx_mgr[i]; - (*trx)->trx_mgr_index = i; - (*trx)->ubr_id = g_ubr_trx_num; - (*trx)->close_state = UBR_CLOSE_FIRST; - (*trx)->close_cnt = MAX_CLOSE_COUNT; - ++g_ubr_mgr.trx_num; - ++g_ubr_trx_num; - return HLC_OK; - } - } - LOG(ERROR) << "Acquire trx failed, no available space."; - return HLC_ERR; -} - -RETURN_CODE UBRingManager::ReleaseUbrTrxFromMgr(UbrTrx *trx) { - if 
(UNLIKELY(trx == NULL)) { - LOG(ERROR) << "Release trx failed, trx is null."; - return HLC_ERR; - } - - trx->local_shm.addr = NULL; - trx->ubr_tx.local_tx_event_q.addr = NULL; - trx->ubr_tx.local_data_status_q.addr = NULL; - trx->ubr_rx.local_rx_event_q.addr = NULL; - trx->ubr_rx.remote_data_status_q.addr = NULL; - if (UNLIKELY(g_ubr_mgr.trx_mgr == NULL)) { - LOG(ERROR) << "Release trx failed, trxMgr is null."; - return HLC_ERR; - } - - LOCK_GUARD(g_ubr_trx_mgr_mtx); - if (g_ubr_mgr.trx_num == 0) { - LOG(ERROR) << "Release trx failed, trx number is 0."; - return HLC_ERR; - } - - uint32_t idx = trx->trx_mgr_index; - if (g_ubr_mgr.trx_mgr_unit_status[idx] == UBR_MGR_UNIT_FREE) { - LOG(ERROR) << "Release trx failed, trx is not in manager."; - return HLC_ERR; - } - g_ubr_mgr.trx_mgr_unit_status[idx] = UBR_MGR_UNIT_FREE; - --g_ubr_mgr.trx_num; - return HLC_OK; -} - -void UBRingManager::LinkInfoInit(void) { - - size_t link_info_mgr_size = FLAGS_ubr_max_managed_num * sizeof(UbrLinkInfo); - g_link_info_mgr.all_link_info = (UbrLinkInfo*) malloc(link_info_mgr_size); - if (g_link_info_mgr.all_link_info == NULL) { - LOG(ERROR) << "allLinkInfo is NULL"; - LinkInfoFini(); - return; - } - - g_link_info_mgr.link_mgr_unit_status = (UbrMgrUnitStatus*) malloc(link_info_mgr_size); - if (g_link_info_mgr.link_mgr_unit_status == NULL) { - LinkInfoFini(); - return; - } - - memset(g_link_info_mgr.all_link_info, 0, link_info_mgr_size); - memset(g_link_info_mgr.link_mgr_unit_status, 0, link_info_mgr_size); -} - -void UBRingManager::LinkInfoFini(void) { - if (g_link_info_mgr.link_mgr_unit_status == NULL || g_link_info_mgr.all_link_info == NULL) { - LOG(ERROR) << "LinkInfo is NULL"; - return; - } - { - LOCK_GUARD(g_link_info_mgr_mtx); - FREE_PTR(g_link_info_mgr.all_link_info); - FREE_PTR(g_link_info_mgr.link_mgr_unit_status); - } - - g_link_info_mgr.link_num = 0; -} - -void UBRingManager::AcquireLinkInfoToMgr(const char *listener_name, UbrTrx *trx) { - if (listener_name == NULL || trx == NULL) 
{ - LOG(ERROR) << "LinkInfo acquire fail."; - return; - } - - if (g_link_info_mgr.link_mgr_unit_status == NULL || g_link_info_mgr.all_link_info == NULL) { - LOG(ERROR) << "LinkInfo is NULL."; - return; - } - uint32_t ubr_index = trx->trx_mgr_index; - char* connect_name = trx->local_shm.name; - if (g_link_info_mgr.link_mgr_unit_status[ubr_index] == UBR_MGR_UNIT_FREE) { - strncpy(g_link_info_mgr.all_link_info[ubr_index].connect_name, - connect_name, SHM_MAX_NAME_BUFF_LEN); - strncpy(g_link_info_mgr.all_link_info[ubr_index].listener_name, - listener_name, SHM_MAX_NAME_BUFF_LEN); - g_link_info_mgr.link_mgr_unit_status[ubr_index] = UBR_MGR_UNIT_USED; - g_link_info_mgr.link_num++; - } -} - -void UBRingManager::ReleaseLinkInfoFromMgr(UbrTrx *trx) { - if (trx == NULL || g_link_info_mgr.link_mgr_unit_status == NULL) { - LOG(ERROR) << "LinkInfo release fail."; - return; - } - - if (g_link_info_mgr.link_mgr_unit_status[trx->trx_mgr_index] == UBR_MGR_UNIT_FREE) { - LOG(ERROR) << "Release linkInfo failed, trx is not in manager."; - return; - } - g_link_info_mgr.link_mgr_unit_status[trx->trx_mgr_index] = UBR_MGR_UNIT_FREE; - g_link_info_mgr.link_num--; -} - -int32_t UBRingManager::UbEventCallback(const char *shm_name) -{ - if (UNLIKELY(shm_name == NULL)) { - LOG(ERROR) << "Ub event callback failed, shm name is null."; - return HLC_ERR; - } - if (UNLIKELY(g_ubr_mgr.trx_mgr == NULL)) { - LOG(ERROR) << "Ub event callback failed, trx mgr is null."; - return HLC_ERR; - } - LOG(DEBUG) << "Ub event callback is processing. 
shm_name=" << shm_name; - - for (uint32_t i = 0; i < g_ubr_mgr.trx_cap; ++i) { - if (g_ubr_mgr.trx_mgr_unit_status[i] == UBR_MGR_UNIT_FREE) { - continue; - } - - if (strcmp(g_ubr_mgr.trx_mgr[i].local_shm.name, shm_name) == 0 || - strcmp(g_ubr_mgr.trx_mgr[i].remote_shm.name, shm_name) == 0) { - ++g_ub_event_cnt; - int fd = (int)g_ubr_mgr.trx_mgr[i].local_shm.fd; - LOG(INFO) << "Ub event callback, the fd of the faulty link is " << fd; - return UBRing::UbrPassiveClearTrx(&g_ubr_mgr.trx_mgr[i], fd, UBR_UB_EVENT); - } - } - return HLC_ERR; -} -} -} \ No newline at end of file diff --git a/src/brpc/ub_transport.cpp b/src/brpc/ub_transport.cpp index 937030ba83..80f9c9eb9d 100644 --- a/src/brpc/ub_transport.cpp +++ b/src/brpc/ub_transport.cpp @@ -19,8 +19,8 @@ #include "brpc/ub_transport.h" #include "brpc/tcp_transport.h" -#include "brpc/ub/ub_endpoint.h" -#include "brpc/ub/ub_helper.h" +#include "brpc/ubring/ub_endpoint.h" +#include "brpc/ubring/ub_helper.h" namespace brpc { DECLARE_bool(usercode_in_coroutine); @@ -31,7 +31,7 @@ extern SocketVarsCollector *g_vars; void UBShmTransport::Init(Socket *socket, const SocketOptions &options) { CHECK(_ub_ep == NULL); if (options.socket_mode == SOCKET_MODE_UBRING) { - _ub_ep = new(std::nothrow)ub::UBShmEndpoint(socket); + _ub_ep = new(std::nothrow)ubring::UBShmEndpoint(socket); if (!_ub_ep) { const int saved_errno = errno; PLOG(ERROR) << "Fail to create UBShmEndpoint"; @@ -47,7 +47,7 @@ void UBShmTransport::Init(Socket *socket, const SocketOptions &options) { _default_connect = options.app_connect; _on_edge_trigger = options.on_edge_triggered_events; if (options.need_on_edge_trigger && _on_edge_trigger == NULL) { - _on_edge_trigger = ub::UBShmEndpoint::OnNewDataFromTcp; + _on_edge_trigger = ubring::UBShmEndpoint::OnNewDataFromTcp; } _tcp_transport = std::unique_ptr(new TcpTransport()); _tcp_transport->Init(socket, options); @@ -71,7 +71,7 @@ int UBShmTransport::Reset(int32_t expected_nref) { std::shared_ptr 
UBShmTransport::Connect() { if (_default_connect == nullptr) { - return std::make_shared(); + return std::make_shared(); } return _default_connect; } @@ -134,7 +134,7 @@ void UBShmTransport::ProcessEvent(bthread_attr_t attr) { bthread_t tid; if (FLAGS_usercode_in_coroutine) { OnEdge(_socket); - } else if (ub::FLAGS_ub_edisp_unsched == false) { + } else if (ubring::FLAGS_ub_edisp_unsched == false) { auto rc = bthread_start_background(&tid, &attr, OnEdge, _socket); if (rc != 0) { LOG(FATAL) << "Fail to start ProcessEvent"; @@ -156,7 +156,7 @@ void UBShmTransport::QueueMessage(InputMessageClosure& input_msg, return; } - if (ub::FLAGS_ub_disable_bthread) { + if (ubring::FLAGS_ub_disable_bthread) { ProcessInputMessage(to_run_msg); return; } @@ -187,16 +187,16 @@ int UBShmTransport::ContextInitOrDie(bool serverOrNot, const void* _options) { if (!OptionsAvailableOverUB(static_cast(_options))) { return -1; } - ub::GlobalUBInitializeOrDie(); - if (!ub::InitPollingModeWithTag(static_cast(_options)->bthread_tag)) { + ubring::GlobalUBInitializeOrDie(); + if (!ubring::InitPollingModeWithTag(static_cast(_options)->bthread_tag)) { return -1; } } else { if (!OptionsAvailableForUB(static_cast(_options))) { return -1; } - ub::GlobalUBInitializeOrDie(); - if (!ub::InitPollingModeWithTag(bthread_self_tag())) { + ubring::GlobalUBInitializeOrDie(); + if (!ubring::InitPollingModeWithTag(bthread_self_tag())) { return -1; } return 0; @@ -210,7 +210,7 @@ bool UBShmTransport::OptionsAvailableForUB(const ChannelOptions* opt) { LOG(WARNING) << "Cannot use SSL and UB at the same time"; return false; } - if (!ub::SupportedByUB(opt->protocol.name())) { + if (!ubring::SupportedByUB(opt->protocol.name())) { LOG(WARNING) << "Cannot use " << opt->protocol.name() << " over UB"; return false; diff --git a/src/brpc/ub_transport.h b/src/brpc/ub_transport.h index 49403c172a..7119a96ac5 100644 --- a/src/brpc/ub_transport.h +++ b/src/brpc/ub_transport.h @@ -25,8 +25,8 @@ namespace brpc { class 
UBShmTransport : public Transport { friend class TransportFactory; - friend class ub::UBShmEndpoint; - friend class ub::UBConnect; + friend class ubring::UBShmEndpoint; + friend class ubring::UBConnect; public: void Init(Socket* socket, const SocketOptions& options) override; void Release() override; @@ -38,7 +38,7 @@ namespace brpc { void ProcessEvent(bthread_attr_t attr) override; void QueueMessage(InputMessageClosure& inputMsg, int* num_bthread_created, bool last_msg) override; void Debug(std::ostream &os) override; - ub::UBShmEndpoint* GetUBShmEp() { + ubring::UBShmEndpoint* GetUBShmEp() { CHECK(_ub_ep != NULL); return _ub_ep; } @@ -54,7 +54,7 @@ namespace brpc { UB_UNKNOWN }; // The UBShmEndpoint - ub::UBShmEndpoint* _ub_ep = NULL; + ubring::UBShmEndpoint* _ub_ep = NULL; // Should use UB or not UBState _ub_state; std::shared_ptr _tcp_transport; diff --git a/src/brpc/ub/common/common.h b/src/brpc/ubring/common/common.h similarity index 91% rename from src/brpc/ub/common/common.h rename to src/brpc/ubring/common/common.h index f2c185b109..b2ab945b7f 100644 --- a/src/brpc/ub/common/common.h +++ b/src/brpc/ubring/common/common.h @@ -33,12 +33,13 @@ #ifdef UT #define STATIC #define INLINE -#define HLC_STATISTICS_PATH ROOT_PATH "/hlc/run" +#define UBRING_STATISTICS_PATH ROOT_PATH "/ubring/run" #else #define STATIC static #define INLINE inline -#define HLC_STATISTICS_PATH "/opt/hlc/run" +#define UBRING_STATISTICS_PATH "/opt/ubring/run" #endif + #ifdef __cplusplus #include using AtomicInt = std::atomic; @@ -126,11 +127,11 @@ static inline uint64_t GetCurNanoSeconds(void) } while (0) typedef enum { - HLC_OK = 0, - HLC_ERR = -1, - HLC_RETRY = -2, - HLC_REENTRY = -3, - HLC_ERR_TIMEOUT = -4, + UBRING_OK = 0, + UBRING_ERR = -1, + UBRING_RETRY = -2, + UBRING_REENTRY = -3, + UBRING_ERR_TIMEOUT = -4, // SHM Module SHM_ERR = -100, SHM_ERR_INPUT_INVALID = -101, @@ -163,13 +164,13 @@ static inline size_t Aligned64Offset(uint8_t *addr) return ((ALIGN_BYTES - (((size_t)(addr)) & 
CHECKED_ALIGN_BITS)) & CHECKED_ALIGN_BITS); } -static inline RETURN_CODE HasTimedOut(const uint64_t start_time, const uint32_t timeout) +static inline RETURN_CODE HasTimedOut(const uint64_t startTime, const uint32_t timeout) { - uint64_t end_time = start_time + (uint64_t)timeout * SEC_TO_NSEC; - if (GetCurNanoSeconds() > end_time) { + uint64_t endTime = startTime + (uint64_t)timeout * SEC_TO_NSEC; + if (GetCurNanoSeconds() > endTime) { LOG(ERROR) << "task time out " << timeout << " seconds."; - return HLC_ERR; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } #endif //BRPC_COMMON_H \ No newline at end of file diff --git a/src/brpc/ubring/common/thread_lock.h b/src/brpc/ubring/common/thread_lock.h new file mode 100644 index 0000000000..07368daa57 --- /dev/null +++ b/src/brpc/ubring/common/thread_lock.h @@ -0,0 +1,118 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef BRPC_THREAD_LOCK_H +#define BRPC_THREAD_LOCK_H +#include +#include +#include +#include +#include +#include "brpc/ubring/common/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void UnlockMutex(pthread_mutex_t **mtx) +{ + if (LIKELY(mtx != NULL && *mtx != NULL)) { + pthread_mutex_unlock(*mtx); + } else { + LOG(ERROR) << "Invalid input for mtx."; + } +} + +#define LOCK_GUARD(mtxPtr) \ + pthread_mutex_t *__attribute__((cleanup(UnlockMutex))) _mtxPtr = ({ \ + pthread_mutex_lock(&(mtxPtr)); \ + &(mtxPtr); \ + }) + +static inline void UnlockSpinLock(pthread_spinlock_t **spinLock) +{ + if (LIKELY(spinLock != NULL && *spinLock != NULL)) { + pthread_spin_unlock(*spinLock); + } else { + LOG(ERROR) << "Invalid input for spinLock."; + } +} + +#define SPIN_LOCK_GUARD(spinLockPtr) \ + pthread_spinlock_t *__attribute__((cleanup(UnlockSpinLock))) _spinLockPtr = ({ \ + pthread_spin_lock(&(spinLockPtr)); \ + &(spinLockPtr); \ + }) + +static inline void UnlockRWLock(pthread_rwlock_t **rwLock) +{ + if (LIKELY(rwLock != NULL && *rwLock != NULL)) { + pthread_rwlock_unlock(*rwLock); + } else { + LOG(ERROR) << "Invalid input for rwLock."; + } +} + +#define R_LOCK_GUARD(readLockPtr) \ + pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _readLockPtr = ({ \ + pthread_rwlock_rdlock(&(readLockPtr)); \ + &(readLockPtr); \ + }) + +#define W_LOCK_GUARD(writeLockPtr) \ + pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _writeLockPtr = ({ \ + pthread_rwlock_wrlock(&(writeLockPtr)); \ + &(writeLockPtr); \ + }) + +static inline void PostSemWithClose(sem_t **sem) +{ + if (LIKELY(sem != NULL && *sem != NULL)) { + sem_post(*sem); + sem_close(*sem); + *sem = NULL; + sem = NULL; + } else { + LOG(ERROR) << "Invalid input for semaphore."; + } +} + +static inline void PostSem(sem_t **sem) +{ + if (LIKELY(sem != NULL && *sem != NULL)) { + sem_post(*sem); + } else { + LOG(ERROR) << "Invalid input for semaphore."; + } +} + +#define 
SEMAPHORE_WAIT_GUARD_WITH_CLOSE(semPtr) \ + sem_t *__attribute__((cleanup(PostSemWithClose))) _semPtr = ({ \ + sem_wait(semPtr); \ + semPtr; \ + }) + +#define SEMAPHORE_WAIT_GUARD(semPtr) \ + sem_t *__attribute__((cleanup(PostSem))) _semPtr = ({ \ + sem_wait(semPtr); \ + semPtr; \ + }) + +#ifdef __cplusplus +} +#endif +#endif //BRPC_THREAD_LOCK_H \ No newline at end of file diff --git a/src/brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl b/src/brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl new file mode 100644 index 0000000000..79dd2dbaf3 --- /dev/null +++ b/src/brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl @@ -0,0 +1,40 @@ +#ifndef UBRING_MK_UBSM +#error Do not include this file unless you know what you are doing. +#endif + +#ifndef UBRING_MK_UBSM_OPTIONAL +#define UBRING_MK_UBSM_OPTIONAL UBRING_MK_UBSM +#endif + +UBRING_MK_UBSM(int, ubsmem_init_attributes, (ubsmem_options_t *ubsm_shmem_opts)); + +UBRING_MK_UBSM(int, ubsmem_initialize, (const ubsmem_options_t *ubsm_shmem_opts)); + +UBRING_MK_UBSM(int, ubsmem_finalize, (void)); + +UBRING_MK_UBSM(int, ubsmem_set_logger_level, (int level)); + +UBRING_MK_UBSM(int, ubsmem_set_extern_logger, (void (*func)(int level, const char *msg))); + +UBRING_MK_UBSM(int, ubsmem_lookup_regions, (ubsmem_regions_t* regions)); + +UBRING_MK_UBSM(int, ubsmem_create_region, (const char *region_name, size_t size, const ubsmem_region_attributes_t *reg_attr)); + +UBRING_MK_UBSM(int, ubsmem_destroy_region, (const char *region_name)); + +UBRING_MK_UBSM(int, ubsmem_shmem_allocate,(const char *region_name, const char *name, size_t size, mode_t mode, + uint64_t flags)); + +UBRING_MK_UBSM(int, ubsmem_shmem_deallocate, (const char *name)); + +UBRING_MK_UBSM(int, ubsmem_shmem_map, (void *addr, size_t length, int prot, int flags, const char *name, off_t offset, + void **local_ptr)); + +UBRING_MK_UBSM(int, ubsmem_shmem_unmap, (void *local_ptr, size_t length)); + +UBRING_MK_UBSM(int, ubsmem_shmem_faults_register, (shmem_faults_func registerFunc)); + 
+UBRING_MK_UBSM(int, ubsmem_local_nid_query, (uint32_t *nid)); + +#undef UBRING_MK_UBSM_OPTIONAL +#undef UBRING_MK_UBSM \ No newline at end of file diff --git a/src/brpc/ub/rack_mem/ubs_mem.h b/src/brpc/ubring/rack_mem/ubs_mem.h similarity index 98% rename from src/brpc/ub/rack_mem/ubs_mem.h rename to src/brpc/ubring/rack_mem/ubs_mem.h index 6466dba67f..66069c6e9c 100644 --- a/src/brpc/ub/rack_mem/ubs_mem.h +++ b/src/brpc/ubring/rack_mem/ubs_mem.h @@ -192,10 +192,10 @@ SHMEM_API int ubsmem_lookup_cluster_statistic(ubsmem_cluster_info_t *info); /** * Subscribes to shared memory UB Event. - * @param register_func - Shared Memory UB Event Response Handling Function. + * @param registerFunc - Shared Memory UB Event Response Handling Function. * @return - 0 on success and other on failure */ -SHMEM_API int ubsmem_shmem_faults_register(shmem_faults_func register_func); +SHMEM_API int ubsmem_shmem_faults_register(shmem_faults_func registerFunc); /** * Query the supernode ID of this node within the supernode domain. 
diff --git a/src/brpc/ub/rack_mem/ubs_mem_def.h b/src/brpc/ubring/rack_mem/ubs_mem_def.h similarity index 100% rename from src/brpc/ub/rack_mem/ubs_mem_def.h rename to src/brpc/ubring/rack_mem/ubs_mem_def.h diff --git a/src/brpc/ub/rack_mem/ubshmem_stub.cpp b/src/brpc/ubring/rack_mem/ubshmem_stub.cpp similarity index 95% rename from src/brpc/ub/rack_mem/ubshmem_stub.cpp rename to src/brpc/ubring/rack_mem/ubshmem_stub.cpp index ce01694a19..f0eaf29f8e 100644 --- a/src/brpc/ub/rack_mem/ubshmem_stub.cpp +++ b/src/brpc/ubring/rack_mem/ubshmem_stub.cpp @@ -53,7 +53,7 @@ int ubsmem_set_extern_logger(void (*func)(int level, const char *msg)) return UBSM_OK; } -int ubsmem_lookup_regions(ubsmem_regions_t *regions) +int ubsmem_lookup_regions(ubsmem_regions_t* regions) { regions->num = 1; regions->region[0].host_num = 1; @@ -96,7 +96,7 @@ int ubsmem_shmem_unmap(void *local_ptr, size_t length) return UBSM_OK; } -int ubsmem_shmem_faults_register(shmem_faults_func register_func) +int ubsmem_shmem_faults_register(shmem_faults_func registerFunc) { return UBSM_OK; } diff --git a/src/brpc/ub/shm/shm_def.h b/src/brpc/ubring/shm/shm_def.h similarity index 96% rename from src/brpc/ub/shm/shm_def.h rename to src/brpc/ubring/shm/shm_def.h index 5be35a37d4..230943877e 100644 --- a/src/brpc/ub/shm/shm_def.h +++ b/src/brpc/ubring/shm/shm_def.h @@ -35,13 +35,13 @@ #define SHM_ALLOC_UNIT_SIZE (4 * 1024 * 1024) // 4MB namespace brpc { - namespace ub { + namespace ubring { typedef enum { SHM_TYPE_UB, SHM_TYPE_IPC, SHM_TYPE_UBS, SHM_TYPE_UNSUPPORT } SHM_TYPE; typedef struct { uint8_t *addr; size_t len; - uint64_t mem_id; + uint64_t memid; char name[SHM_MAX_NAME_BUFF_LEN]; uint32_t fd; } SHM; @@ -56,7 +56,7 @@ namespace brpc { ShmListNode* head; ShmListNode* tail; size_t size; - pthread_mutex_t shm_lock; + pthread_mutex_t shmLock; } ShmList; } } diff --git a/src/brpc/ub/shm/shm_ipc.cpp b/src/brpc/ubring/shm/shm_ipc.cpp similarity index 92% rename from src/brpc/ub/shm/shm_ipc.cpp rename to 
src/brpc/ubring/shm/shm_ipc.cpp index a195ea5a8f..c5cfad9e4b 100644 --- a/src/brpc/ub/shm/shm_ipc.cpp +++ b/src/brpc/ubring/shm/shm_ipc.cpp @@ -23,12 +23,12 @@ #include #include #include -#include "brpc/ub/common/common.h" -#include "brpc/ub/shm/shm_def.h" -#include "brpc/ub/shm/shm_ipc.h" +#include "brpc/ubring/common/common.h" +#include "brpc/ubring/shm/shm_def.h" +#include "brpc/ubring/shm/shm_ipc.h" namespace brpc { -namespace ub { +namespace ubring { RETURN_CODE IpcShmLocalMalloc(SHM *shm) { int fd = shm_open(shm->name, O_CREAT | O_EXCL | O_RDWR, SHM_IPC_MODE); @@ -60,7 +60,7 @@ RETURN_CODE IpcShmLocalMalloc(SHM *shm) close(fd); LOG(DEBUG) << "IPC Create shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE IpcShmMunmap(SHM *shm) @@ -71,13 +71,13 @@ RETURN_CODE IpcShmMunmap(SHM *shm) } int ret = munmap(shm->addr, shm->len); - if (ret != HLC_OK) { + if (ret != UBRING_OK) { LOG(ERROR) << "IPC unmap shm=" << shm->name << " failed, errno=" << errno; return SHM_ERR; } LOG(DEBUG) << "IPC unmap shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE IpcShmFree(SHM *shm) @@ -89,7 +89,7 @@ RETURN_CODE IpcShmFree(SHM *shm) // free int ret = shm_unlink(shm->name); - if (ret != HLC_OK) { + if (ret != UBRING_OK) { if (errno == EBUSY) { LOG_EVERY_SECOND(ERROR) << "IPC free shm=" << shm->name << " failed, errno=" << errno; return SHM_ERR_RESOURCE_ATTACHED; @@ -99,7 +99,7 @@ RETURN_CODE IpcShmFree(SHM *shm) } shm->addr = NULL; LOG(DEBUG) << "IPC free shm=" << shm->name << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE IpcShmLocalFree(SHM *shm) @@ -110,12 +110,12 @@ RETURN_CODE IpcShmLocalFree(SHM *shm) } int ret = munmap(shm->addr, shm->len); - if (ret != HLC_OK) { + if (ret != UBRING_OK) { LOG(WARNING) << "IPC unmap shm=" << shm->name << " failed, ret=" << ret; } ret = shm_unlink(shm->name); - if (ret != HLC_OK) { + if (ret != UBRING_OK) { if 
(errno == EBUSY) { LOG_EVERY_SECOND(ERROR) << "IPC delete shm=" << shm->name << " failed, ret=" << ret; return SHM_ERR_RESOURCE_ATTACHED; @@ -125,7 +125,7 @@ RETURN_CODE IpcShmLocalFree(SHM *shm) } shm->addr = NULL; LOG(DEBUG) << "IPC free local shm=" << shm->name << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE IpcShmRemoteMalloc(SHM *shm) @@ -145,7 +145,7 @@ RETURN_CODE IpcShmRemoteMalloc(SHM *shm) close(fd); LOG(DEBUG) << "IPC malloc remote shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot) @@ -165,7 +165,7 @@ RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot) close(fd); LOG(DEBUG) << "IPC mmap remote shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE IpcShmRemoteFree(SHM *shm) @@ -176,13 +176,13 @@ RETURN_CODE IpcShmRemoteFree(SHM *shm) } int ret = munmap(shm->addr, shm->len); - if (ret != HLC_OK) { + if (ret != UBRING_OK) { LOG(ERROR) << "IPC unmap shm=" << shm->name << " failed, ret=" << ret; return SHM_ERR; } LOG(DEBUG) << "IPC free remote shm=" << shm->name << " success."; - return HLC_OK; + return UBRING_OK; } } } \ No newline at end of file diff --git a/src/brpc/ub/shm/shm_ipc.h b/src/brpc/ubring/shm/shm_ipc.h similarity index 98% rename from src/brpc/ub/shm/shm_ipc.h rename to src/brpc/ubring/shm/shm_ipc.h index 058142951c..29d88307fe 100644 --- a/src/brpc/ub/shm/shm_ipc.h +++ b/src/brpc/ubring/shm/shm_ipc.h @@ -24,7 +24,7 @@ #define SHM_IPC_MODE 0666 namespace brpc { - namespace ub { + namespace ubring { RETURN_CODE IpcShmLocalMalloc(SHM *shm); RETURN_CODE IpcShmMunmap(SHM *shm); RETURN_CODE IpcShmFree(SHM *shm); diff --git a/src/brpc/ub/shm/shm_mgr.cpp b/src/brpc/ubring/shm/shm_mgr.cpp similarity index 90% rename from src/brpc/ub/shm/shm_mgr.cpp rename to src/brpc/ubring/shm/shm_mgr.cpp index 403f172f53..a295c3da2b 100644 --- a/src/brpc/ub/shm/shm_mgr.cpp +++ 
b/src/brpc/ubring/shm/shm_mgr.cpp @@ -21,13 +21,13 @@ #include #include #include -#include "brpc/ub/common/common.h" -#include "brpc/ub/shm/shm_ipc.h" -#include "brpc/ub/shm/shm_ubs.h" -#include "brpc/ub/shm/shm_mgr.h" +#include "brpc/ubring/common/common.h" +#include "brpc/ubring/shm/shm_ipc.h" +#include "brpc/ubring/shm/shm_ubs.h" +#include "brpc/ubring/shm/shm_mgr.h" namespace brpc { -namespace ub { +namespace ubring { DEFINE_int32(ub_shm_type, 1, "shm type: 1-ipc; 2-ub_ring"); static SHM_TYPE g_shmType; @@ -61,24 +61,24 @@ RETURN_CODE ShmMgrInit(void) { if (UNLIKELY(FLAGS_ub_shm_type >= (uint32_t)SHM_TYPE_UNSUPPORT)) { LOG(ERROR) << "Shm type config=" << FLAGS_ub_shm_type << " is not supported."; - return HLC_ERR; + return UBRING_ERR; } g_shmType = (SHM_TYPE)FLAGS_ub_shm_type; if (g_shmType == SHM_TYPE_UBS) { - if (UbsShmInit() != HLC_OK) { + if (UbsShmInit() != UBRING_OK) { LOG(ERROR) << "Init beiming ubs shm failed."; - return HLC_ERR; + return UBRING_ERR; } } LOG(DEBUG) << "shm mgr init success, shm type=" << g_shmType; - return HLC_OK; + return UBRING_OK; } void ShmMgrFini(void) { if (g_shmType == SHM_TYPE_UBS) { - if (UbsShmFini() != HLC_OK) { + if (UbsShmFini() != UBRING_OK) { LOG(ERROR) << "Fini beiming ubs shm failed."; return; } @@ -98,7 +98,7 @@ RETURN_CODE ShmLocalMalloc(SHM *shm) return SHM_ERR_INPUT_INVALID; } - RETURN_CODE rc = HLC_OK; + RETURN_CODE rc = UBRING_OK; switch (g_shmType) { case SHM_TYPE_IPC: rc = IpcShmLocalMalloc(shm); @@ -116,12 +116,12 @@ RETURN_CODE ShmLocalMalloc(SHM *shm) RETURN_CODE ShmLocalCalloc(SHM *shm) { RETURN_CODE rc = ShmLocalMalloc(shm); - if (UNLIKELY(rc != HLC_OK)) { + if (UNLIKELY(rc != UBRING_OK)) { LOG(ERROR) << "Failed to alloc local shm."; return rc; } memset(shm->addr, 0, shm->len); - return HLC_OK; + return UBRING_OK; } RETURN_CODE ShmLocalFree(SHM *shm) @@ -131,7 +131,7 @@ RETURN_CODE ShmLocalFree(SHM *shm) return SHM_ERR_INPUT_INVALID; } - RETURN_CODE rc = HLC_OK; + RETURN_CODE rc = UBRING_OK; switch 
(g_shmType) { case SHM_TYPE_IPC: rc = IpcShmLocalFree(shm); @@ -153,7 +153,7 @@ RETURN_CODE ShmRemoteMalloc(SHM *shm) return SHM_ERR_INPUT_INVALID; } - RETURN_CODE rc = HLC_OK; + RETURN_CODE rc = UBRING_OK; switch (g_shmType) { case SHM_TYPE_IPC: rc = IpcShmRemoteMalloc(shm); @@ -175,7 +175,7 @@ RETURN_CODE ShmRemoteFree(SHM *shm) return SHM_ERR_INPUT_INVALID; } - RETURN_CODE rc = HLC_OK; + RETURN_CODE rc = UBRING_OK; switch (g_shmType) { case SHM_TYPE_IPC: rc = IpcShmRemoteFree(shm); @@ -197,7 +197,7 @@ RETURN_CODE ShmLocalMmap(SHM *shm, int prot) return SHM_ERR_INPUT_INVALID; } - RETURN_CODE rc = HLC_OK; + RETURN_CODE rc = UBRING_OK; switch (g_shmType) { case SHM_TYPE_IPC: rc = IpcShmLocalMmap(shm, prot); @@ -219,7 +219,7 @@ RETURN_CODE ShmMunmap(SHM *shm) return SHM_ERR_INPUT_INVALID; } - RETURN_CODE rc = HLC_OK; + RETURN_CODE rc = UBRING_OK; switch (g_shmType) { case SHM_TYPE_IPC: rc = IpcShmMunmap(shm); @@ -241,7 +241,7 @@ RETURN_CODE ShmFree(SHM *shm) return SHM_ERR_INPUT_INVALID; } - RETURN_CODE rc = HLC_OK; + RETURN_CODE rc = UBRING_OK; switch (g_shmType) { case SHM_TYPE_IPC: rc = IpcShmFree(shm); diff --git a/src/brpc/ub/shm/shm_mgr.h b/src/brpc/ubring/shm/shm_mgr.h similarity index 93% rename from src/brpc/ub/shm/shm_mgr.h rename to src/brpc/ubring/shm/shm_mgr.h index 5ab6cfe886..e9afa086d2 100644 --- a/src/brpc/ub/shm/shm_mgr.h +++ b/src/brpc/ubring/shm/shm_mgr.h @@ -19,11 +19,11 @@ #define BRPC_SHM_MGR_H #include -#include "brpc/ub/common/common.h" -#include "brpc/ub/shm/shm_def.h" +#include "brpc/ubring/common/common.h" +#include "brpc/ubring/shm/shm_def.h" namespace brpc { -namespace ub { +namespace ubring { void SetShmType(SHM_TYPE type); RETURN_CODE ShmMgrInit(void); diff --git a/src/brpc/ub/shm/shm_ubs.cpp b/src/brpc/ubring/shm/shm_ubs.cpp similarity index 65% rename from src/brpc/ub/shm/shm_ubs.cpp rename to src/brpc/ubring/shm/shm_ubs.cpp index ef2a777cbb..f79185c54d 100644 --- a/src/brpc/ub/shm/shm_ubs.cpp +++ b/src/brpc/ubring/shm/shm_ubs.cpp 
@@ -22,102 +22,102 @@ #include #include #include -#include "brpc/ub/timer/timer_mgr.h" -#include "brpc/ub/common/thread_lock.h" -#include "brpc/ub/common/common.h" -#include "brpc/ub/shm/shm_def.h" -#include "brpc/ub/ub_ring_manager.h" -#include "brpc/ub/rack_mem/ubs_mem.h" -#include "brpc/ub/rack_mem/ubs_mem_def.h" +#include "brpc/ubring/timer/timer_mgr.h" +#include "brpc/ubring/common/thread_lock.h" +#include "brpc/ubring/common/common.h" +#include "brpc/ubring/shm/shm_def.h" +#include "brpc/ubring/ub_ring_manager.h" +#include "brpc/ubring/rack_mem/ubs_mem.h" +#include "brpc/ubring/rack_mem/ubs_mem_def.h" #ifdef UT #include "ubs_mem.h" #endif #include "shm_ubs.h" namespace brpc { -namespace ub { -#define HLC_MK_UBSM(ret, fn, args) ret (*fn) args = NULL -#include "brpc/ub/rack_mem/declare_shm_ubs.h.tmpl" +namespace ubring { +#define UBRING_MK_UBSM(ret, fn, args) ret (*fn) args = NULL +#include "brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl" #define SHM_RIGHT_MODE 0666 -#define HLC_REGION_NAME_PREFIX "HlcONE2ALLRegion" +#define UBRING_REGION_NAME_PREFIX "UbrONE2ALLRegion" DEFINE_uint32(node_location, 1, "Location of the ub machine."); DEFINE_bool(shm_wr_delay_comp, true, "Indicates whether to enable the write relay." 
"0: relay; 1: non-relay."); DEFINE_int32(ub_flying_io_timeout, 1, "Waiting time for stopping data" "sending and receiving when the link is disconnected."); -char g_region_name[MAX_REGION_NAME_DESC_LENGTH] = {0}; -int g_shm_timer_fd = 0; -ShmList *g_shm_list = NULL; +char g_regionName[MAX_REGION_NAME_DESC_LENGTH] = {0}; +int g_shmTimerFd = 0; +ShmList *g_shmList = NULL; static RETURN_CODE UbsShmInterfacesLoad(void); char hostname[MAX_HOST_NAME_DESC_LENGTH]; RETURN_CODE UbsShmInterfacesLoad(void) { #ifndef UT - const char *ubsm_sdk_location = "/usr/local/ubs_mem/lib/libubsm_sdk.so"; - void* dlhandler = dlmopen(LM_ID_NEWLM, ubsm_sdk_location, RTLD_NOW | RTLD_LOCAL | RTLD_NODELETE | RTLD_DEEPBIND); + const char *ubsmSdkLocation = "/usr/local/ubs_mem/lib/libubsm_sdk.so"; + void* dlhandler = dlmopen(LM_ID_NEWLM, ubsmSdkLocation, RTLD_NOW | RTLD_LOCAL | RTLD_NODELETE | RTLD_DEEPBIND); if (dlhandler == NULL) { - LOG(ERROR) << "Dlmopen libubsm_sdk.so in " << ubsm_sdk_location << " failed, error:" << dlerror(); - return HLC_ERR; + LOG(ERROR) << "Dlmopen libubsm_sdk.so in " << ubsmSdkLocation << " failed, error:" << dlerror(); + return UBRING_ERR; } -#define HLC_MK_UBSM_OPTIONAL(ret, fn, args) \ +#define UBRING_MK_UBSM_OPTIONAL(ret, fn, args) \ do { \ fn = (decltype(fn))dlsym(dlhandler, #fn); \ } while (0) -#define HLC_MK_UBSM(ret, fn, args) \ +#define UBRING_MK_UBSM(ret, fn, args) \ do { \ if ((fn) != NULL) { \ break; \ } \ - HLC_MK_UBSM_OPTIONAL(ret, fn, args); \ + UBRING_MK_UBSM_OPTIONAL(ret, fn, args); \ if ((fn) == NULL) { \ LOG(ERROR) << "Fail load ubs_mem func " << #fn <<" error:" << dlerror(); \ - return HLC_ERR; \ + return UBRING_ERR; \ } \ } while (0) -#include "brpc/ub/rack_mem/declare_shm_ubs.h.tmpl" +#include "brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl" dlclose(dlhandler); dlhandler = NULL; #endif - return HLC_OK; + return UBRING_OK; } -static RETURN_CODE CreateUbsShmRegion(const char *region_name) +static RETURN_CODE CreateUbsShmRegion(const char *regionName) { 
- int ret = snprintf(g_region_name, MAX_REGION_NAME_DESC_LENGTH, "%s_%u", - HLC_REGION_NAME_PREFIX, FLAGS_node_location); + int ret = snprintf(g_regionName, MAX_REGION_NAME_DESC_LENGTH, "%s_%u", + UBRING_REGION_NAME_PREFIX, FLAGS_node_location); if (ret < 0) { LOG(ERROR) << "Snprintf_s region name failed, ret=" << ret; - return HLC_ERR; + return UBRING_ERR; } ubsmem_regions_t regions = {0}; // 16 * (48 + 1) bytes, 约0.8k ret = ubsmem_lookup_regions(®ions); if (ret != UBSM_OK || regions.region[0].host_num <= 0) { LOG(ERROR) << "Ubs lookup share region failed, ret=" << ret << ", region.num=" << regions.region[0].host_num; - return HLC_ERR; + return UBRING_ERR; } - ubsmem_region_attributes_t region_attr = {0}; - region_attr.host_num = regions.region[0].host_num; - for (int i = 0; i < region_attr.host_num; i++) { - strcpy(region_attr.hosts[i].host_name, regions.region[0].hosts[i].host_name); - region_attr.hosts[i].affinity = (strcmp(region_attr.hosts[i].host_name, hostname) == 0) ? + ubsmem_region_attributes_t regionAttr = {0}; + regionAttr.host_num = regions.region[0].host_num; + for (int i = 0; i < regionAttr.host_num; i++) { + strcpy(regionAttr.hosts[i].host_name, regions.region[0].hosts[i].host_name); + regionAttr.hosts[i].affinity = (strcmp(regionAttr.hosts[i].host_name, hostname) == 0) ? 
true : false; } - ret = ubsmem_create_region(region_name, 0, ®ion_attr); + ret = ubsmem_create_region(regionName, 0, ®ionAttr); if (ret == UBSM_ERR_ALREADY_EXIST) { - LOG(WARNING) << "Ubs region exists, region_name=" << region_name; - return HLC_OK; + LOG(WARNING) << "Ubs region exists, region_name=" << regionName; + return UBRING_OK; } else if (ret != UBSM_OK) { LOG(ERROR) << "Ubsmem create region failed, ret=" << ret; - return HLC_ERR; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } static uint64_t AquireFlagIfWrDelayComp(const uint64_t flag) @@ -130,7 +130,7 @@ static uint64_t AquireFlagIfWrDelayComp(const uint64_t flag) RETURN_CODE UbsShmLocalMalloc(SHM *shm) { - int ret = ubsmem_shmem_allocate(g_region_name, shm->name, shm->len, SHM_RIGHT_MODE, + int ret = ubsmem_shmem_allocate(g_regionName, shm->name, shm->len, SHM_RIGHT_MODE, AquireFlagIfWrDelayComp(UBSM_FLAG_ONLY_IMPORT_NONCACHE | UBSM_FLAG_MEM_ANONYMOUS)); do { if (ret == UBSM_ERR_ALREADY_EXIST) { @@ -139,7 +139,7 @@ do { return SHM_ERR_EXIST; } LOG(INFO) << "Ubs delete shm name=" << shm->name << " success, try to recreate."; - ret = ubsmem_shmem_allocate(g_region_name, shm->name, shm->len, SHM_RIGHT_MODE, + ret = ubsmem_shmem_allocate(g_regionName, shm->name, shm->len, SHM_RIGHT_MODE, AquireFlagIfWrDelayComp(UBSM_FLAG_ONLY_IMPORT_NONCACHE | UBSM_FLAG_MEM_ANONYMOUS)); if (ret != UBSM_OK) { LOG(ERROR) << "Ubs recreate shm name=" << shm->name << " failed, ret=" << ret; @@ -162,9 +162,9 @@ do { } // 通过MXE获取memid - shm->mem_id = 1; // 暂时打桩 - LOG(DEBUG) << "Ubs malloc local shm=" << shm->name << " length=" << shm->len << " mem_id=" << shm->mem_id << " success."; - return HLC_OK; + shm->memid = 1; // 暂时打桩 + LOG(DEBUG) << "Ubs malloc local shm=" << shm->name << " length=" << shm->len << " memid=" << shm->memid << " success."; + return UBRING_OK; } RETURN_CODE UbsShmMunmap(SHM *shm) @@ -179,7 +179,7 @@ RETURN_CODE UbsShmMunmap(SHM *shm) if (ret != UBSM_OK) { if (ret == UBSM_ERR_NET) { LOG(ERROR) << 
"Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret; - AddShmToList(g_shm_list, shm); + AddShmToList(g_shmList, shm); return SHM_ERR_UBSM_NET_ERR; } LOG(ERROR) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret; @@ -187,7 +187,7 @@ RETURN_CODE UbsShmMunmap(SHM *shm) } LOG(DEBUG) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE UbsShmFree(SHM *shm) @@ -212,7 +212,7 @@ RETURN_CODE UbsShmFree(SHM *shm) } shm->addr = NULL; LOG(DEBUG) << "Ubs free shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE UbsShmLocalFree(SHM *shm) @@ -227,7 +227,7 @@ RETURN_CODE UbsShmLocalFree(SHM *shm) if (ret != UBSM_OK) { if (ret == UBSM_ERR_NET) { LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret; - AddShmToList(g_shm_list, shm); + AddShmToList(g_shmList, shm); return SHM_ERR_UBSM_NET_ERR; } LOG(WARNING) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret; @@ -245,7 +245,7 @@ RETURN_CODE UbsShmLocalFree(SHM *shm) } shm->addr = NULL; LOG(DEBUG) << "Ubs free local shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE UbsShmRemoteMalloc(SHM *shm) @@ -257,7 +257,7 @@ RETURN_CODE UbsShmRemoteMalloc(SHM *shm) } LOG(INFO) << "Ubs malloc remote shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE UbsShmLocalMmap(SHM *shm, int prot) @@ -269,7 +269,7 @@ RETURN_CODE UbsShmLocalMmap(SHM *shm, int prot) } LOG(DEBUG) << "Ubs mmap remote shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE UbsShmRemoteFree(SHM *shm) @@ -284,7 +284,7 @@ RETURN_CODE UbsShmRemoteFree(SHM *shm) if (ret != UBSM_OK) { if (ret == UBSM_ERR_NET) { LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net 
err=" << ret; - AddShmToList(g_shm_list, shm); + AddShmToList(g_shmList, shm); return SHM_ERR_UBSM_NET_ERR; } LOG(ERROR) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret; @@ -292,7 +292,7 @@ RETURN_CODE UbsShmRemoteFree(SHM *shm) } LOG(DEBUG) << "Ubs free Remote shm=" << shm->name << " length=" << shm->len << " success."; - return HLC_OK; + return UBRING_OK; } void UbsMemLoggerPrint(int level, const char *msg) @@ -309,64 +309,65 @@ void UbsMemLoggerPrint(int level, const char *msg) RETURN_CODE UbsShmInit(void) { - RETURN_CODE ret_code = UbsShmInterfacesLoad(); - if (ret_code != HLC_OK) { - LOG(ERROR) << "Load ubs shm functions failed, ret=" << ret_code; - return HLC_ERR; + // 加载libubsm_sdk.so函数指针 + RETURN_CODE retCode = UbsShmInterfacesLoad(); + if (retCode != UBRING_OK) { + LOG(ERROR) << "Load ubs shm functions failed, ret=" << retCode; + return UBRING_ERR; } if (gethostname(hostname, MAX_HOST_NAME_DESC_LENGTH) != 0) { - LOG(ERROR) << "hlc config gethostname failed, errno=" << errno; - return HLC_ERR; + LOG(ERROR) << "ubring config gethostname failed, errno=" << errno; + return UBRING_ERR; } int ret = ubsmem_set_extern_logger(UbsMemLoggerPrint); if (ret != UBSM_OK) { LOG(ERROR) << "Ubs set logger failed, ret=" << ret; - return HLC_ERR; + return UBRING_ERR; } ret = ubsmem_set_logger_level(UBSM_LOG_INFO_LEVEL); if (ret != UBSM_OK) { LOG(ERROR) << "Ubs set logger level failed, ret=" << ret; - return HLC_ERR; + return UBRING_ERR; } ubsmem_options_t options = {}; ret = ubsmem_init_attributes(&options); if (ret != UBSM_OK) { LOG(ERROR) << "Ubs shm init attributes failed, ret=" << ret; - return HLC_ERR; + return UBRING_ERR; } ret = ubsmem_initialize(&options); if (ret != UBSM_OK) { LOG(ERROR) << "Ubs shm initialize failed, ret=" << ret; - return HLC_ERR; + return UBRING_ERR; } if (UNLIKELY(ubsmem_local_nid_query(&FLAGS_node_location) != UBSM_OK)) { LOG(ERROR) << "Get local nid failed."; - return HLC_ERR; + return UBRING_ERR; } - if 
(UNLIKELY(ubsmem_shmem_faults_register(brpc::ub::UBRingManager::UbEventCallback) != UBSM_OK)) { + if (UNLIKELY(ubsmem_shmem_faults_register(brpc::ubring::UBRingManager::UbEventCallback) != UBSM_OK)) { LOG(ERROR) << "Failed to register the ub event callback function."; - return HLC_ERR; + return UBRING_ERR; } - if (CreateUbsShmRegion(g_region_name) != HLC_OK) { + if (CreateUbsShmRegion(g_regionName) != UBRING_OK) { LOG(ERROR) << "Create Ubs region failed."; - return HLC_ERR; + return UBRING_ERR; } - if (InitShmTimer(&g_shm_list) != HLC_OK) { + if (InitShmTimer(&g_shmList) != UBRING_OK) { LOG(ERROR) << "Ubs shm list init failed."; - return HLC_ERR; + return UBRING_ERR; } LOG(INFO) << "Ubs shm init success."; - return HLC_OK; + return UBRING_OK; } RETURN_CODE UbsShmFini(void) @@ -374,47 +375,47 @@ RETURN_CODE UbsShmFini(void) int ret = ubsmem_finalize(); if (ret != UBSM_OK) { LOG(ERROR) << "Ubs shm finalize fail, ret=" << ret; - return HLC_ERR; + return UBRING_ERR; } - if (UNLIKELY(DestroyShmTimer(g_shm_list) != HLC_OK)) { + if (UNLIKELY(DestroyShmTimer(g_shmList) != UBRING_OK)) { LOG(ERROR) << "Ubs shm list finalize failed."; - return HLC_ERR; + return UBRING_ERR; } LOG(INFO) << "Ubs shm finalize success."; - return HLC_OK; + return UBRING_OK; } -static void DeleteShmToList(ShmList* shm_list) +static void DeleteShmToList(ShmList* shmList) { - if (shm_list == NULL || shm_list->head == NULL) { + if (shmList == NULL || shmList->head == NULL) { return; } - ShmListNode *cur_node = shm_list->head; - shm_list->head = cur_node->next; - if (shm_list->head != NULL) { - shm_list->head->prev = NULL; + ShmListNode *curNode = shmList->head; + shmList->head = curNode->next; + if (shmList->head != NULL) { + shmList->head->prev = NULL; } else { - shm_list->tail = NULL; + shmList->tail = NULL; } - LOG(DEBUG) << "Delete shm to list, name=" << cur_node->shm.name << " size=" << shm_list->size; - FREE_PTR(cur_node); - shm_list->size--; + LOG(DEBUG) << "Delete shm to list, name=" << 
curNode->shm.name << " size=" << shmList->size; + FREE_PTR(curNode); + shmList->size--; } void *UbsShmCallback(void* args) { - ShmList *shm_list = (ShmList*)args; - if (UNLIKELY(shm_list == NULL)) { + ShmList *shmList = (ShmList*)args; + if (UNLIKELY(shmList == NULL)) { LOG(ERROR) << "Shm list is null."; return NULL; } - LOCK_GUARD(shm_list->shm_lock); - while (shm_list->head != NULL) { - SHM shm = shm_list->head->shm; + LOCK_GUARD(shmList->shmLock); + while (shmList->head != NULL) { + SHM shm = shmList->head->shm; if (shm.addr == NULL) { LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL."; return NULL; @@ -432,67 +433,67 @@ void *UbsShmCallback(void* args) ret = ubsmem_shmem_deallocate(shm.name); if (ret != UBSM_OK) { - DeleteShmToList(shm_list); + DeleteShmToList(shmList); LOG(ERROR) << "Ubs delete shm=" << shm.name << " failed, ret=" << ret; return NULL; } - DeleteShmToList(shm_list); + DeleteShmToList(shmList); LOG(DEBUG) << "Ubs free local shm=" << shm.name << " length=" << shm.len << " success."; } return NULL; } -RETURN_CODE UbsShmAddTimer(ShmList *shm_list) +RETURN_CODE UbsShmAddTimer(ShmList *shmList) { - uint32_t timer_interval = FLAGS_ub_flying_io_timeout; - struct itimerspec time_spec = { - .it_interval = {.tv_sec = timer_interval, .tv_nsec = 0}, + uint32_t timerInterval = FLAGS_ub_flying_io_timeout; + struct itimerspec timeSpec = { + .it_interval = {.tv_sec = timerInterval, .tv_nsec = 0}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; - int timer_fd = TimerStart(&time_spec, UbsShmCallback, (void*)shm_list); - if (UNLIKELY(timer_fd == -1)) { + int timerFd = TimerStart(&timeSpec, UbsShmCallback, (void*)shmList); + if (UNLIKELY(timerFd == -1)) { LOG(ERROR) << "Start shm timer failed."; - return HLC_ERR; + return UBRING_ERR; } - g_shm_timer_fd = timer_fd; + g_shmTimerFd = timerFd; - return HLC_OK; + return UBRING_OK; } -RETURN_CODE InitShmTimer(ShmList **shm_list) +RETURN_CODE InitShmTimer(ShmList **shmList) { - *shm_list = (ShmList 
*)malloc(sizeof(ShmList)); - if (*shm_list == NULL) { + *shmList = (ShmList *)malloc(sizeof(ShmList)); + if (*shmList == NULL) { LOG(ERROR) << "Malloc shm list failed."; - return HLC_ERR; + return UBRING_ERR; } - (*shm_list)->head = NULL; - (*shm_list)->tail = NULL; - (*shm_list)->size = 0; + (*shmList)->head = NULL; + (*shmList)->tail = NULL; + (*shmList)->size = 0; - if (pthread_mutex_init(&(*shm_list)->shm_lock, NULL) != 0) { + if (pthread_mutex_init(&(*shmList)->shmLock, NULL) != 0) { LOG(ERROR) << "Init shm list mutex failed."; - FREE_PTR(*shm_list); - return HLC_ERR; + FREE_PTR(*shmList); + return UBRING_ERR; } - if (UbsShmAddTimer(*shm_list) == HLC_ERR) { + if (UbsShmAddTimer(*shmList) == UBRING_ERR) { LOG(ERROR) << "Ubs add timer failed."; - FREE_PTR(*shm_list); - return HLC_ERR; + FREE_PTR(*shmList); + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } -RETURN_CODE DestroyShmTimer(ShmList *shm_list) +RETURN_CODE DestroyShmTimer(ShmList *shmList) { - DeleteTimerSafe((uint32_t)g_shm_timer_fd); - if (shm_list == NULL) { + DeleteTimerSafe((uint32_t)g_shmTimerFd); + if (shmList == NULL) { LOG(WARNING) << "Shm list is null."; - return HLC_ERR; + return UBRING_ERR; } - ShmListNode* current = shm_list->head; + ShmListNode* current = shmList->head; ShmListNode* next; while (current != NULL) { @@ -500,61 +501,61 @@ RETURN_CODE DestroyShmTimer(ShmList *shm_list) free(current); current = next; } - pthread_mutex_destroy(&shm_list->shm_lock); - FREE_PTR(shm_list); - return HLC_OK; + pthread_mutex_destroy(&shmList->shmLock); + FREE_PTR(shmList); + return UBRING_OK; } -RETURN_CODE IsExistInShmList(ShmList *shm_list, const SHM *shm) +RETURN_CODE IsExistInShmList(ShmList *shmList, const SHM *shm) { - LOCK_GUARD(shm_list->shm_lock); - if (UNLIKELY(shm_list == NULL)) { + LOCK_GUARD(shmList->shmLock); + if (UNLIKELY(shmList == NULL)) { LOG(ERROR) << "Shm list is null."; - return HLC_ERR; + return UBRING_ERR; } - ShmListNode *cur_node = shm_list->head; - while 
(cur_node != NULL) { - if (strcmp(cur_node->shm.name, shm->name) == 0 && cur_node->shm.len == shm->len) { - return HLC_OK; + ShmListNode *curNode = shmList->head; + while (curNode != NULL) { + if (strcmp(curNode->shm.name, shm->name) == 0 && curNode->shm.len == shm->len) { + return UBRING_OK; } - cur_node = cur_node->next; + curNode = curNode->next; } - return HLC_ERR; + return UBRING_ERR; } -RETURN_CODE AddShmToList(ShmList *shm_list, SHM *shm) +RETURN_CODE AddShmToList(ShmList *shmList, SHM *shm) { - if (shm_list == NULL || shm == NULL) { + if (shmList == NULL || shm == NULL) { LOG(ERROR) << "Shm list or shm is null."; - return HLC_ERR; + return UBRING_ERR; } - if (IsExistInShmList(shm_list, shm) == HLC_OK) { + if (IsExistInShmList(shmList, shm) == UBRING_OK) { LOG(ERROR) << "Shm name=" << shm->name << " is exist in shm list."; - return HLC_ERR; + return UBRING_ERR; } - ShmListNode *new_shm_node = (ShmListNode *)malloc(sizeof(ShmListNode)); - if (new_shm_node == NULL) { + ShmListNode *newShmNode = (ShmListNode *)malloc(sizeof(ShmListNode)); + if (newShmNode == NULL) { LOG(ERROR) << "Malloc shm node failed."; - return HLC_ERR; + return UBRING_ERR; } - memcpy(&new_shm_node->shm, shm, sizeof(SHM)); - LOCK_GUARD(shm_list->shm_lock); - new_shm_node->next = NULL; - new_shm_node->prev = shm_list->tail; - if (shm_list->tail) { - shm_list->tail->next = new_shm_node; - shm_list->tail = new_shm_node; + memcpy(&newShmNode->shm, shm, sizeof(SHM)); + LOCK_GUARD(shmList->shmLock); + newShmNode->next = NULL; + newShmNode->prev = shmList->tail; + if (shmList->tail) { + shmList->tail->next = newShmNode; + shmList->tail = newShmNode; } else { - shm_list->head = new_shm_node; - shm_list->tail = new_shm_node; + shmList->head = newShmNode; + shmList->tail = newShmNode; } - shm_list->size++; - LOG(DEBUG) << "Add shm to list success, shm name=" << shm->name << " size=" << shm_list->size; - return HLC_OK; + shmList->size++; + LOG(DEBUG) << "Add shm to list success, shm name=" << 
shm->name << " size=" << shmList->size; + return UBRING_OK; } } } \ No newline at end of file diff --git a/src/brpc/ub/shm/shm_ubs.h b/src/brpc/ubring/shm/shm_ubs.h similarity index 98% rename from src/brpc/ub/shm/shm_ubs.h rename to src/brpc/ubring/shm/shm_ubs.h index 226f58bb8c..14b5916503 100644 --- a/src/brpc/ub/shm/shm_ubs.h +++ b/src/brpc/ubring/shm/shm_ubs.h @@ -18,7 +18,7 @@ #ifndef BRPC_SHM_UBS_H #define BRPC_SHM_UBS_H namespace brpc { -namespace ub { +namespace ubring { DECLARE_int32(ub_flying_io_timeout); typedef enum TagUbsLogLevel { diff --git a/src/brpc/ub/timer/timer_mgr.cpp b/src/brpc/ubring/timer/timer_mgr.cpp similarity index 50% rename from src/brpc/ub/timer/timer_mgr.cpp rename to src/brpc/ubring/timer/timer_mgr.cpp index 83ce18effe..7688198519 100644 --- a/src/brpc/ub/timer/timer_mgr.cpp +++ b/src/brpc/ubring/timer/timer_mgr.cpp @@ -24,75 +24,75 @@ #include #include #include -#include "brpc/ub/timer/timer_mgr.h" +#include "brpc/ubring/timer/timer_mgr.h" namespace brpc { -namespace ub { -int32_t g_epoll_fd = -1; -std::atomic g_total_timer_num; -TimerFdCtx *g_timer_fd_ctx_map = NULL; -uint32_t max_system_fd; -static pthread_t g_epoll_execute_thread; -static int32_t g_timer_module_initialized; +namespace ubring { +int32_t g_epollFd = -1; +std::atomic g_totalTimerNum; +TimerFdCtx *g_timerFdCtxMap = NULL; +uint32_t maxSystemFd; +static pthread_t g_epollExecuteThread; +static int32_t g_timerModuleInitialized; static RETURN_CODE DeleteTimerInner(uint32_t fd) { - if (g_timer_fd_ctx_map == NULL) { + if (g_timerFdCtxMap == NULL) { LOG(WARNING) << "The timer is not initialized."; - return HLC_OK; + return UBRING_OK; } - if (g_timer_fd_ctx_map[fd].status == TIMER_CONTEXT_NOT_USING) { + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { LOG(WARNING) << "The timer is not using, timerFd=" << fd; - return HLC_OK; + return UBRING_OK; } - if (epoll_ctl(g_epoll_fd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { + if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, 
NULL) != 0) { LOG(ERROR) << "Failed to delete the timer fd=" << fd << " with errno=" << errno; } CloseTimerFd(fd); - atomic_fetch_sub(&g_total_timer_num, 1); - return HLC_OK; + atomic_fetch_sub(&g_totalTimerNum, 1); + return UBRING_OK; } static RETURN_CODE StartTimeEpoll(void) { - g_epoll_fd = epoll_create1(0); - if (UNLIKELY(g_epoll_fd == -1)) { + g_epollFd = epoll_create1(0); + if (UNLIKELY(g_epollFd == -1)) { LOG(ERROR) << "Failed to create epoll. errno=" << errno; - return HLC_ERR; + return UBRING_ERR; } - int ret = pthread_create(&g_epoll_execute_thread, NULL, TimerEpoll, NULL); + int ret = pthread_create(&g_epollExecuteThread, NULL, TimerEpoll, NULL); if (UNLIKELY(ret != 0)) { LOG(ERROR) << "Failed to create thread err=" << ret; - return HLC_ERR; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } static RETURN_CODE TimerSpinLocksInit(void) { - if (g_timer_fd_ctx_map == NULL) { + if (g_timerFdCtxMap == NULL) { LOG(ERROR) << "Timer module is not fully initialized."; - return HLC_ERR; + return UBRING_ERR; } - for (uint32_t fd = 0; fd < max_system_fd; fd++) { - int ret = pthread_spin_init(&g_timer_fd_ctx_map[fd].spin_lock, PTHREAD_PROCESS_PRIVATE); + for (uint32_t fd = 0; fd < maxSystemFd; fd++) { + int ret = pthread_spin_init(&g_timerFdCtxMap[fd].spinLock, PTHREAD_PROCESS_PRIVATE); if (ret != EOK) { LOG(ERROR) << "Failed to initialize spin lock for fd=" << fd; - for (uint32_t cleanup_fd = 0; cleanup_fd < fd; cleanup_fd++) { - pthread_spin_destroy(&g_timer_fd_ctx_map[cleanup_fd].spin_lock); + for (uint32_t cleanupFd = 0; cleanupFd < fd; cleanupFd++) { + pthread_spin_destroy(&g_timerFdCtxMap[cleanupFd].spinLock); } - return HLC_ERR; + return UBRING_ERR; } } - return HLC_OK; + return UBRING_OK; } -static RETURN_CODE ExecuteCallback(int32_t timer_fd) +static RETURN_CODE ExecuteCallback(int32_t timerFd) { pthread_attr_t attr; pthread_attr_init(&attr); @@ -101,106 +101,112 @@ static RETURN_CODE ExecuteCallback(int32_t timer_fd) LOG(ERROR) << "Failed to set 
thread detach status when executing callback"; } - pthread_t cb_thread; - err = pthread_create(&cb_thread, &attr, UnifiedCallback, (void *)(&g_timer_fd_ctx_map[timer_fd])); + pthread_t cbThread; + err = pthread_create(&cbThread, &attr, UnifiedCallback, (void *)(&g_timerFdCtxMap[timerFd])); if (err != 0) { pthread_attr_destroy(&attr); LOG(ERROR) << "Failed to create thread while executing callback due to errno=" << err; - return HLC_ERR; + return UBRING_ERR; } pthread_attr_destroy(&attr); - return HLC_OK; + return UBRING_OK; } static RETURN_CODE TimerCtxMapCompletion(void) { - memset(g_timer_fd_ctx_map, 0, - sizeof(TimerFdCtx) * max_system_fd); + memset(g_timerFdCtxMap, 0, + sizeof(TimerFdCtx) * maxSystemFd); RETURN_CODE ret = TimerSpinLocksInit(); - if (ret != HLC_OK) { + if (ret != UBRING_OK) { LOG(ERROR) << "Failed to init spin locks for timer module."; - return HLC_ERR; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } RETURN_CODE TimerInit(void) { - if (g_timer_module_initialized > 0) { - return HLC_OK; + if (g_timerModuleInitialized > 0) { + return UBRING_OK; } - g_total_timer_num.store(0); + g_totalTimerNum.store(0); struct rlimit rlim; - if (getrlimit(RLIMIT_NOFILE, &rlim) != HLC_OK) { + if (getrlimit(RLIMIT_NOFILE, &rlim) != UBRING_OK) { LOG(ERROR) << "Failed to get fd"; - return HLC_ERR; + return UBRING_ERR; } - max_system_fd = (uint32_t)rlim.rlim_cur; + maxSystemFd = (uint32_t)rlim.rlim_cur; - if (g_timer_fd_ctx_map == NULL) { - g_timer_fd_ctx_map = (TimerFdCtx *)malloc(sizeof(TimerFdCtx) * max_system_fd); - if (UNLIKELY(!g_timer_fd_ctx_map)) { + if (g_timerFdCtxMap == NULL) { + g_timerFdCtxMap = (TimerFdCtx *)malloc(sizeof(TimerFdCtx) * maxSystemFd); + if (UNLIKELY(!g_timerFdCtxMap)) { LOG(ERROR) << "Fail to malloc space for timer modules. 
errno=%d", errno; - return HLC_ERR; + return UBRING_ERR; } RETURN_CODE ret = TimerCtxMapCompletion(); - if (ret != HLC_OK) { + if (ret != UBRING_OK) { LOG(ERROR) << "Failed to init main data structure of Time Module. ret=" << ret; - free(g_timer_fd_ctx_map); - g_timer_fd_ctx_map = NULL; - return HLC_ERR; + free(g_timerFdCtxMap); + g_timerFdCtxMap = NULL; + return UBRING_ERR; } } RETURN_CODE ret = StartTimeEpoll(); - if (ret != HLC_OK) { + if (ret != UBRING_OK) { LOG(ERROR) << "Failed to start Timer Epoll. ret=" << ret; - if (LIKELY(g_timer_fd_ctx_map != NULL)) { - FREE_PTR(g_timer_fd_ctx_map); + if (LIKELY(g_timerFdCtxMap != NULL)) { + FREE_PTR(g_timerFdCtxMap); } - return HLC_ERR; + return UBRING_ERR; } - g_timer_module_initialized = 1; - return HLC_OK; + g_timerModuleInitialized = 1; + return UBRING_OK; } void *UnifiedCallback(void *args) { TimerFdCtx *ctx = (TimerFdCtx *)args; - if (pthread_spin_trylock(&ctx->spin_lock) == 0) { - if (ctx->status == TIMER_CONTEXT_NOT_USING) { - pthread_spin_unlock(&ctx->spin_lock); + // Try to lock with a small delay if initial try fails + int retry = 0; + while (pthread_spin_trylock(&ctx->spinLock) != 0) { + if (retry >= 3) { + LOG_EVERY_SECOND(WARNING) << "Failed to acquire spin lock after multiple attempts, context status is " << ctx->status; return NULL; } - ctx->status = TIMER_CONTEXT_CALLBACK_ONGOING; - ctx->cb(ctx->args); - if (ctx->periodical != 1) { - DeleteTimerInner((uint32_t)ctx->fd); - } - pthread_spin_unlock(&ctx->spin_lock); - } else { - LOG_EVERY_SECOND(WARNING) << "The context status is " << ctx->status; + usleep(100); // Small delay before retry + retry++; + } + + if (ctx->status == TIMER_CONTEXT_NOT_USING) { + pthread_spin_unlock(&ctx->spinLock); return NULL; } + ctx->status = TIMER_CONTEXT_CALLBACK_ONGOING; + ctx->cb(ctx->args); + if (ctx->periodical != 1) { + DeleteTimerInner((uint32_t)ctx->fd); + } + pthread_spin_unlock(&ctx->spinLock); return NULL; } void *TimerEpoll(void *args) { UNREFERENCE_PARAM(args); - 
struct epoll_event ready_events[MAX_TIMER]; + struct epoll_event readyEvents[MAX_TIMER]; while (1) { - if (g_timer_module_initialized <= 0) { + if (g_timerModuleInitialized <= 0) { LOG(ERROR) << "The Timer module is not initialized."; break; } - int32_t ready_num = epoll_wait(g_epoll_fd, ready_events, MAX_TIMER, TIMER_EPOLL_WAIT_TIMEOUT); - if (UNLIKELY(ready_num == -1)) { + int32_t readyNum = epoll_wait(g_epollFd, readyEvents, MAX_TIMER, TIMER_EPOLL_WAIT_TIMEOUT); + if (UNLIKELY(readyNum == -1)) { error_t err = errno; if (err == EINTR) { LOG_EVERY_SECOND(WARNING) << "Epoll wait was interrupted. errno=" << err; @@ -213,23 +219,23 @@ void *TimerEpoll(void *args) break; } - for (int32_t i = 0; i < ready_num; i++) { - struct epoll_event *event = &ready_events[i]; - int32_t timer_fd = event->data.fd; + for (int32_t i = 0; i < readyNum; i++) { + struct epoll_event *event = &readyEvents[i]; + int32_t timerFd = event->data.fd; uint64_t exp = 0; - if (read(timer_fd, &exp, sizeof(exp)) < 0) { - LOG(ERROR) << "Failed to read timerfd=" << timer_fd << " errno=" << errno; + if (read(timerFd, &exp, sizeof(exp)) < 0) { + LOG(ERROR) << "Failed to read timerfd=" << timerFd << " errno=" << errno; continue; } - if (TimerFdCtxValidate((uint32_t)timer_fd) != HLC_OK) { - LOG(ERROR) << "Timer ctx is not valid=" << timer_fd; + if (TimerFdCtxValidate((uint32_t)timerFd) != UBRING_OK) { + LOG(ERROR) << "Timer ctx is not valid=" << timerFd; continue; } - RETURN_CODE ret = ExecuteCallback(timer_fd); - if (ret != HLC_OK) { + RETURN_CODE ret = ExecuteCallback(timerFd); + if (ret != UBRING_OK) { LOG(ERROR) << "Failed execute callback ret=" << ret; - DeleteTimerInner((uint32_t)timer_fd); + DeleteTimerInner((uint32_t)timerFd); continue; } } @@ -239,103 +245,103 @@ void *TimerEpoll(void *args) void DeleteTimerSafe(uint32_t fd) { - if (g_timer_fd_ctx_map == NULL) { + if (g_timerFdCtxMap == NULL) { LOG(WARNING) << "The timer is not initialized."; return; } - if 
(pthread_spin_lock(&g_timer_fd_ctx_map[fd].spin_lock) != 0) { + if (pthread_spin_lock(&g_timerFdCtxMap[fd].spinLock) != 0) { LOG(ERROR) << "Failed to lock while deleting timer=" << fd << " errno=" << errno; return; } - if (g_timer_fd_ctx_map[fd].status == TIMER_CONTEXT_NOT_USING) { + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { LOG(WARNING) << "The timer is not using, timerFd=" << fd; - pthread_spin_unlock(&g_timer_fd_ctx_map[fd].spin_lock); + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); return; } - if (epoll_ctl(g_epoll_fd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { + if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { LOG(ERROR) << "Failed to delete the timer fd=" << fd << " with errno=" << errno; } CloseTimerFd(fd); - atomic_fetch_sub(&g_total_timer_num, 1); + atomic_fetch_sub(&g_totalTimerNum, 1); - pthread_spin_unlock(&g_timer_fd_ctx_map[fd].spin_lock); + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); } void DeleteTimer(uint32_t fd) { - if (g_timer_fd_ctx_map == NULL) { + if (g_timerFdCtxMap == NULL) { LOG(WARNING) << "The timer is not initialized."; return; } - g_timer_fd_ctx_map[fd].periodical = 0; + g_timerFdCtxMap[fd].periodical = 0; } int32_t TimerStart(const struct itimerspec *time, void *(*cb)(void *), void *args) { - if (g_epoll_fd == -1) { + if (g_epollFd == -1) { LOG(ERROR) << "Timer epoll encountered internal error."; return -1; } - int timer_fd = timerfd_create(CLOCK_MONOTONIC, 0); - if (UNLIKELY(timer_fd >= (int)max_system_fd || timer_fd == -1)) { - LOG(ERROR) << "Failed to create timerfd=" << timer_fd << " errno=" << errno; + int timerFd = timerfd_create(CLOCK_MONOTONIC, 0); + if (UNLIKELY(timerFd >= (int)maxSystemFd || timerFd == -1)) { + LOG(ERROR) << "Failed to create timerfd=" << timerFd << " errno=" << errno; return -1; } - g_timer_fd_ctx_map[timer_fd].status = TIMER_CONTEXT_EPOLL_WAITING; - g_timer_fd_ctx_map[timer_fd].cb = cb; - g_timer_fd_ctx_map[timer_fd].args = args; - g_timer_fd_ctx_map[timer_fd].fd = 
(uint32_t)timer_fd; + g_timerFdCtxMap[timerFd].status = TIMER_CONTEXT_EPOLL_WAITING; + g_timerFdCtxMap[timerFd].cb = cb; + g_timerFdCtxMap[timerFd].args = args; + g_timerFdCtxMap[timerFd].fd = (uint32_t)timerFd; if (LIKELY(time->it_interval.tv_sec > 0 || time->it_interval.tv_nsec > 0)) { - g_timer_fd_ctx_map[timer_fd].periodical = 1; + g_timerFdCtxMap[timerFd].periodical = 1; } struct epoll_event event = { .events = EPOLLIN, - .data = {.fd = timer_fd} + .data = {.fd = timerFd} }; - int32_t ret = epoll_ctl(g_epoll_fd, EPOLL_CTL_ADD, timer_fd, &event); + int32_t ret = epoll_ctl(g_epollFd, EPOLL_CTL_ADD, timerFd, &event); if (UNLIKELY(ret != 0)) { - CloseTimerFd((uint32_t)timer_fd); + CloseTimerFd((uint32_t)timerFd); LOG(ERROR) << "Failed to add event to epoll. errno=" << errno; return -1; } - atomic_fetch_add(&g_total_timer_num, 1); + atomic_fetch_add(&g_totalTimerNum, 1); - ret = timerfd_settime(timer_fd, 0, time, NULL); + ret = timerfd_settime(timerFd, 0, time, NULL); if (UNLIKELY(ret != 0)) { - if (epoll_ctl(g_epoll_fd, EPOLL_CTL_DEL, timer_fd, NULL) != 0) { - LOG(ERROR) << "Failed to delete the timer fd=" << timer_fd << " with errno=" << errno; + if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, timerFd, NULL) != 0) { + LOG(ERROR) << "Failed to delete the timer fd=" << timerFd << " with errno=" << errno; } - CloseTimerFd((uint32_t)timer_fd); - atomic_fetch_sub(&g_total_timer_num, 1); + CloseTimerFd((uint32_t)timerFd); + atomic_fetch_sub(&g_totalTimerNum, 1); LOG(ERROR) << "Failed to set timer"; return -1; } - return timer_fd; + return timerFd; } uint32_t GetActiveTimerNum(void) { - return atomic_load(&g_total_timer_num); + return atomic_load(&g_totalTimerNum); } void CloseTimerFd(uint32_t fd) { - g_timer_fd_ctx_map[fd].cb = NULL; - g_timer_fd_ctx_map[fd].args = NULL; - g_timer_fd_ctx_map[fd].status = TIMER_CONTEXT_NOT_USING; - g_timer_fd_ctx_map[fd].fd = 0; - g_timer_fd_ctx_map[fd].periodical = 0; + g_timerFdCtxMap[fd].cb = NULL; + g_timerFdCtxMap[fd].args = NULL; + 
g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; + g_timerFdCtxMap[fd].fd = 0; + g_timerFdCtxMap[fd].periodical = 0; if (close((int)fd) != 0) { LOG(ERROR) << "Failed to close timer fd=" << fd << " errno=" << errno; return; @@ -344,19 +350,19 @@ void CloseTimerFd(uint32_t fd) void TimerModuleDestroy(void) { - uint32_t max_fd = max_system_fd; - if (g_timer_fd_ctx_map) { - for (uint32_t fd = 0; fd < max_fd; fd++) { - if (g_timer_fd_ctx_map[fd].status != TIMER_CONTEXT_NOT_USING) { + uint32_t maxFd = maxSystemFd; + if (g_timerFdCtxMap) { + for (uint32_t fd = 0; fd < maxFd; fd++) { + if (g_timerFdCtxMap[fd].status != TIMER_CONTEXT_NOT_USING) { DeleteTimerSafe(fd); } } } - close(g_epoll_fd); - g_epoll_fd = -1; - g_total_timer_num = 0; - g_timer_module_initialized = 0; - int32_t ret = pthread_join(g_epoll_execute_thread, NULL); + close(g_epollFd); + g_epollFd = -1; + g_totalTimerNum = 0; + g_timerModuleInitialized = 0; + int32_t ret = pthread_join(g_epollExecuteThread, NULL); if (ret != EOK) { LOG(ERROR) << "Failed to join pthread, during destroying timer module. 
ret=" << ret; return; @@ -365,20 +371,20 @@ void TimerModuleDestroy(void) RETURN_CODE TimerFdCtxValidate(uint32_t fd) { - if (fd >= max_system_fd) { - LOG(ERROR) << "TimerFd=" << fd << " is out of range=" << max_system_fd; - return HLC_ERR; + if (fd >= maxSystemFd) { + LOG(ERROR) << "TimerFd=" << fd << " is out of range=" << maxSystemFd; + return UBRING_ERR; } - if (g_timer_fd_ctx_map[fd].status == TIMER_CONTEXT_NOT_USING) { - LOG(ERROR) << "TimerFd=" << fd << " has wrong status=" << g_timer_fd_ctx_map[fd].status; - return HLC_ERR; + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { + LOG(ERROR) << "TimerFd=" << fd << " has wrong status=" << g_timerFdCtxMap[fd].status; + return UBRING_ERR; } - if (g_timer_fd_ctx_map[fd].cb == NULL) { + if (g_timerFdCtxMap[fd].cb == NULL) { LOG(ERROR) << "The callback is not set."; - return HLC_ERR; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } } } \ No newline at end of file diff --git a/src/brpc/ub/timer/timer_mgr.h b/src/brpc/ubring/timer/timer_mgr.h similarity index 94% rename from src/brpc/ub/timer/timer_mgr.h rename to src/brpc/ubring/timer/timer_mgr.h index 01e81c968a..0ef1532715 100644 --- a/src/brpc/ub/timer/timer_mgr.h +++ b/src/brpc/ubring/timer/timer_mgr.h @@ -20,13 +20,13 @@ #include #include #include -#include "brpc/ub/common/common.h" +#include "brpc/ubring/common/common.h" #define MAX_TIMER 1024 #define TIMER_EPOLL_WAIT_TIMEOUT 1000 namespace brpc { -namespace ub { +namespace ubring { typedef enum { TIMER_CONTEXT_NOT_USING, TIMER_CONTEXT_EPOLL_WAITING, @@ -39,7 +39,7 @@ typedef struct { uint32_t fd; TimerFdCtxStatus status; uint32_t periodical; - pthread_spinlock_t spin_lock; + pthread_spinlock_t spinLock; } TimerFdCtx; RETURN_CODE TimerInit(void); diff --git a/src/brpc/ub/ub_endpoint.cpp b/src/brpc/ubring/ub_endpoint.cpp similarity index 92% rename from src/brpc/ub/ub_endpoint.cpp rename to src/brpc/ubring/ub_endpoint.cpp index 6b7872c74d..4d0bffd735 100644 --- 
a/src/brpc/ub/ub_endpoint.cpp +++ b/src/brpc/ubring/ub_endpoint.cpp @@ -27,16 +27,18 @@ #include "brpc/input_messenger.h" #include "brpc/socket.h" #include "brpc/reloadable_flags.h" -#include "brpc/ub/ub_helper.h" -#include "brpc/ub/ub_endpoint.h" +#include "brpc/ubring/ub_helper.h" +#include "brpc/ubring/ub_endpoint.h" +#include "brpc/ubring/shm/shm_def.h" +#include "brpc/ubring/common/common.h" #include "brpc/ub_transport.h" -#include "brpc/ub/ubr_trx.h" +#include "brpc/ubring/ubr_trx.h" DECLARE_int32(task_group_ntags); namespace brpc { DECLARE_bool(log_connection_close); -namespace ub { +namespace ubring { extern bool g_skip_ub_init; DEFINE_int32(data_queue_size, 4, "data queue size for UB"); @@ -61,7 +63,7 @@ static uint16_t g_ub_impl_version = 1; static const uint32_t ACK_MSG_UB_OK = 0x1; -static butil::Mutex* g_rdma_resource_mutex = NULL; +static butil::Mutex* g_ubring_resource_mutex = NULL; struct HelloMessage { void Serialize(void* data) const; @@ -235,7 +237,7 @@ bool HelloNegotiationValid(HelloMessage& msg) { return false; } -static const int wait_timeout_ms = 50; +static const int WAIT_TIMEOUT_MS = 50; int UBShmEndpoint::ReadFromFd(void* data, size_t len) { CHECK(data != NULL); @@ -243,7 +245,7 @@ int UBShmEndpoint::ReadFromFd(void* data, size_t len) { size_t received = 0; do { const int expected_val = _read_butex->load(butil::memory_order_acquire); - const timespec duetime = butil::milliseconds_from_now(wait_timeout_ms); + const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); nr = read(_socket->fd(), (uint8_t*)data + received, len - received); if (nr < 0) { if (errno == EAGAIN) { @@ -270,7 +272,7 @@ int UBShmEndpoint::WriteToFd(void* data, size_t len) { int nw = 0; size_t written = 0; do { - const timespec duetime = butil::milliseconds_from_now(wait_timeout_ms); + const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); nw = write(_socket->fd(), (uint8_t*)data + written, len - written); if (nw < 0) { if (errno == EAGAIN) 
{ @@ -333,7 +335,7 @@ void* UBShmEndpoint::ProcessHandshakeAtClient(void* arg) { if (ep->WriteToFd(data, g_ub_hello_msg_len) < 0) { const int saved_errno = errno; PLOG(WARNING) << "Fail to send hello message to server:" << s->description(); - s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", s->description().c_str(), berror(saved_errno)); ep->_state = FAILED; return NULL; @@ -343,14 +345,14 @@ void* UBShmEndpoint::ProcessHandshakeAtClient(void* arg) { if (ep->ReadFromFd(data, MAGIC_STR_LEN) < 0) { const int saved_errno = errno; PLOG(WARNING) << "Fail to get hello message from server:" << s->description(); - s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", s->description().c_str(), berror(saved_errno)); ep->_state = FAILED; return NULL; } if (memcmp(data, MAGIC_STR, MAGIC_STR_LEN) != 0) { LOG(WARNING) << "Read unexpected data during handshake:" << s->description(); - s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(EPROTO, "Fail to complete ubring handshake from %s: %s", s->description().c_str(), berror(EPROTO)); ep->_state = FAILED; return NULL; @@ -359,7 +361,7 @@ void* UBShmEndpoint::ProcessHandshakeAtClient(void* arg) { if (ep->ReadFromFd(data, HELLO_MSG_LEN_MIN - MAGIC_STR_LEN) < 0) { const int saved_errno = errno; PLOG(WARNING) << "Fail to get Hello Message from server:" << s->description(); - s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", s->description().c_str(), berror(saved_errno)); ep->_state = FAILED; return NULL; @@ -369,7 +371,7 @@ void* UBShmEndpoint::ProcessHandshakeAtClient(void* arg) { if (remote_msg.msg_len < HELLO_MSG_LEN_MIN) { LOG(WARNING) << "Fail to parse Hello Message length from server:" << s->description(); 
- s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(EPROTO, "Fail to complete ubring handshake from %s: %s", s->description().c_str(), berror(EPROTO)); ep->_state = FAILED; return NULL; @@ -404,7 +406,7 @@ void* UBShmEndpoint::ProcessHandshakeAtClient(void* arg) { if (ep->WriteToFd(data, ACK_MSG_LEN) < 0) { const int saved_errno = errno; PLOG(WARNING) << "Fail to send Ack Message to server:" << s->description(); - s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", s->description().c_str(), berror(saved_errno)); ep->_state = FAILED; return NULL; @@ -413,7 +415,7 @@ void* UBShmEndpoint::ProcessHandshakeAtClient(void* arg) { if (ub_transport->_ub_state == UBShmTransport::UB_ON) { ep->_state = ESTABLISHED; LOG_IF(INFO, FLAGS_ub_trace_verbose) - << "Client handshake ends (use rdma) on " << s->description(); + << "Client handshake ends (use ubring) on " << s->description(); } else { ep->_state = FALLBACK_TCP; LOG_IF(INFO, FLAGS_ub_trace_verbose) @@ -438,7 +440,7 @@ void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) { if (ep->ReadFromFd(data, MAGIC_STR_LEN) < 0) { const int saved_errno = errno; PLOG(WARNING) << "Fail to read Hello Message from client:" << s->description() << " " << s->_remote_side; - s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", s->description().c_str(), berror(saved_errno)); ep->_state = FAILED; return NULL; @@ -458,7 +460,7 @@ void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) { if (ep->ReadFromFd(data, g_ub_hello_msg_len - MAGIC_STR_LEN) < 0) { const int saved_errno = errno; PLOG(WARNING) << "Fail to read Hello Message from client:" << s->description(); - s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", 
s->description().c_str(), berror(saved_errno)); ep->_state = FAILED; return NULL; @@ -469,7 +471,7 @@ void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) { if (remote_msg.msg_len < HELLO_MSG_LEN_MIN) { LOG(WARNING) << "Fail to parse Hello Message length from client:" << s->description(); - s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(EPROTO, "Fail to complete ubring handshake from %s: %s", s->description().c_str(), berror(EPROTO)); ep->_state = FAILED; return NULL; @@ -485,21 +487,21 @@ void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) { ub_transport->_ub_state = UBShmTransport::UB_OFF; } else { ep->_state = S_ALLOC_SHM; - SHM remote_trx_shm = {NULL, remote_msg.len, 0, {0}, (uint8_t)ep->_socket->fd()}; + ubring::SHM remote_trx_shm = {NULL, remote_msg.len, 0, {0}, (uint8_t)ep->_socket->fd()}; strncpy(remote_trx_shm.name, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); size_t local_shm_len = (size_t)(FLAGS_data_queue_size) * MB_TO_BYTE; // server端共享内存名称 - SHM local_trx_shm = {NULL, local_shm_len, 0, {0}, (uint8_t)ep->_socket->fd()}; - char client_name[SHM_MAX_NAME_BUFF_LEN]; - strncpy(client_name, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); + ubring::SHM local_trx_shm = {NULL, local_shm_len, 0, {0}, (uint8_t)ep->_socket->fd()}; + char clientName[SHM_MAX_NAME_BUFF_LEN]; + strncpy(clientName, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); - char *client_ip_port = strrchr(client_name, '_'); - if (client_ip_port != NULL) { - *client_ip_port = '\0'; + char *clientIpPort = strrchr(clientName, '_'); + if (clientIpPort != NULL) { + *clientIpPort = '\0'; } int result = snprintf(local_trx_shm.name, SHM_MAX_NAME_BUFF_LEN, "%s_%s", - client_name, SERVER_SHM_NAME_SUFFIX); + clientName, SERVER_SHM_NAME_SUFFIX); if (UNLIKELY(result < 0)) { LOG(WARNING) << "Copy client shared memory name failed, ret=" << result; ub_transport->_ub_state = UBShmTransport::UB_OFF; @@ -538,7 +540,7 @@ void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) 
{ if (ep->ReadFromFd(data, ACK_MSG_LEN) < 0) { const int saved_errno = errno; PLOG(WARNING) << "Fail to read ack message from client:" << s->description(); - s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", s->description().c_str(), berror(saved_errno)); ep->_state = FAILED; return NULL; @@ -558,7 +560,7 @@ void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) { ub_transport->_ub_state = UBShmTransport::UB_ON; ep->_state = ESTABLISHED; LOG_IF(INFO, FLAGS_ub_trace_verbose) - << "Server handshake ends (use rdma) on " << s->description(); + << "Server handshake ends (use ubring) on " << s->description(); } } else { ub_transport->_ub_state = UBShmTransport::UB_OFF; @@ -609,7 +611,7 @@ ssize_t UBShmEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { if (UNLIKELY(nw == -1)) { LOG(ERROR) << "Non-blocking send msg in failed, connection has been closed."; errno = EPIPE; - } else if (UNLIKELY(nw == HLC_RETRY)) { + } else if (UNLIKELY(nw == UBRING_RETRY)) { errno = EAGAIN; nw = -1; } @@ -626,7 +628,7 @@ ssize_t UBShmEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { return nw; } -int UBShmEndpoint::AllocateClientResources(SHM* local_trx_shm, const char* shm_name) { +int UBShmEndpoint::AllocateClientResources(ubring::SHM* local_trx_shm, const char* shm_name) { if (BAIDU_UNLIKELY(g_skip_ub_init)) { // For UT return 0; @@ -651,7 +653,7 @@ int UBShmEndpoint::AllocateClientResources(SHM* local_trx_shm, const char* shm_n return 0; } -int UBShmEndpoint::AllocateServerResources(SHM* remote_trx_shm, SHM* local_trx_shm) { +int UBShmEndpoint::AllocateServerResources(ubring::SHM* remote_trx_shm, ubring::SHM* local_trx_shm) { if (BAIDU_UNLIKELY(g_skip_ub_init)) { // For UT return 0; @@ -693,7 +695,7 @@ void UBShmEndpoint::DeallocateResources() { } } -void UBShmEndpoint::PollIn(UBShmEndpoint* ep, uint32_t ep_event) { +void UBShmEndpoint::PollIn(UBShmEndpoint* ep, 
uint32_t epEvent) { SocketUniquePtr s; if (Socket::Address(ep->_socket->id(), &s) < 0) { return; @@ -703,7 +705,7 @@ void UBShmEndpoint::PollIn(UBShmEndpoint* ep, uint32_t ep_event) { InputMessageClosure last_msg; while (true) { - int ret = ep->_ub_ring->IsUbrTrxReadable(ep_event); + int ret = ep->_ub_ring->IsUbrTrxReadable(epEvent); if (ret < 0) { return; } @@ -755,7 +757,7 @@ void UBShmEndpoint::PollIn(UBShmEndpoint* ep, uint32_t ep_event) { } } -void UBShmEndpoint::PollOut(UBShmEndpoint* ep, uint32_t ep_event) { +void UBShmEndpoint::PollOut(UBShmEndpoint* ep, uint32_t epEvent) { SocketUniquePtr s; if (Socket::Address(ep->_socket->id(), &s) < 0) { return; @@ -769,7 +771,7 @@ void UBShmEndpoint::PollOut(UBShmEndpoint* ep, uint32_t ep_event) { } int UBShmEndpoint::GlobalInitialize() { - g_rdma_resource_mutex = new butil::Mutex; + g_ubring_resource_mutex = new butil::Mutex; _poller_groups = std::vector(FLAGS_task_group_ntags); return 0; } @@ -862,7 +864,7 @@ int UBShmEndpoint::PollingModeInitialize(bthread_tag_t tag, pollers[i].release_fn = release_fn; auto rc = bthread_start_background(&pollers[i].tid, &attr, fn, args); if (rc != 0) { - LOG(ERROR) << "Fail to start rdma polling bthread"; + LOG(ERROR) << "Fail to start ubring polling bthread"; return -1; } } @@ -889,7 +891,7 @@ void UBShmEndpoint::PollerRegisterEvent(CqSidOp::OpType op, uint32_t events) { } } -} // namespace ub +} // namespace ubring } // namespace brpc #endif // if BRPC_WITH_UBRING diff --git a/src/brpc/ub/ub_endpoint.h b/src/brpc/ubring/ub_endpoint.h similarity index 94% rename from src/brpc/ub/ub_endpoint.h rename to src/brpc/ubring/ub_endpoint.h index 0d5ea49003..e63513165c 100644 --- a/src/brpc/ub/ub_endpoint.h +++ b/src/brpc/ubring/ub_endpoint.h @@ -30,13 +30,14 @@ #include "butil/macros.h" #include "butil/containers/mpsc_queue.h" #include "brpc/socket.h" -#include "brpc/ub/ub_helper.h" -#include "brpc/ub/ub_ring.h" +#include "brpc/ubring/ub_helper.h" +#include "brpc/ubring/ub_ring.h" 
+#include "brpc/ubring/shm/shm_def.h" namespace brpc { class Socket; -namespace ub { +namespace ubring { DECLARE_int32(ub_poller_num); DECLARE_bool(ub_edisp_unsched); @@ -75,7 +76,7 @@ friend class Socket; // Reset the endpoint (for next use) void Reset(); - // Cut data from the given IOBuf list and use RDMA to send + // Cut data from the given IOBuf list and use UBRING to send // Return bytes cut if success, -1 if failed and errno set ssize_t CutFromIOBufList(butil::IOBuf** data, size_t ndata); @@ -148,15 +149,16 @@ friend class Socket; // return -1 if encounter other errno (including EOF) int ReadFromFd(void* data, size_t len); + // Write at most len bytes from data to fd in _socket // wait for _epollout_butex if encounter EAGAIN // return -1 if encounter other errno int WriteToFd(void* data, size_t len); // Poll CQ and get the work completion - static void PollIn(UBShmEndpoint* ep, uint32_t ep_event); + static void PollIn(UBShmEndpoint* ep, uint32_t epEvent); - static void PollOut(UBShmEndpoint* ep, uint32_t ep_event); + static void PollOut(UBShmEndpoint* ep, uint32_t epEvent); // Try to read data on TCP fd in _socket inline void TryReadOnTcp(); @@ -167,7 +169,7 @@ friend class Socket; State _state; // ub resource - UBRing* _ub_ring{nullptr}; + ubring::UBRing* _ub_ring{nullptr}; SocketId _cq_sid; @@ -220,7 +222,7 @@ friend class Socket; void PollerRegisterEvent(CqSidOp::OpType op, uint32_t events = EPOLLET); }; -} // namespace ub +} // namespace ubring } // namespace brpc #else // if BRPC_WITH_UBRING diff --git a/src/brpc/ub/ub_helper.cpp b/src/brpc/ubring/ub_helper.cpp similarity index 88% rename from src/brpc/ub/ub_helper.cpp rename to src/brpc/ubring/ub_helper.cpp index 62bb4cdd12..9b397cbb63 100644 --- a/src/brpc/ub/ub_helper.cpp +++ b/src/brpc/ubring/ub_helper.cpp @@ -24,12 +24,12 @@ #include #include "butil/logging.h" #include "brpc/socket.h" -#include "brpc/ub/ub_endpoint.h" -#include "brpc/ub/ub_helper.h" -#include "ub_ring_manager.h" +#include 
"brpc/ubring/ub_endpoint.h" +#include "brpc/ubring/ub_helper.h" +#include "brpc/ubring/ub_ring_manager.h" namespace brpc { -namespace ub { +namespace ubring { void* g_handle_ub = NULL; bool g_skip_ub_init = false; @@ -70,7 +70,7 @@ static void GlobalUBInitializeOrDieImpl() { } if (UBShmEndpoint::GlobalInitialize() < 0) { - LOG(ERROR) << "rdma_recv_block_type incorrect " + LOG(ERROR) << "ubring_recv_block_type incorrect " << "(valid value: default/large/huge)"; ExitWithError(); } @@ -78,10 +78,10 @@ static void GlobalUBInitializeOrDieImpl() { g_ub_available.store(true, butil::memory_order_relaxed); } -static pthread_once_t initialize_ub_once = PTHREAD_ONCE_INIT; +static pthread_once_t initialize_UB_once = PTHREAD_ONCE_INIT; void GlobalUBInitializeOrDie() { - if (pthread_once(&initialize_ub_once, + if (pthread_once(&initialize_UB_once, GlobalUBInitializeOrDieImpl) != 0) { LOG(FATAL) << "Fail to pthread_once GlobalUBInitializeOrDie"; exit(1); @@ -116,7 +116,7 @@ bool InitPollingModeWithTag(bthread_tag_t tag, return false; } -} // namespace ub +} // namespace ubring } // namespace brpc #else @@ -125,10 +125,10 @@ bool InitPollingModeWithTag(bthread_tag_t tag, #include "butil/logging.h" namespace brpc { -namespace ub { +namespace ubring { void GlobalUBInitializeOrDie() { - LOG(ERROR) << "brpc is not compiled with rdma. To enable it, please refer to " - << "https://github.com/apache/brpc/blob/master/docs/en/rdma.md"; + LOG(ERROR) << "brpc is not compiled with ubring. 
To enable it, please refer to " + << "https://github.com/apache/brpc/blob/master/docs/en/ubring.md"; exit(1); } } diff --git a/src/brpc/ub/ub_helper.h b/src/brpc/ubring/ub_helper.h similarity index 94% rename from src/brpc/ub/ub_helper.h rename to src/brpc/ubring/ub_helper.h index 958e918e9a..4de3ead291 100644 --- a/src/brpc/ub/ub_helper.h +++ b/src/brpc/ubring/ub_helper.h @@ -26,7 +26,7 @@ namespace brpc { - namespace ub { + namespace ubring { void GlobalRelease(); @@ -46,11 +46,11 @@ namespace brpc { // If the given protocol supported by UB bool SupportedByUB(std::string protocol); - } // namespace ub + } // namespace ubring } // namespace brpc #else namespace brpc { - namespace ub { + namespace ubring { void GlobalRelease(); @@ -58,7 +58,7 @@ namespace brpc { // Exit if failed void GlobalUBInitializeOrDie(); - } // namespace ub + } // namespace ubring } // namespace brpc #endif // if BRPC_WITH_UBRING diff --git a/src/brpc/ubring/ub_ring.cpp b/src/brpc/ubring/ub_ring.cpp new file mode 100644 index 0000000000..97e5371924 --- /dev/null +++ b/src/brpc/ubring/ub_ring.cpp @@ -0,0 +1,1048 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include +#include +#include "butil/logging.h" +#include "brpc/ubring/ub_ring.h" + +namespace brpc { +namespace ubring { +uint32_t g_sleepTime[UBR_TASK_STEP_NUM] = {0}; +#define TIME_COVERSION 1000 +DEFINE_int32(ub_disconnect_timeout, 1, "Ubshm disconnection timeout."); +DEFINE_int32(ub_connect_timeout, 1, "Ubshm connection timeout."); +DEFINE_int32(ub_hb_timer_interval, 1, "Heartbeat timer interval."); +DEFINE_int32(ub_hb_retry_cnt, 3, "Heartbeat retry times."); +DEFINE_int32(ub_event_queue_timer_interval, 100, "Interval of the disconnection timer."); + +UBRing::UBRing() +{} +UBRing::~UBRing() +{} + +RETURN_CODE UBRing::UbrTrxMapShm(SHM *localShm, SHM *remoteShm) +{ + RETURN_CODE rc = UbrTrxMapLocalShm(localShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx map local shared memory failed."; + return rc; + } + rc = UbrTrxMapRemoteShm(remoteShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx map remote shared memory failed."; + return rc; + } + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrTrxClose() { + if (UNLIKELY(UbrTrxCloseCheck(_trx) != UBRING_OK)) { + return UBRING_ERR; + } + ((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = UBR_STATE_CLOSING; + + uint32_t disconnectTimeout = FLAGS_ub_disconnect_timeout; + uint64_t startTime = GetCurNanoSeconds(); + + if (_trx->ubrTx.localTxEventQ.addr != nullptr && ((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr)->flag == UBR_STATE_CONNECTED) { + ((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr)->flag = UBR_STATE_CLOSED; + _trx->ubrTx.trxState = UBR_STATE_CLOSED; + } + + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CLOSED; + while (_trx->ubrRx.localRxEventQ.addr != nullptr && ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag != UBR_STATE_CLOSED) { + UbrSetSleepTask(UBR_TASK_CLOSE); + if (HasTimedOut(startTime, disconnectTimeout) != UBRING_OK) { + LOG(ERROR) << "Local shm " << _trx->localShm.name + << " wait for the peer to close the connection 
failed."; + _trx->ubrRx.trxState = UBR_STATE_CLOSED; + ClearTrxResource(_trx, startTime, UBR_SEND_CLOSE); + return UBRING_ERR_TIMEOUT; + } + usleep(1); + } + _trx->ubrRx.trxState = UBR_STATE_CLOSED; + RETURN_CODE rc; + if (UNLIKELY((rc = ClearTrxResource(_trx, startTime, UBR_SEND_CLOSE)) != UBRING_OK)) { + LOG(ERROR) << "Trx close, clear trx resource failed, trx local name=" << _trx->localShm.name; + return UBRING_ERR; + } + LOG(INFO) << "The peer is closed, local name=" << _trx->localShm.name; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrAddCloseTimer() { + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "Trx add close timer failed, trx is null."; + return UBRING_ERR; + } + + uint32_t eventQTimerInterval = FLAGS_ub_event_queue_timer_interval * TIME_COVERSION; + struct itimerspec timeSpec = { + .it_interval = {.tv_sec = 0, .tv_nsec = eventQTimerInterval}, + .it_value = {.tv_sec = 0, .tv_nsec = 1} + }; + int timerFd = TimerStart(&timeSpec, UbrTrxCloseCallback, (void*)_trx); + if (UNLIKELY(timerFd == -1)) { + LOG(ERROR) << "Start ubr close timer failed, trx local name=" << _trx->localShm.name; + return UBRING_ERR; + } + _trx->timerFd = timerFd; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrAddTimer() { + if (UNLIKELY(UbrAddCloseTimer() != UBRING_OK)) { + LOG(ERROR) << "Ubr " << _trx->localShm.name << " add closed timer failed."; + return UBRING_ERR; + } + + if (UNLIKELY(UbrAddHBTimer() != UBRING_OK)) { + DeleteTimerSafe((uint32_t)_trx->timerFd); + LOG(ERROR) << "Ubr " << _trx->localShm.name << " add heartbeat timer failed."; + return UBRING_ERR; + } + return UBRING_OK; +} + +void* UBRing::UbrTrxCloseCallback(void* args) { + auto* trx = (UbrTrx*) args; + if (UNLIKELY(UBRing::UbrTrxCallbackCheck(trx) != UBRING_OK)) { + return nullptr; + } + + auto* localRxEventQ = (UbrEventQMsg *)trx->ubrRx.localRxEventQ.addr; + auto* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; + if (localRxEventQ->flag != UBR_STATE_CLOSED || localTxEventQ->flag == 
UBR_STATE_CLOSED) { + return nullptr; + } + trx->ubrRx.trxState = UBR_STATE_CLOSED; + int fd = (int)trx->localShm.fd; + do { + if (ATOMIC_LOAD(trx->closeCnt) == 0) { + LOG(ERROR) << "Trx close callback failed, exist other closing call, name=" << trx->localShm.name; + break; + } + ATOMIC_SUB(trx->closeCnt, 1); + + uint64_t startTime = GetCurNanoSeconds(); + + if (localTxEventQ->flag == UBR_STATE_CONNECTED || ATOMIC_LOAD(trx->closeCnt) == 1) { + localTxEventQ->flag = UBR_STATE_CLOSED; + trx->ubrTx.trxState = UBR_STATE_CLOSED; + } + UbrEventQMsg* remoteRxEventQ = (UbrEventQMsg *)trx->ubrTx.remoteRxEventQ.addr; + if (remoteRxEventQ == nullptr) { + LOG(ERROR) << "Trx close callback failed, " << trx->localShm.name << " remoteRxEventQ is NULL."; + break; + } + remoteRxEventQ->flag = UBR_STATE_CLOSED; + if (UNLIKELY(ClearTrxResource(trx, startTime, UBR_CALL_BACK_CLOSE, 1) != UBRING_OK)) { + LOG(ERROR) << "Trx close callback failed, " << trx->localShm.name << " clear trx resource failed."; + break; + } + } while (0); + return nullptr; +} + +RETURN_CODE UBRing::UbrAddHBTimer() { + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "Trx add heartbeat timer failed, trx is null."; + return UBRING_ERR; + } + + struct itimerspec timeSpec = { + .it_interval = {.tv_sec = FLAGS_ub_hb_timer_interval, .tv_nsec = 0}, + .it_value = {.tv_sec = 0, .tv_nsec = 1} + }; + int timerFd = TimerStart(&timeSpec, UbrTrxHBCallback, (void*)_trx); + if (UNLIKELY(timerFd == -1)) { + LOG(ERROR) << "Start ubr heartbeat timer failed."; + return UBRING_ERR; + } + _trx->hbTimerFd = timerFd; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrPassiveClearTrx(UbrTrx *trx, int fd, PASSIVE_DISC_TYPE type) { + if (UNLIKELY(UbrTrxCloseCheck(trx) != UBRING_OK)) { + return UBRING_ERR; + } + trx->ubrTx.trxState = UBR_STATE_CLOSED; + trx->ubrRx.trxState = UBR_STATE_CLOSED; + DeleteTimerSafe((uint32_t)trx->timerFd); + const char *typeName = NULL; + if (type == UBR_HEARTBEAT) { + DeleteTimer((uint32_t)trx->hbTimerFd); + 
typeName = "Trx heartbeat"; + } else if (type == UBR_UB_EVENT) { + DeleteTimerSafe((uint32_t)trx->hbTimerFd); + typeName = "Ub event callback"; + } + sleep(FLAGS_ub_flying_io_timeout); + + int rc = ShmLocalFree(&trx->remoteShm); + if (rc != UBRING_OK) { + LOG(ERROR) << typeName << ", delete remote shm failed. ret=" << rc; + } + rc = ShmLocalFree(&trx->localShm); + if (rc != UBRING_OK) { + LOG(ERROR) << typeName << ", delete local shm failed. ret=" << rc; + } + + UBRingManager::ReleaseUbrTrxFromMgr(trx); + return UBRING_OK; +} + +void* UBRing::UbrTrxHBCallback(void* args) { + auto* trx = (UbrTrx*) args; + if (UNLIKELY(UbrTrxCallbackCheck(trx) != UBRING_OK)) { + return NULL; + } + + auto* localDataStatus = (UbrDataStatusQMsg *)trx->ubrTx.localDataStatusQ.addr; + auto* remoteDataStatus = (UbrDataStatusQMsg *)trx->ubrRx.remoteDataStatusQ.addr; + if (UNLIKELY(localDataStatus == NULL || remoteDataStatus == NULL)) { + LOG(ERROR) << "Heartbeat error, datastatus is NULL."; + return NULL; + } + + if (trx->ubrTx.trxState != UBR_STATE_CONNECTED || trx->ubrRx.trxState != UBR_STATE_CONNECTED) { + LOG_EVERY_SECOND(INFO) << "Heartbeat cannot be started, wait connected state."; + return NULL; + } + + remoteDataStatus->heartBeat = 1; + if (localDataStatus->heartBeat == 1) { + localDataStatus->heartBeat = 0; + trx->ubrTx.hbRetryCnt = 0; + return NULL; + } + + ++trx->ubrTx.hbRetryCnt; + if (trx->ubrTx.hbRetryCnt <= FLAGS_ub_hb_retry_cnt) { + return NULL; + } + + int fd = (int)trx->localShm.fd; + LOG(INFO) << "Hlc heartbeat, start to clear trx resource. 
hbTimerFd=" << fd << ", shmName=" << trx->localShm.name; + UbrPassiveClearTrx(trx, fd, UBR_HEARTBEAT); + LOG(INFO) << "Hlc heartbeat clear trx resource finish."; + return NULL; +} + +RETURN_CODE UBRing::UbrAddAsynClearTimer(UbrTrx *trx) { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx add close timer failed, trx is null."; + return UBRING_ERR; + } + + struct itimerspec timeSpec = { + .it_interval = {.tv_sec = 0, .tv_nsec = 0}, + .it_value = {.tv_sec = FLAGS_ub_flying_io_timeout, .tv_nsec = 0} + }; + + int timerFd = TimerStart(&timeSpec, UbrAsynClearCallback, (void*)trx); + if (UNLIKELY(timerFd == -1)) { + LOG(ERROR) << "Start ubr close timer failed, trx name=%s.", trx->localShm.name; + return UBRING_ERR; + } + trx->clearTimerFd = timerFd; + return UBRING_OK; +} + +void *UBRing::UbrAsynClearCallback(void *args) +{ + auto* trx = (UbrTrx*) args; + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx close, trx is null."; + return NULL; + } + + if (UNLIKELY(ShmRemoteFree(&trx->remoteShm) != UBRING_OK)) { + LOG(ERROR) << "Trx close, remote shm " << trx->remoteShm.name << " free failed."; + } + + if (UNLIKELY(UbrTrxFreeShm(trx) != UBRING_OK)) { + LOG(ERROR) << "Trx close, wait for local shm " << trx->localShm.name << " free fail."; + } + + if (UNLIKELY(UBRingManager::ReleaseUbrTrxFromMgr(trx) != UBRING_OK)) { + LOG(ERROR) << "Trx close, release shm " << trx->localShm.name << " trx failed."; + } + return NULL; +} + +int UBRing::UbrTrxSend(const void *buf, uint32_t bufLen) +{ + if (UNLIKELY(CheckTrxSendPreCheck(_trx) != UBRING_OK)) { + return UBRING_ERR; + } + // 1.2 计算空间 + auto *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; + auto *dataMsg = (UbrMsgFormat *)_trx->ubrTx.remoteDataQ.addr; + uint32_t cap = _trx->ubrTx.capacity; + uint32_t tail = dataStatusMsg->tail; + uint32_t remainChunkNum = + (_trx->ubrTx.writePos > tail) ? 
(tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); + uint32_t needMsgChunkNum = CalcUbrMsgChunkCnt(bufLen); + if (remainChunkNum < needMsgChunkNum) { + return UBRING_RETRY; + } + UbrMsgFormat *msg = &(_trx->ubrTx.localMsgSpace); + uint32_t totalSendLen = 0; + uint32_t remainBufLen = bufLen; + uint8_t isLastPkt = 0; + _trx->ubrTx.outIoId++; + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->ioId = _trx->ubrTx.outIoId; + while (remainBufLen > 0) { + isLastPkt = (uint8_t)(remainBufLen <= UBR_MSG_PAYLOAD_LEN); + msg->header[UBR_MSG_FLAG_INDEX] = isLastPkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; + msg->header[UBR_MSG_LEN_INDEX] = isLastPkt ? (uint8_t)remainBufLen : UBR_MSG_PAYLOAD_LEN; + msg->header[UBR_MSG_CUR_INDEX] = 0; + memcpy(msg->payload.inner, (const uint8_t *)buf + totalSendLen, msg->header[UBR_MSG_LEN_INDEX]); + Copy64Byte((int8_t *)&dataMsg[_trx->ubrTx.writePos], (int8_t *)msg); + _trx->ubrTx.writePos = (_trx->ubrTx.writePos + 1) % cap; + totalSendLen += msg->header[UBR_MSG_LEN_INDEX]; + remainBufLen -= msg->header[UBR_MSG_LEN_INDEX]; + } + return (int)totalSendLen; +} + +int UBRing::UbrTrxRecv(void *buf, uint32_t bufLen) +{ + RETURN_CODE rc = UBRING_OK; + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, buf, bufLen)) != UBRING_OK)) { + return (rc == UBR_NOT_CONNECTED) ? 0 : rc; + } + UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr; + uint32_t readPosEnd = _trx->ubrRx.readPos; + uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX]; + if (flag == UBR_MSG_CHUNK_NONE) { + return UBRING_RETRY; + } + return UbrTrxRecvBlockMode(static_cast(buf), bufLen); +} + +int UBRing::UbrTrxRecvBlockMode(uint8_t *dest, uint32_t bufLen) +{ + RETURN_CODE rc = UBRING_OK; + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, dest, bufLen)) != UBRING_OK)) { + return (rc == UBR_NOT_CONNECTED) ? 
0 : rc; + } + + int32_t totalCopied = 0; + int32_t remainingLen = (int32_t)bufLen; + bool notEofEncountered = true; + + UbrRx *ubrRx = &_trx->ubrRx; + UbrMsgFormat *dataMsg = (UbrMsgFormat *)ubrRx->localDataQ.addr; + bool needUpdateEpollEofPos = ubrRx->readPos == ubrRx->epEofPos; + + while (notEofEncountered && remainingLen > 0) { + if (UNLIKELY(CheckTrxRecvPreCheck(_trx) != UBRING_OK)) { + return UBRING_ERR; + } + UbrMsgFormat *currentChunk = &dataMsg[ubrRx->readPos]; + uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX]; + if (flag == UBR_MSG_CHUNK_NONE) { + continue; + } + if (flag == UBR_MSG_CHUNK_EOF) { + notEofEncountered = false; + } + uint8_t chunkMsgLen = currentChunk->header[UBR_MSG_LEN_INDEX]; + uint8_t curIndex = currentChunk->header[UBR_MSG_CUR_INDEX]; + uint8_t availableData = chunkMsgLen - curIndex; + + int32_t copyLen = (remainingLen < availableData) ? remainingLen : availableData; + memcpy(dest + totalCopied, dataMsg[ubrRx->readPos].payload.inner + curIndex, (size_t)copyLen); + totalCopied += copyLen; + remainingLen -= copyLen; + currentChunk->header[UBR_MSG_CUR_INDEX] += (uint8_t)copyLen; + if (LIKELY(currentChunk->header[UBR_MSG_CUR_INDEX] == chunkMsgLen)) { + currentChunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; + UpdateDataQTail(_trx); + ubrRx->readPos = (ubrRx->readPos + 1) % ubrRx->capacity; + } + } + if (needUpdateEpollEofPos) { + ubrRx->epEofPos = ubrRx->readPos; + } + return (int)totalCopied; +} + +ssize_t UBRing::UbrTrxWritev(const struct iovec *iov, int iovcnt) +{ + if (UNLIKELY(CheckTrxSendPreCheck(_trx) != UBRING_OK)) { + return UBRING_ERR; + } + + size_t bufLen = 0; + for (int i = 0; i < iovcnt; i++) { + bufLen += iov[i].iov_len; + } + RETURN_CODE rc = WritevHasEnoughSpace(bufLen); + if (rc != UBRING_OK) { + return rc; + } + + UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrTx.remoteDataQ.addr; + UbrMsgFormat *msg = &(_trx->ubrTx.localMsgSpace); + int curIov = 0; + size_t curIovPos = 0; + ssize_t totalSendLen = 0; + 
size_t pktRemainN = 0; + size_t iovRemain = 0; + size_t fulled = 0; + uint8_t isLastPkt = 0; + uint8_t curPktLen = 0; + _trx->ubrTx.outIoId++; + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->ioId = _trx->ubrTx.outIoId; + while (bufLen > 0) { + isLastPkt = (uint8_t)(bufLen <= UBR_MSG_PAYLOAD_LEN); + curPktLen = isLastPkt ? (uint8_t)bufLen : UBR_MSG_PAYLOAD_LEN; + msg->header[UBR_MSG_FLAG_INDEX] = isLastPkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; + msg->header[UBR_MSG_LEN_INDEX] = curPktLen; + msg->header[UBR_MSG_CUR_INDEX] = 0; + pktRemainN = curPktLen; + while (curIov < iovcnt && pktRemainN > 0) { + iovRemain = (iov[curIov].iov_len - curIovPos); + fulled = iovRemain > pktRemainN ? pktRemainN : iovRemain; + memcpy((msg->payload.inner + (curPktLen - (uint8_t)pktRemainN)), + (uint8_t *)(iov[curIov].iov_base) + curIovPos, + fulled); + pktRemainN -= fulled; + curIovPos += fulled; + if (curIovPos == iov[curIov].iov_len) { + curIov++; + curIovPos = 0; + } + } + + Copy64Byte((int8_t *)&dataMsg[_trx->ubrTx.writePos], (int8_t *)msg); + _trx->ubrTx.writePos = (_trx->ubrTx.writePos + 1) % _trx->ubrTx.capacity; + totalSendLen += (ssize_t)curPktLen; + bufLen -= (int)curPktLen; + } + return totalSendLen; +} + +ssize_t UBRing::UbrTrxReadv(const struct iovec *iov, int iovcnt) +{ + RETURN_CODE rc = UBRING_OK; + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, iov, (uint32_t)iovcnt)) != UBRING_OK)) { + return (rc == UBR_NOT_CONNECTED) ? 
0 : rc; + } + UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr; + uint32_t readPosEnd = _trx->ubrRx.readPos; + uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX]; + if (flag == UBR_MSG_CHUNK_NONE) { + errno = EAGAIN; + return -1; + } + ssize_t nr = UbrTrxReadvBlockMode(iov, iovcnt); + if (UNLIKELY(nr == -1)) { + LOG(ERROR) << "Non-blocking readv msg in failed, connection has been closed."; + errno = EPIPE; + return -1; + } + return nr; +} + +ssize_t UBRing::UbrTrxReadvBlockMode(const struct iovec *iov, int iovcnt) +{ + RETURN_CODE rc = UBRING_OK; + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, iov, (uint32_t)iovcnt)) != UBRING_OK)) { + return (rc == UBR_NOT_CONNECTED) ? 0 : rc; + } + + size_t remainBufLen = 0; + for (int i = 0; i < iovcnt; i++) { + remainBufLen += iov[i].iov_len; + } + + bool needUpdateEpollEofPos = _trx->ubrRx.readPos == _trx->ubrRx.epEofPos; + ssize_t totalRecvLen = StartReadv(_trx, iov, iovcnt, remainBufLen); + + if (needUpdateEpollEofPos) { + _trx->ubrRx.epEofPos = _trx->ubrRx.readPos; + } + return totalRecvLen; +} + +RETURN_CODE UBRing::IsUbrTrxReadable(uint32_t epEvent) +{ + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "The trx to be checked is NULL."; + return UBRING_ERR; + } + if (UNLIKELY(_trx->localShm.addr == NULL)) { + LOG(ERROR) << "The trx localShm to be checked is NULL."; + return UBRING_ERR; + } + if (UNLIKELY(_trx->ubrTx.trxState != UBR_STATE_CONNECTED)) { + // TODO mwj 这几块的日志是否需要删除 + // LOG(ERROR) << "The trx is not connected state."; + return UBRING_ERR; + } + + uint64_t ioId = ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->ioId; + if ((epEvent & EPOLLET) && ioId == _trx->ubrRx.inIoId) { + return MPA_MUXER_NOT_READY; + } + + uint32_t readPosEnd = _trx->ubrRx.readPos; + if (epEvent & EPOLLET) { + readPosEnd = _trx->ubrRx.epEofPos; + } + + UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr; + uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX]; + if (flag == 
UBR_MSG_CHUNK_NONE) { + return MPA_MUXER_NOT_READY; + } + if (epEvent & EPOLLET) { + _trx->ubrRx.inIoId = ioId; + } + return UBRING_OK; +} + +RETURN_CODE UBRing::IsUbrTrxWriteable(uint32_t epEvent) +{ + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "The trx to be checked is NULL."; + return UBRING_ERR; + } + if (UNLIKELY(_trx->localShm.addr == NULL)) { + LOG(ERROR) << "The trx localShm to be checked is NULL."; + return UBRING_ERR; + } + if (UNLIKELY((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr == NULL)) { + LOG(ERROR) << "The trx localTxEventQ addr is NULL."; + return UBRING_ERR; + } + if (UNLIKELY((UbrEventQMsg *)_trx->ubrTx.localDataStatusQ.addr == NULL)) { + LOG(ERROR) << "The trx localDataStatusQ addr is NULL."; + return UBRING_ERR; + } + + if (UNLIKELY(_trx->ubrTx.trxState != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "The trx is not connected state."; + return UBRING_ERR; + } + + UbrDataStatusQMsg *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; + uint32_t cap = _trx->ubrTx.capacity; + uint32_t tail = dataStatusMsg->tail; + uint32_t remainChunkNum = + (_trx->ubrTx.writePos > tail) ? 
(tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); + if (remainChunkNum == 0) { + _trx->ubrTx.epLastCap = remainChunkNum; + return MPA_MUXER_NOT_READY; + } + + if ((epEvent & EPOLLET) && (_trx->ubrTx.epLastCap >= remainChunkNum)) { + _trx->ubrTx.epLastCap = remainChunkNum; + return MPA_MUXER_NOT_READY; + } + _trx->ubrTx.epLastCap = remainChunkNum; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrSetTimeout(UbrTaskStep taskType, int timeout) +{ + if (taskType >= UBR_TASK_STEP_NUM || timeout < 0) { + LOG(ERROR) << "Set timeout failed, invalid task type."; + return UBRING_ERR; + } + + g_sleepTime[taskType] = (uint32_t)timeout; + LOG(INFO) << "Set timeout success, taskType=" << taskType << ", timeout=" << timeout; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrTrxFreeShm(UbrTrx *trx) +{ + if (trx == NULL) { + LOG(ERROR) << "Trx is NULL."; + return UBRING_ERR; + } + + RETURN_CODE rc = UBRING_OK; + rc = ShmMunmap(&trx->localShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx close, local unmap " << trx->localShm.name << " shm fail."; + return UBRING_ERR; + } + + rc = ShmFree(&trx->localShm); + if (UNLIKELY(rc != UBRING_OK)) { + if (UNLIKELY(rc == SHM_ERR_RESOURCE_ATTACHED || rc == SHM_ERR_NOT_FOUND)) { + LOG(INFO) << "Wait for " << trx->remoteShm.name << " remote free shm."; + return UBRING_OK; + } + LOG(ERROR) << "Wait for " << trx->localShm.name << " local shm free fail."; + return UBRING_ERR; + } + + size_t nameLen = strlen(trx->remoteShm.name); + if (!(nameLen <= 0 || nameLen > SHM_MAX_NAME_LEN || trx->remoteShm.len <= 0)) { + rc = ShmFree(&trx->remoteShm); + } + if (rc != UBRING_OK) { + if (rc == SHM_ERR_RESOURCE_ATTACHED || rc == SHM_ERR_NOT_FOUND) { + LOG(INFO) << "Wait for " << trx->remoteShm.name << " remote free shm."; + return UBRING_OK; + } + LOG(ERROR) << "Wait for " << trx->remoteShm.name << " remote shm free fail."; + return UBRING_ERR; + } + + return UBRING_OK; +} + +void UBRing::PreWriteAddr(uint8_t *addr, size_t len) +{ + 
if (addr == NULL) { + return; + } + + size_t i = 0; + while (i < len) { + if (i + sizeof(uint64_t) <= len) { + *(uint64_t *)(addr + i) = (uint64_t)0; + i += sizeof(uint64_t); + } else if (i + sizeof(uint32_t) < len) { + *(uint32_t *)(addr + i) = (uint32_t)0; + i += sizeof(uint32_t); + } else if (i + sizeof(uint16_t) < len) { + *(uint16_t *)(addr + i) = (uint16_t)0; + i += sizeof(uint16_t); + } else { + *(addr + i) = (uint8_t)0; + i += sizeof(uint8_t); + } + } +} + +void UBRing::PrewriteUbrTx(UbrTx *tx) +{ + if (tx == NULL) { + return; + } + PreWriteAddr(tx->remoteDataQ.addr, tx->capacity * sizeof(UbrMsgFormat)); +} + +void UBRing::PrewriteUbrRx(UbrRx *rx) +{ + if (rx == NULL) { + return; + } + PreWriteAddr(rx->localDataQ.addr, rx->capacity * sizeof(UbrMsgFormat)); +} + +RETURN_CODE UBRing::UbrTrxMapLocalShm(SHM *localShm) +{ + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, trx is null."; + return UBRING_ERR; + } + if (UNLIKELY(localShm == NULL || localShm->addr == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, localShm is null or addr is NULL."; + return UBRING_ERR; + } + _trx->localShm = *localShm; + _trx->ubrTx.localTxEventQ.addr = localShm->addr + TX_EVENTQ_ADDR_OFFSET; + _trx->ubrTx.localTxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrRx.localRxEventQ.addr = localShm->addr + RX_EVENTQ_ADDR_OFFSET; + _trx->ubrRx.localRxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrTx.localDataStatusQ.addr = localShm->addr + DATASTATUSQ_ADDR_OFFSET; + _trx->ubrTx.localDataStatusQ.len = UBR_DATASTATUSQ_LEN; + size_t addrAlignedOffset = Aligned64Offset(localShm->addr + DATAQ_ADDR_OFFSET); + LOG(DEBUG) << "UbrRx's localDataQ address will aligned with offset=" << addrAlignedOffset; + _trx->ubrRx.localDataQ.addr = localShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; + _trx->ubrRx.localDataQ.len = localShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrTrxMapRemoteShm(SHM *remoteShm) +{ + if (UNLIKELY(_trx == 
NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, trx is null."; + return UBRING_ERR; + } + if (UNLIKELY(remoteShm == NULL || remoteShm->addr == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, remoteShm is null or addr is NULL."; + return UBRING_ERR; + } + _trx->remoteShm = *remoteShm; + _trx->ubrRx.remoteTxEventQ.addr = remoteShm->addr + TX_EVENTQ_ADDR_OFFSET; + _trx->ubrRx.remoteTxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrTx.remoteRxEventQ.addr = remoteShm->addr + RX_EVENTQ_ADDR_OFFSET; + _trx->ubrTx.remoteRxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrRx.remoteDataStatusQ.addr = remoteShm->addr + DATASTATUSQ_ADDR_OFFSET; + _trx->ubrRx.remoteDataStatusQ.len = UBR_DATASTATUSQ_LEN; + size_t addrAlignedOffset = Aligned64Offset(remoteShm->addr + DATAQ_ADDR_OFFSET); + LOG(DEBUG) << "UbrTx's remoteDataQ will aligned with offset=" << addrAlignedOffset; + _trx->ubrTx.remoteDataQ.addr = remoteShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; + _trx->ubrTx.remoteDataQ.len = remoteShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrServerTrxInit(SHM *localShm, SHM *remoteShm) +{ + RETURN_CODE rc = UbrTrxMapShm(localShm, remoteShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) <<"Trx map shared memory failed."; + return rc; + } + + uint32_t localDataMsgCap = (uint32_t)(_trx->ubrRx.localDataQ.len / UBR_MSG_LEN); + uint32_t remoteDataMsgCap = (uint32_t)(_trx->ubrTx.remoteDataQ.len / UBR_MSG_LEN); + _trx->ubrRx.capacity = localDataMsgCap; + _trx->ubrTx.capacity = remoteDataMsgCap; + rc = UBRingManager::GetUbrDealMsgMaxCnt(_trx->ubrRx.capacity, &_trx->ubrRx.dealMsgMaxCnt); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Get ubring deal msg max cnt."; + return rc; + } + PrewriteUbrRx(&_trx->ubrRx); + PrewriteUbrTx(&_trx->ubrTx); + + ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->tail = remoteDataMsgCap - 1; + ((UbrDataStatusQMsg *)(_trx->ubrRx.remoteDataStatusQ.addr))->tail = localDataMsgCap - 1; + + if 
(UNLIKELY(UbrAddTimer() != UBRING_OK)) { + LOG(ERROR) << "Ubr add timer failed, localName=" << localShm->name; + return UBRING_ERR; + } + + ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->timeout = FLAGS_ub_connect_timeout; + ((UbrDataStatusQMsg *)(_trx->ubrRx.remoteDataStatusQ.addr))->timeout = FLAGS_ub_connect_timeout; + + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CONNECTED; + ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag = UBR_STATE_CONNECTED; + _trx->ubrTx.trxState = UBR_STATE_CONNECTED; + _trx->ubrRx.trxState = UBR_STATE_CONNECTED; + return UBRING_OK; +} + +int UBRing::UbrAllocateServerShm(SHM* remote_trx_shm, SHM* local_trx_shm) { + UbrSetSleepTask(UBR_TASK_ACCEPT_MAP_FRONT); + if (UNLIKELY((ShmRemoteMalloc(remote_trx_shm)) != UBRING_OK)) { + LOG(ERROR) << "Trx apply remote shared memory failed."; + return -1; + } + + if (UNLIKELY((ShmLocalCalloc(local_trx_shm)) != UBRING_OK)) { + LOG(ERROR) << "Trx apply local shared memory failed."; + return -1; + } + + UbrTrx **ubrTrxPtr = &_trx; + if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(ubrTrxPtr)) != UBRING_OK)) { + LOG(ERROR) << "Acquire ubrtrx failed."; + ShmRemoteFree(remote_trx_shm); + ShmLocalFree(local_trx_shm); + return -1; + } + _trx->type = TCP_TRX; + if (UNLIKELY((UbrServerTrxInit(local_trx_shm, remote_trx_shm)) != UBRING_OK)) { + LOG(ERROR) << "Server trx init failed."; + ShmRemoteFree(remote_trx_shm); + UbrTrxFreeShm(_trx); + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return -1; + } + return 0; +} + +int UBRing::UbrAllocateLocalShm(SHM *local_trx_shm, const char *shm_name) +{ + if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(&(_trx))) != UBRING_OK)) { + LOG(ERROR) << "Acquire ubrtrx failed, localName=" << shm_name; + return -1; + } + + _trx->type = TCP_TRX; + if (UNLIKELY((ApplyAndMapLocalShm(local_trx_shm, shm_name)) != UBRING_OK)) { + LOG(ERROR) << "Trx apply or map local shared memory failed, localName=" << shm_name; + return -1; + } + return 0; 
+} + +int UBRing::UbrMapRemoteShm(SHM *local_trx_shm, const char *local_name) +{ + RETURN_CODE rc = UbrMapRemoteShmAddTimer(local_trx_shm, local_name); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Connect Trx failed, local shm name=" << local_trx_shm->name; + return -1; + } + PrewriteUbrRx(&_trx->ubrRx); + PrewriteUbrTx(&_trx->ubrTx); + ((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = UBR_STATE_CONNECTED; + ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag = UBR_STATE_CONNECTED; + _trx->ubrTx.trxState = UBR_STATE_CONNECTED; + _trx->ubrRx.trxState = UBR_STATE_CONNECTED; + return 0; +} + +RETURN_CODE UBRing::UbrMapRemoteShmAddTimer(SHM *localTrxShm, const char *localName) +{ + uint64_t startTime = GetCurNanoSeconds(); + + size_t remoteServerLen = UBR_MSG_LEN * (((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->tail + 1) + + UBR_MSG_LEN * ((DATAQ_ADDR_OFFSET / UBR_MSG_LEN) + 1); + SHM remoteTrxShm = {NULL, remoteServerLen, 0, {0}, localTrxShm->fd}; + int result = snprintf(remoteTrxShm.name, + SHM_MAX_NAME_BUFF_LEN, + "%s_%s_%s", + SHM_NAME_PREFIX, + localName, + SERVER_SHM_NAME_SUFFIX); + if (UNLIKELY(result < 0)) { + LOG(ERROR) << "Copy server shared memory name failed, localName=%s, ret=%d.", localName, result; + return UBRING_ERR; + } + UbrSetSleepTask(UBR_TASK_CONNECT_MAP_FRONT); + RETURN_CODE rc = ApplyAndMapRemoteShm(&remoteTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Connect Trx map shared memory failed, remote shm=" << remoteTrxShm.name; + return rc; + } + + if (UNLIKELY(UbrAddTimer() != UBRING_OK)) { + LOG(ERROR) << "Ubr add timer failed, localName=" << localName; + ShmRemoteFree(&remoteTrxShm); + return UBRING_ERR; + } + + UbrSetSleepTask(UBR_TASK_CONNECT_MAP_AFTER); + + uint32_t timeout = ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->timeout; + if (HasTimedOut(startTime, timeout) != UBRING_OK) { + LOG(ERROR) << "Local shm " << localTrxShm->name << " wait for connect remote map timeout."; + 
DeleteTimerSafe((uint32_t)_trx->hbTimerFd); + DeleteTimerSafe((uint32_t)_trx->timerFd); + ShmRemoteFree(&remoteTrxShm); + return UBRING_ERR_TIMEOUT; + } + + return UBRING_OK; +} + +RETURN_CODE UBRing::ApplyAndMapLocalShm(SHM *localTrxShm, const char *localName) +{ + if (UNLIKELY(_trx == NULL || localTrxShm == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, trx is null, localName=" << localName; + return UBRING_ERR; + } + int result = snprintf(localTrxShm->name, + SHM_MAX_NAME_BUFF_LEN, + "%s_%s_%s", + SHM_NAME_PREFIX, + localName, + CLIENT_SHM_NAME_SUFFIX); + if (UNLIKELY(result < 0)) { + LOG(ERROR) << "Copy client localTrx shared memory name failed, localName=" << localName << ", ret=" << result; + return UBRING_ERR; + } + + RETURN_CODE rc = ShmLocalCalloc(localTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx apply local shared memory failed, local shm name=" << localTrxShm->name << ", rc=" << rc; + if (rc == SHM_ERR_EXIST || rc == SHM_ERR_NOT_FOUND) { + rc = UBR_ERR_ADDR_IN_USE; + } + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return rc; + } + rc = UbrTrxMapLocalShm(localTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx map local shared memory failed, local shm name=" << localTrxShm->name; + ShmLocalFree(localTrxShm); + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return rc; + } + ((UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr)->timeout = FLAGS_ub_connect_timeout; + _trx->ubrRx.capacity = (uint32_t)(_trx->ubrRx.localDataQ.len / UBR_MSG_LEN); + rc = UBRingManager::GetUbrDealMsgMaxCnt(_trx->ubrRx.capacity, &_trx->ubrRx.dealMsgMaxCnt); + if (rc != UBRING_OK) { + LOG(ERROR) << "Get ubring deal msg max cnt, local shm name=" << localTrxShm->name; + ShmLocalFree(localTrxShm); + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return rc; + } + return UBRING_OK; +} + +RETURN_CODE UBRing::ApplyAndMapRemoteShm(SHM *remoteTrxShm) +{ + RETURN_CODE rc = ShmRemoteMalloc(remoteTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) 
<< "Trx apply remote shared memory failed."; + return rc; + } + rc = UbrTrxMapRemoteShm(remoteTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx map shared memory failed."; + ShmRemoteFree(remoteTrxShm); + return rc; + } + _trx->ubrTx.capacity = (uint32_t)(_trx->ubrTx.remoteDataQ.len / UBR_MSG_LEN); + return UBRING_OK; +} + +RETURN_CODE UBRing::WritevHasEnoughSpace(size_t bufLen) +{ + UbrDataStatusQMsg *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; + uint32_t cap = _trx->ubrTx.capacity; + uint32_t tail = dataStatusMsg->tail; + uint32_t remainChunkNum = + (_trx->ubrTx.writePos > tail) ? (tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); + uint32_t needMsgChunkNum = CalcUbrMsgChunkCnt((uint32_t)bufLen); + if (remainChunkNum < needMsgChunkNum) { + return UBRING_RETRY; + } + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType) +{ + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx close failed, trx is null."; + return UBRING_ERR; + } + + UbrEventQMsg* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; + while (ATOMIC_LOAD(trx->closeCnt) == 1 && localTxEventQ->flag == UBR_STATE_CLOSING) { + if (HasTimedOut(startTime, FLAGS_ub_disconnect_timeout) != UBRING_OK) { + LOG(ERROR) << "Trx close failed, wait close time out."; + break; + } + usleep(1); + } + int firstClearExpected = UBR_CLOSE_FIRST; + int secondClearExpected = UBR_CLOSE_SECOND; + if (localTxEventQ->flag == UBR_STATE_CLOSING) { + if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeState, firstClearExpected, UBR_CLOSE_SECOND)) { + LOG(ERROR) << "Trx close, exist process is closing, name=" << trx->localShm.name; + return UBRING_REENTRY; + } else if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeState, secondClearExpected, UBR_CLOSE_END)) { + localTxEventQ->flag = UBR_STATE_CLOSED; + trx->ubrTx.trxState = UBR_STATE_CLOSED; + } + } + + if (closeType == UBR_SEND_CLOSE) { + 
DeleteTimerSafe((uint32_t)trx->timerFd); + } else { + DeleteTimer((uint32_t)trx->timerFd); + } + DeleteTimerSafe((uint32_t)trx->hbTimerFd); + return UBRING_OK; +} + +RETURN_CODE UBRing::ClearTrxResource(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType, int op) +{ + UbrEventQMsg* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; + RETURN_CODE rc = UbrClearResourceCheck(trx, startTime, closeType); + if (rc != UBRING_OK) { + return rc; + } + + rc = UbrAddAsynClearTimer(trx); + if (rc != UBRING_OK) { + LOG(ERROR) << "Trx close, add " << trx->localShm.name << " close clear timer failed."; + return UBRING_ERR; + } + + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrTrxCloseCheck(UbrTrx *trx) +{ + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx close failed, client trx is null."; + return UBRING_ERR; + } + int expected = MAX_CLOSE_COUNT; + if (!ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeCnt, expected, MAX_CLOSE_COUNT - 1)) { + LOG(ERROR) << "Trx close failed, exist other close acquire, trx local name=" << trx->localShm.name; + return UBRING_ERR; + } + + if (UNLIKELY(trx->ubrTx.localTxEventQ.addr == nullptr)) { + LOG(ERROR) << "Trx close failed, localTxEventQ addr is NULL, trx local name=" << trx->localShm.name; + return UBRING_ERR; + } + return UBRING_OK; +} + +ssize_t UBRing::StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remainBufLen) +{ + ssize_t totalRecvLen = 0; + int iovIndex = 0; + size_t iovPos = 0; + UbrMsgFormat *dataMsg = (UbrMsgFormat *)trx->ubrRx.localDataQ.addr; + bool notEofEncountered = true; + while (notEofEncountered && remainBufLen > 0) { + if (UNLIKELY(CheckTrxRecvPreCheck(trx) != UBRING_OK)) { + return UBRING_ERR; + } + UbrMsgFormat *currentChunk = &dataMsg[trx->ubrRx.readPos]; + uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX]; + if (flag == UBR_MSG_CHUNK_NONE) { + continue; + } + if (flag == UBR_MSG_CHUNK_EOF) { + notEofEncountered = false; + } + uint8_t chunkMsgLen = 
currentChunk->header[UBR_MSG_LEN_INDEX]; + uint8_t curIndex = currentChunk->header[UBR_MSG_CUR_INDEX]; + uint8_t recvLen = + remainBufLen > (size_t)(chunkMsgLen - curIndex) ? (chunkMsgLen - curIndex) : (uint8_t)remainBufLen; + while (iovIndex < iovcnt && recvLen > 0) { + size_t copyLen = + recvLen > (iov[iovIndex].iov_len - iovPos) ? iov[iovIndex].iov_len - iovPos : (size_t)recvLen; + memcpy((uint8_t *)iov[iovIndex].iov_base + iovPos, currentChunk->payload.inner + curIndex, copyLen); + recvLen -= (uint8_t)copyLen; + iovPos += copyLen; + curIndex += (uint8_t)copyLen; + if (iovPos == iov[iovIndex].iov_len) { + iovIndex++; + iovPos = 0; + } + remainBufLen -= copyLen; + totalRecvLen += (ssize_t)copyLen; + } + currentChunk->header[UBR_MSG_CUR_INDEX] = curIndex; + if (currentChunk->header[UBR_MSG_CUR_INDEX] == chunkMsgLen) { + currentChunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; + UpdateDataQTail(trx); + trx->ubrRx.readPos = (trx->ubrRx.readPos + 1) % trx->ubrRx.capacity; + } + } + return totalRecvLen; +} +} // namespace ubring +} // namespace brpc \ No newline at end of file diff --git a/src/brpc/ub/ub_ring.h b/src/brpc/ubring/ub_ring.h similarity index 54% rename from src/brpc/ub/ub_ring.h rename to src/brpc/ubring/ub_ring.h index d1afd204fc..b6a6ef40eb 100644 --- a/src/brpc/ub/ub_ring.h +++ b/src/brpc/ubring/ub_ring.h @@ -21,15 +21,15 @@ #include #include #include "butil/macros.h" -#include "brpc/ub/ubr_trx.h" -#include "brpc/ub/ub_ring_manager.h" -#include "brpc/ub/shm/shm_mgr.h" -#include "brpc/ub/timer/timer_mgr.h" +#include "brpc/ubring/ubr_trx.h" +#include "brpc/ubring/ub_ring_manager.h" +#include "brpc/ubring/shm/shm_mgr.h" +#include "brpc/ubring/timer/timer_mgr.h" namespace brpc { -namespace ub { +namespace ubring { DECLARE_int32(ub_flying_io_timeout); -extern uint32_t g_sleep_time[UBR_TASK_STEP_NUM]; +extern uint32_t g_sleepTime[UBR_TASK_STEP_NUM]; class UBRing { public: @@ -37,7 +37,7 @@ class UBRing { ~UBRing(); DISALLOW_COPY_AND_ASSIGN(UBRing); - 
RETURN_CODE UbrTrxMapShm(SHM *local_shm, SHM *remote_shm); + RETURN_CODE UbrTrxMapShm(SHM *localShm, SHM *remoteShm); RETURN_CODE UbrTrxClose(); @@ -57,48 +57,48 @@ class UBRing { static void *UbrAsynClearCallback(void *args); - int UbrTrxSend(const void *buf, uint32_t buf_len); + int UbrTrxSend(const void *buf, uint32_t bufLen); - int UbrTrxRecv(void *buf, uint32_t buf_len); + int UbrTrxRecv(void *buf, uint32_t bufLen); - int UbrTrxRecvBlockMode(uint8_t *dest, uint32_t buf_len); + int UbrTrxRecvBlockMode(uint8_t *dest, uint32_t bufLen); ssize_t UbrTrxWritev(const struct iovec *iov, int iovcnt); ssize_t UbrTrxReadv(const struct iovec *iov, int iovcnt); ssize_t UbrTrxReadvBlockMode(const struct iovec *iov, int iovcnt); - RETURN_CODE IsUbrTrxReadable(uint32_t ep_event); + RETURN_CODE IsUbrTrxReadable(uint32_t epEvent); - RETURN_CODE IsUbrTrxWriteable(uint32_t ep_event); + RETURN_CODE IsUbrTrxWriteable(uint32_t epEvent); - RETURN_CODE UbrSetTimeout(UbrTaskStep task_type, int timeout); + RETURN_CODE UbrSetTimeout(UbrTaskStep taskType, int timeout); static RETURN_CODE UbrTrxFreeShm(UbrTrx *trx); void PrewriteUbrTx(UbrTx *tx); void PrewriteUbrRx(UbrRx *rx); - static inline void UbrSetSleepTask(UbrTaskStep task_type) + static inline void UbrSetSleepTask(UbrTaskStep taskType) { - if (task_type >= UBR_TASK_STEP_NUM || task_type < 0) { + if (taskType >= UBR_TASK_STEP_NUM || taskType < 0) { return; } - uint32_t type = (uint32_t)task_type; - sleep(g_sleep_time[type]); + uint32_t type = (uint32_t)taskType; + sleep(g_sleepTime[type]); return; } - static inline RETURN_CODE CheckTrxConnectParam(const char *listener_name, const char *local_name) + static inline RETURN_CODE CheckTrxConnectParam(const char *listenerName, const char *localName) { - if (UNLIKELY(listener_name == NULL)) { + if (UNLIKELY(listenerName == NULL)) { LOG(ERROR) << "The request listener name is null."; - return HLC_ERR; + return UBRING_ERR; } - if (UNLIKELY(local_name == NULL)) { + if (UNLIKELY(localName == 
NULL)) { LOG(ERROR) << "The request trx shared memory name is null."; - return HLC_ERR; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } int UbrAllocateServerShm(SHM* remote_trx_shm, SHM* local_trx_shm); @@ -107,92 +107,92 @@ class UBRing { int UbrAllocateLocalShm(SHM *local_trx_shm, const char *shm_name); - RETURN_CODE UbrMapRemoteShmAddTimer(SHM *local_trx_shm, const char *local_name); + RETURN_CODE UbrMapRemoteShmAddTimer(SHM *localTrxShm, const char *localName); static inline RETURN_CODE CheckTrxSendPreCheck(UbrTrx *trx) { - if (UNLIKELY(trx->ubr_tx.trx_state != UBR_STATE_CONNECTED)) { + if (UNLIKELY(trx->ubrTx.trxState != UBR_STATE_CONNECTED)) { LOG(ERROR) << "Trx send failed, trx is not connected state."; - return HLC_ERR; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } - static RETURN_CODE CheckTrxRecvParam(UbrTrx *trx, const void *buf, uint32_t buf_len) + static RETURN_CODE CheckTrxRecvParam(UbrTrx *trx, const void *buf, uint32_t bufLen) { if (UNLIKELY(trx == NULL)) { LOG(ERROR) << "Trx recv failed, trx is null."; - return HLC_ERR; + return UBRING_ERR; } - if (UNLIKELY((UbrEventQMsg *)trx->ubr_rx.local_rx_event_q.addr == NULL)) { - LOG(ERROR) << "Trx send failed, local_tx_event_q addr is NULL."; - return HLC_ERR; + if (UNLIKELY((UbrEventQMsg *)trx->ubrRx.localRxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx send failed, localTxEventQ addr is NULL."; + return UBRING_ERR; } - if (UNLIKELY(trx->ubr_rx.trx_state != UBR_STATE_CONNECTED)) { - LOG(ERROR) << "Trx recv failed, trx is not connected statep=" << trx->ubr_rx.trx_state; + if (UNLIKELY(trx->ubrRx.trxState != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "Trx recv failed, trx is not connected statep=" << trx->ubrRx.trxState; return UBR_NOT_CONNECTED; } if (UNLIKELY(buf == NULL)) { LOG(ERROR) << "Trx recv failed, buf is null."; - return HLC_ERR; + return UBRING_ERR; } - if (UNLIKELY(buf_len == 0)) { - LOG(ERROR) << "Trx recv failed, buf_len is 0."; - return HLC_ERR; + if (UNLIKELY(bufLen 
== 0)) { + LOG(ERROR) << "Trx recv failed, bufLen is 0."; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } static inline RETURN_CODE CheckTrxRecvPreCheck(UbrTrx *trx) { - if (UNLIKELY(trx->ubr_rx.trx_state != UBR_STATE_CONNECTED)) { + if (UNLIKELY(trx->ubrRx.trxState != UBR_STATE_CONNECTED)) { LOG(ERROR) << "Trx recv failed, trx is not connected state."; - return HLC_ERR; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } static inline void UpdateDataQTail(UbrTrx *trx) { - ((UbrDataStatusQMsg *)trx->ubr_rx.remote_data_status_q.addr)->tail = trx->ubr_rx.read_pos; + ((UbrDataStatusQMsg *)trx->ubrRx.remoteDataStatusQ.addr)->tail = trx->ubrRx.readPos; } static RETURN_CODE UbrTrxCallbackCheck(UbrTrx *trx) { if (trx == NULL) { LOG(ERROR) << "Trx close callback failed, trx is null."; - return HLC_ERR; + return UBRING_ERR; } - if (UNLIKELY(trx->local_shm.addr == NULL)) { - LOG(ERROR) << "Trx close failed, local_shm addr is NULL."; - return HLC_ERR; + if (UNLIKELY(trx->localShm.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localShm addr is NULL."; + return UBRING_ERR; } - if (UNLIKELY(trx->ubr_rx.local_rx_event_q.addr == NULL)) { - LOG(ERROR) << "Trx close failed, local_rx_event_q addr is NULL."; - return HLC_ERR; + if (UNLIKELY(trx->ubrRx.localRxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localRxEventQ addr is NULL."; + return UBRING_ERR; } - if (UNLIKELY(trx->ubr_tx.local_tx_event_q.addr == NULL)) { - LOG(ERROR) << "Trx close failed, local_tx_event_q addr is NULL."; - return HLC_ERR; + if (UNLIKELY(trx->ubrTx.localTxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localTxEventQ addr is NULL."; + return UBRING_ERR; } - return HLC_OK; + return UBRING_OK; } private: - RETURN_CODE UbrTrxMapLocalShm(SHM *local_shm); - RETURN_CODE UbrTrxMapRemoteShm(SHM *remote_shm); - RETURN_CODE ApplyAndMapLocalShm(SHM *local_trx_shm, const char *local_name); - RETURN_CODE ApplyAndMapRemoteShm(SHM *remote_trx_shm); + RETURN_CODE 
UbrTrxMapLocalShm(SHM *localShm); + RETURN_CODE UbrTrxMapRemoteShm(SHM *remoteShm); + RETURN_CODE ApplyAndMapLocalShm(SHM *localTrxShm, const char *localName); + RETURN_CODE ApplyAndMapRemoteShm(SHM *remoteTrxShm); static RETURN_CODE UbrTrxCloseCheck(UbrTrx *trx); - void ReleaseFileLock(int lock_fd); - ssize_t StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remain_buf_len); + void ReleaseFileLock(int lockFd); + ssize_t StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remainBufLen); void PreWriteAddr(uint8_t *addr, size_t len); - RETURN_CODE WritevHasEnoughSpace(size_t buf_len); - RETURN_CODE UbrServerTrxInit(SHM *local_shm, SHM *remote_shm); - static RETURN_CODE UbrClearResourceCheck(UbrTrx *trx, uint64_t start_time, UbrCloseType close_type); - static RETURN_CODE ClearTrxResource(UbrTrx *trx, uint64_t start_time, UbrCloseType close_type, int op=0); + RETURN_CODE WritevHasEnoughSpace(size_t bufLen); + RETURN_CODE UbrServerTrxInit(SHM *localShm, SHM *remoteShm); + static RETURN_CODE UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType); + static RETURN_CODE ClearTrxResource(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType, int op=0); UbrTrx* _trx{nullptr}; }; diff --git a/src/brpc/ubring/ub_ring_manager.cpp b/src/brpc/ubring/ub_ring_manager.cpp new file mode 100644 index 0000000000..f99b1239b0 --- /dev/null +++ b/src/brpc/ubring/ub_ring_manager.cpp @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include "brpc/ubring/ub_ring_manager.h" +#include "butil/logging.h" + +namespace brpc { +namespace ubring { +DEFINE_int32(ubr_max_managed_num, 1024, "maximum number of managed ubring"); +DEFINE_int32(tail_update_after_read, 8, "Position of the tail update after the read"); + +UbrMgr UBRingManager::g_ubrMgr; +UbrLinkInfoMgr UBRingManager::g_linkInfoMgr; +pthread_mutex_t UBRingManager::g_ubrTrxMgrMtx = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t UBRingManager::g_ubrListenerMgrMtx = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t UBRingManager::g_linkInfoMgrMtx = PTHREAD_MUTEX_INITIALIZER; + +uint64_t g_ubrTrxNum = 0; +uint64_t g_ubEventCnt = 0; +uint64_t g_ubrListenerNum = 0; + +RETURN_CODE UBRingManager::GetUbrDealMsgMaxCnt(const uint32_t capacity, uint32_t *dealMsgMaxCnt) { + if (UNLIKELY(dealMsgMaxCnt == NULL)) { + LOG(ERROR) << "Get update factor failed, dealMsgMaxCnt is null."; + return UBRING_ERR; + } + if (UNLIKELY(FLAGS_tail_update_after_read == 0)) { + LOG(ERROR) << "Get update factor failed, factor is 0."; + return UBRING_ERR; + } + *dealMsgMaxCnt = capacity / FLAGS_tail_update_after_read; + return UBRING_OK; +} + +RETURN_CODE UBRingManager::UbrMgrDefault() +{ + g_ubrMgr.trxNum = 0; + g_ubrMgr.trxCap = FLAGS_ubr_max_managed_num; + g_ubrMgr.trxMgrUnitStatus = NULL; + g_ubrMgr.trxMgr = NULL; + return UBRING_OK; +} + +RETURN_CODE UBRingManager::UbrMgrInit() { + RETURN_CODE rc = UbrMgrDefault(); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Ubr manager set default values failed."; + return rc; + } + + size_t trxMgrSize = 
g_ubrMgr.trxCap * sizeof(UbrTrx); + g_ubrMgr.trxMgr = (UbrTrx *)malloc(trxMgrSize); + size_t trxMgrStatusSize = g_ubrMgr.trxCap * sizeof(UbrMgrUnitStatus); + g_ubrMgr.trxMgrUnitStatus = (UbrMgrUnitStatus *)malloc(trxMgrStatusSize); + if (UNLIKELY(g_ubrMgr.trxMgr == NULL || + g_ubrMgr.trxMgrUnitStatus == NULL)) { + LOG(ERROR) << "Ubr manager memory allocation failed."; + UbrMgrFini(); + return UBRING_ERR; + } + + memset(g_ubrMgr.trxMgr, 0, trxMgrSize); + memset(g_ubrMgr.trxMgrUnitStatus, UBR_MGR_UNIT_FREE, trxMgrStatusSize); + LinkInfoInit(); + return UBRING_OK; + return UBR_NOT_CONNECTED; +} + +void UBRingManager::UbrMgrFini() { + { + LOCK_GUARD(g_ubrTrxMgrMtx); + FREE_PTR(g_ubrMgr.trxMgr); + FREE_PTR(g_ubrMgr.trxMgrUnitStatus); + } + { + LOCK_GUARD(g_ubrListenerMgrMtx); + } + g_ubrMgr.trxNum = 0; + g_ubrMgr.trxCap = 0; + LinkInfoFini(); +} + +RETURN_CODE UBRingManager::AcquireUbrTrxFromMgr(UbrTrx **trx) { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Acquire trx failed, trx is null."; + return UBRING_ERR; + } + + if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + LOG(ERROR) << "Acquire trx failed, trxMgr is null."; + return UBRING_ERR; + } + + LOCK_GUARD(g_ubrTrxMgrMtx); + if (g_ubrMgr.trxNum >= g_ubrMgr.trxCap) { + LOG(ERROR) << "Acquire trx failed, trx number is full."; + return UBRING_ERR; + } + + for (uint32_t i = 0; i < g_ubrMgr.trxCap; ++i) { + if (g_ubrMgr.trxMgrUnitStatus[i] == UBR_MGR_UNIT_FREE) { + memset(&g_ubrMgr.trxMgr[i], 0, sizeof(UbrTrx)); + g_ubrMgr.trxMgrUnitStatus[i] = UBR_MGR_UNIT_USED; + *trx = &g_ubrMgr.trxMgr[i]; + (*trx)->trxMgrIndex = i; + (*trx)->ubrId = g_ubrTrxNum; + (*trx)->closeState = UBR_CLOSE_FIRST; + (*trx)->closeCnt = MAX_CLOSE_COUNT; + ++g_ubrMgr.trxNum; + ++g_ubrTrxNum; + return UBRING_OK; + } + } + LOG(ERROR) << "Acquire trx failed, no available space."; + return UBRING_ERR; +} + +RETURN_CODE UBRingManager::ReleaseUbrTrxFromMgr(UbrTrx *trx) { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Release trx failed, trx is null."; + return 
UBRING_ERR; + } + + trx->localShm.addr = NULL; + trx->ubrTx.localTxEventQ.addr = NULL; + trx->ubrTx.localDataStatusQ.addr = NULL; + trx->ubrRx.localRxEventQ.addr = NULL; + trx->ubrRx.remoteDataStatusQ.addr = NULL; + if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + LOG(ERROR) << "Release trx failed, trxMgr is null."; + return UBRING_ERR; + } + + LOCK_GUARD(g_ubrTrxMgrMtx); + if (g_ubrMgr.trxNum == 0) { + LOG(ERROR) << "Release trx failed, trx number is 0."; + return UBRING_ERR; + } + + uint32_t idx = trx->trxMgrIndex; + if (g_ubrMgr.trxMgrUnitStatus[idx] == UBR_MGR_UNIT_FREE) { + LOG(ERROR) << "Release trx failed, trx is not in manager."; + return UBRING_ERR; + } + g_ubrMgr.trxMgrUnitStatus[idx] = UBR_MGR_UNIT_FREE; + --g_ubrMgr.trxNum; + return UBRING_OK; +} + +void UBRingManager::LinkInfoInit(void) { + + size_t linkInfoMgrSize = FLAGS_ubr_max_managed_num * sizeof(UbrLinkInfo); + g_linkInfoMgr.allLinkInfo = (UbrLinkInfo*) malloc(linkInfoMgrSize); + if (g_linkInfoMgr.allLinkInfo == NULL) { + LOG(ERROR) << "allLinkInfo is NULL"; + LinkInfoFini(); + return; + } + + g_linkInfoMgr.linkMgrUnitStatus = (UbrMgrUnitStatus*) malloc(linkInfoMgrSize); + if (g_linkInfoMgr.linkMgrUnitStatus == NULL) { + LinkInfoFini(); + return; + } + + memset(g_linkInfoMgr.allLinkInfo, 0, linkInfoMgrSize); + memset(g_linkInfoMgr.linkMgrUnitStatus, 0, linkInfoMgrSize); +} + +void UBRingManager::LinkInfoFini(void) { + if (g_linkInfoMgr.linkMgrUnitStatus == NULL || g_linkInfoMgr.allLinkInfo == NULL) { + LOG(ERROR) << "LinkInfo is NULL"; + return; + } + { + LOCK_GUARD(g_linkInfoMgrMtx); + FREE_PTR(g_linkInfoMgr.allLinkInfo); + FREE_PTR(g_linkInfoMgr.linkMgrUnitStatus); + } + + g_linkInfoMgr.linkNum = 0; +} + +void UBRingManager::AcquireLinkInfoToMgr(const char *listenerName, UbrTrx *trx) { + if (listenerName == NULL || trx == NULL) { + LOG(ERROR) << "LinkInfo acquire fail."; + return; + } + + if (g_linkInfoMgr.linkMgrUnitStatus == NULL || g_linkInfoMgr.allLinkInfo == NULL) { + LOG(ERROR) << "LinkInfo is 
NULL."; + return; + } + uint32_t ubrIndex = trx->trxMgrIndex; + char* connectName = trx->localShm.name; + if (g_linkInfoMgr.linkMgrUnitStatus[ubrIndex] == UBR_MGR_UNIT_FREE) { + strncpy(g_linkInfoMgr.allLinkInfo[ubrIndex].connectName, + connectName, SHM_MAX_NAME_BUFF_LEN); + strncpy(g_linkInfoMgr.allLinkInfo[ubrIndex].listenerName, + listenerName, SHM_MAX_NAME_BUFF_LEN); + g_linkInfoMgr.linkMgrUnitStatus[ubrIndex] = UBR_MGR_UNIT_USED; + g_linkInfoMgr.linkNum++; + } +} + +void UBRingManager::ReleaseLinkInfoFromMgr(UbrTrx *trx) { + if (trx == NULL || g_linkInfoMgr.linkMgrUnitStatus == NULL) { + LOG(ERROR) << "LinkInfo release fail."; + return; + } + + if (g_linkInfoMgr.linkMgrUnitStatus[trx->trxMgrIndex] == UBR_MGR_UNIT_FREE) { + LOG(ERROR) << "Release linkInfo failed, trx is not in manager."; + return; + } + g_linkInfoMgr.linkMgrUnitStatus[trx->trxMgrIndex] = UBR_MGR_UNIT_FREE; + g_linkInfoMgr.linkNum--; +} + +int32_t UBRingManager::UbEventCallback(const char *shmName) +{ + if (UNLIKELY(shmName == NULL)) { + LOG(ERROR) << "Ub event callback failed, shm name is null."; + return UBRING_ERR; + } + if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + LOG(ERROR) << "Ub event callback failed, trx mgr is null."; + return UBRING_ERR; + } + LOG(DEBUG) << "Ub event callback is processing. 
shm_name=" << shmName; + + for (uint32_t i = 0; i < g_ubrMgr.trxCap; ++i) { + if (g_ubrMgr.trxMgrUnitStatus[i] == UBR_MGR_UNIT_FREE) { + continue; + } + + if (strcmp(g_ubrMgr.trxMgr[i].localShm.name, shmName) == 0 || // 故障链路为该trx的本端shm + strcmp(g_ubrMgr.trxMgr[i].remoteShm.name, shmName) == 0) { // 故障链路为该trx的对端shm + ++g_ubEventCnt; + int fd = (int)g_ubrMgr.trxMgr[i].localShm.fd; + LOG(INFO) << "Ub event callback, the fd of the faulty link is " << fd; + return UBRing::UbrPassiveClearTrx(&g_ubrMgr.trxMgr[i], fd, UBR_UB_EVENT); + } + } + return UBRING_ERR; +} +} +} \ No newline at end of file diff --git a/src/brpc/ub/ub_ring_manager.h b/src/brpc/ubring/ub_ring_manager.h similarity index 66% rename from src/brpc/ub/ub_ring_manager.h rename to src/brpc/ubring/ub_ring_manager.h index 9e5f848596..b57cfdcd47 100644 --- a/src/brpc/ub/ub_ring_manager.h +++ b/src/brpc/ubring/ub_ring_manager.h @@ -20,32 +20,32 @@ #include #include -#include "brpc/ub/ub_ring.h" -#include "brpc/ub/common/common.h" +#include "brpc/ubring/ub_ring.h" +#include "brpc/ubring/common/common.h" namespace brpc { -namespace ub { +namespace ubring { typedef enum { UBR_MGR_UNIT_FREE = 0, UBR_MGR_UNIT_USED = 1 } UbrMgrUnitStatus; typedef struct TagUbrMgr { - uint32_t trx_num; - uint32_t trx_cap; - UbrTrx *trx_mgr; - UbrMgrUnitStatus *trx_mgr_unit_status; + uint32_t trxNum; + uint32_t trxCap; + UbrTrx *trxMgr; + UbrMgrUnitStatus *trxMgrUnitStatus; } UbrMgr; typedef struct TagUbrLinkInfo { - char connect_name[SHM_MAX_NAME_BUFF_LEN]; - char listener_name[SHM_MAX_NAME_BUFF_LEN]; + char connectName[SHM_MAX_NAME_BUFF_LEN]; + char listenerName[SHM_MAX_NAME_BUFF_LEN]; } UbrLinkInfo; typedef struct TagUbrLinkInfoMgr { - uint32_t link_num; - UbrLinkInfo* all_link_info; - UbrMgrUnitStatus *link_mgr_unit_status; + uint32_t linkNum; + UbrLinkInfo* allLinkInfo; + UbrMgrUnitStatus *linkMgrUnitStatus; } UbrLinkInfoMgr; class UBRingManager { @@ -54,7 +54,7 @@ class UBRingManager { UbrMgrFini(); } - static RETURN_CODE 
GetHlcDealMsgMaxCnt(const uint32_t capacity, uint32_t *deal_msg_max_cnt); + static RETURN_CODE GetUbrDealMsgMaxCnt(const uint32_t capacity, uint32_t *dealMsgMaxCnt); static RETURN_CODE UbrMgrDefault(); @@ -68,19 +68,19 @@ class UBRingManager { static void LinkInfoInit(void); static void LinkInfoFini(void); - static void AcquireLinkInfoToMgr(const char* listener_name, UbrTrx *trx); + static void AcquireLinkInfoToMgr(const char* listenerName, UbrTrx *trx); static void ReleaseLinkInfoFromMgr(UbrTrx* trx); - static int32_t UbEventCallback(const char *shm_name); + static int32_t UbEventCallback(const char *shmName); private: UBRingManager() { } - static UbrMgr g_ubr_mgr; - static UbrLinkInfoMgr g_link_info_mgr; - static pthread_mutex_t g_ubr_trx_mgr_mtx; - static pthread_mutex_t g_ubr_listener_mgr_mtx; - static pthread_mutex_t g_link_info_mgr_mtx; + static UbrMgr g_ubrMgr; + static UbrLinkInfoMgr g_linkInfoMgr; + static pthread_mutex_t g_ubrTrxMgrMtx; + static pthread_mutex_t g_ubrListenerMgrMtx; + static pthread_mutex_t g_linkInfoMgrMtx; }; } } diff --git a/src/brpc/ub/ubr_msg.h b/src/brpc/ubring/ubr_msg.h similarity index 88% rename from src/brpc/ub/ubr_msg.h rename to src/brpc/ubring/ubr_msg.h index a205230bf4..8a19b6f6bc 100644 --- a/src/brpc/ub/ubr_msg.h +++ b/src/brpc/ubring/ubr_msg.h @@ -26,7 +26,7 @@ #define UBR_MSG_CUR_INDEX 2 namespace brpc { -namespace ub { +namespace ubring { typedef enum { UBR_MSG_CHUNK_NONE = 0, UBR_MSG_CHUNK_EXIST = 1, @@ -43,10 +43,10 @@ typedef struct __attribute__((aligned(64))) TagUbrMsgFormat { uint8_t header[UBR_MSG_HEADER_LEN]; } UbrMsgFormat; -static inline uint32_t CalcUbrMsgChunkCnt(uint32_t buf_len) +static inline uint32_t CalcUbrMsgChunkCnt(uint32_t bufLen) { - uint32_t msg_chunk_num = (buf_len + UBR_MSG_PAYLOAD_LEN - 1) / UBR_MSG_PAYLOAD_LEN; - return msg_chunk_num; + uint32_t msgChunkNum = (bufLen + UBR_MSG_PAYLOAD_LEN - 1) / UBR_MSG_PAYLOAD_LEN; + return msgChunkNum; } } } diff --git a/src/brpc/ub/ubr_trx.h 
b/src/brpc/ubring/ubr_trx.h similarity index 69% rename from src/brpc/ub/ubr_trx.h rename to src/brpc/ubring/ubr_trx.h index ccba5f0c95..aba6964137 100644 --- a/src/brpc/ub/ubr_trx.h +++ b/src/brpc/ubring/ubr_trx.h @@ -20,10 +20,10 @@ #include #include #include -#include "brpc/ub/shm/shm_def.h" -#include "brpc/ub/common/common.h" -#include "brpc/ub/common/thread_lock.h" -#include "brpc/ub/ubr_msg.h" +#include "brpc/ubring/shm/shm_def.h" +#include "brpc/ubring/common/common.h" +#include "brpc/ubring/common/thread_lock.h" +#include "brpc/ubring/ubr_msg.h" /* +----------------------------------------------------------------------------+ │ UbrTrx shm │ @@ -41,14 +41,14 @@ #define MB_TO_BYTE (1024 * 1024) #define MAX_CLOSE_COUNT 2 -#define SHM_NAME_PREFIX "HLC" +#define SHM_NAME_PREFIX "UBRING" #define SERVER_SHM_NAME_SUFFIX "S" #define CLIENT_SHM_NAME_SUFFIX "C" namespace brpc { -namespace ub { -extern RETURN_CODE(*g_before_tcp_close)(int); -extern RETURN_CODE(*g_after_tcp_close)(int); +namespace ubring { +extern RETURN_CODE(*g_BeforeTcpClose)(int); +extern RETURN_CODE(*g_AfterTcpClose)(int); typedef enum { UBR_STATE_NONE, @@ -86,11 +86,11 @@ typedef enum { typedef struct TagUbrDataStatusQMsg { uint32_t tail; uint32_t timeout; - uint8_t heart_beat; + uint8_t heartBeat; } UbrDataStatusQMsg; typedef struct TagUbrEventQMsg { - uint64_t io_id; + uint64_t ioId; EventQState flag; } UbrEventQMsg; @@ -100,62 +100,62 @@ typedef struct TagUbrAddrInfo { } UbrAddrInfo; typedef struct TagUbrTx { - UbrAddrInfo remote_data_q; - UbrAddrInfo remote_rx_event_q; - UbrAddrInfo local_data_status_q; - UbrAddrInfo local_tx_event_q; - uint64_t out_io_id; - uint32_t write_pos; + UbrAddrInfo remoteDataQ; + UbrAddrInfo remoteRxEventQ; + UbrAddrInfo localDataStatusQ; + UbrAddrInfo localTxEventQ; + uint64_t outIoId; + uint32_t writePos; uint32_t capacity; - UbrMsgFormat local_msg_space; - uint32_t hb_retry_cnt; - uint32_t ep_last_cap; - volatile EventQState trx_state; + UbrMsgFormat localMsgSpace; 
+ uint32_t hbRetryCnt; + uint32_t epLastCap; + volatile EventQState trxState; } UbrTx; typedef struct TagUbrRx { - UbrAddrInfo local_data_q; - UbrAddrInfo local_rx_event_q; - UbrAddrInfo remote_data_status_q; - UbrAddrInfo remote_tx_event_q; - uint64_t in_io_id; - uint32_t read_pos; + UbrAddrInfo localDataQ; + UbrAddrInfo localRxEventQ; + UbrAddrInfo remoteDataStatusQ; + UbrAddrInfo remoteTxEventQ; + uint64_t inIoId; + uint32_t readPos; uint32_t capacity; - uint32_t deal_msg_num; - uint32_t deal_msg_max_cnt; - uint32_t ep_eof_pos; - volatile EventQState trx_state; + uint32_t dealMsgNum; + uint32_t dealMsgMaxCnt; + uint32_t epEofPos; + volatile EventQState trxState; } UbrRx; typedef struct TagUbrTrx { - UbrTx ubr_tx; - UbrRx ubr_rx; - uint64_t ubr_id; - uint32_t trx_mgr_index; + UbrTx ubrTx; + UbrRx ubrRx; + uint64_t ubrId; + uint32_t trxMgrIndex; UbrTrxType type; - SHM local_shm; - SHM remote_shm; - int timer_fd; - int hb_timer_fd; - int clear_timer_fd; - AtomicInt close_cnt; - AtomicInt close_state; + SHM localShm; + SHM remoteShm; + int timerFd; + int hbTimerFd; + int clearTimerFd; + AtomicInt closeCnt; + AtomicInt closeState; } UbrTrx; typedef struct TagFileLock { - int lock_fd; - char* lock_path; + int lockFd; + char* lockPath; } FileLock; typedef struct TagUbrLinkLock { - int file_lock_num; - FileLock* file_lock; + int fileLockNum; + FileLock* fileLock; } UbrLinkLock; typedef enum { UBR_UB_EVENT, UBR_HEARTBEAT, -} PASSIVE_DISC_TYPE; +}PASSIVE_DISC_TYPE; } } diff --git a/src/butil/iobuf.cpp b/src/butil/iobuf.cpp index fb7b212f9d..349bd7d3a9 100644 --- a/src/butil/iobuf.cpp +++ b/src/butil/iobuf.cpp @@ -1541,7 +1541,7 @@ ssize_t IOPortal::pappend_from_file_descriptor( } ssize_t IOPortal::pappend_from_ub_ring( - brpc::ub::UBRing* _ub_ring, + brpc::ubring::UBRing* _ub_ring, size_t max_count) { iovec vec[MAX_APPEND_IOVEC]; int nvec = 0; diff --git a/src/butil/iobuf.h b/src/butil/iobuf.h index 77bc9d5411..ca41b8eb90 100644 --- a/src/butil/iobuf.h +++ 
b/src/butil/iobuf.h @@ -34,7 +34,7 @@ #include "butil/macros.h" #include "butil/reader_writer.h" #include "butil/binary_printer.h" -#include "brpc/ub/ub_ring.h" +#include "brpc/ubring/ub_ring.h" // For IOBuf::appendv(const const_iovec*, size_t). The only difference of this // struct from iovec (defined in sys/uio.h) is that iov_base is `const void*' @@ -467,7 +467,7 @@ class IOPortal : public IOBuf { // If `offset' is negative, does exactly what append_from_file_descriptor does. ssize_t pappend_from_file_descriptor(int fd, off_t offset, size_t max_count); - ssize_t pappend_from_ub_ring(brpc::ub::UBRing* _ub_ring, size_t max_count); + ssize_t pappend_from_ub_ring(brpc::ubring::UBRing* _ub_ring, size_t max_count); // Read as many bytes as possible from SSL channel `ssl', and stop until `max_count'. // Returns total bytes read and the ssl error code will be filled into `ssl_error' From 56d2ad82c2cef7649b392104622f30908b8252f8 Mon Sep 17 00:00:00 2001 From: zchuango Date: Mon, 20 Apr 2026 08:10:24 +0000 Subject: [PATCH 60/84] fix some bug for ubring --- example/ubring_performance/client.cpp | 7 +- src/brpc/ubring/shm/shm_ipc.cpp | 30 +++--- src/brpc/ubring/shm/shm_ubs.cpp | 2 +- src/brpc/ubring/timer/timer_mgr.cpp | 107 +++++++++++-------- src/brpc/ubring/ub_ring.cpp | 145 ++++++++++++++++---------- src/brpc/ubring/ub_ring_manager.cpp | 14 +-- 6 files changed, 183 insertions(+), 122 deletions(-) diff --git a/example/ubring_performance/client.cpp b/example/ubring_performance/client.cpp index 492596b664..35b480f1e0 100644 --- a/example/ubring_performance/client.cpp +++ b/example/ubring_performance/client.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include "butil/atomicops.h" @@ -106,6 +107,8 @@ class PerformanceTest { options.connection_type = FLAGS_connection_type; options.timeout_ms = FLAGS_rpc_timeout_ms; options.max_retry = 0; + // Prevent reusing stale sockets from previous test rounds. 
+ options.connection_group = std::to_string(reinterpret_cast(this)); std::string server = g_servers[(rr_index++) % g_servers.size()]; _channel = new brpc::Channel(); if (_channel->Init(server.c_str(), &options) != 0) { @@ -127,7 +130,7 @@ class PerformanceTest { } LOG(WARNING) << "RPC call failed, retrying... (" << retry << " left): " << cntl.ErrorText(); retry--; - bthread_usleep(100000); // 100ms delay before retry + bthread_usleep(1000000); // 100ms delay before retry } LOG(ERROR) << "RPC call failed after multiple retries"; return -1; @@ -322,4 +325,4 @@ int main(int argc, char* argv[]) { return 0; } -#endif \ No newline at end of file +#endif diff --git a/src/brpc/ubring/shm/shm_ipc.cpp b/src/brpc/ubring/shm/shm_ipc.cpp index c5cfad9e4b..ad19502a0a 100644 --- a/src/brpc/ubring/shm/shm_ipc.cpp +++ b/src/brpc/ubring/shm/shm_ipc.cpp @@ -66,8 +66,8 @@ RETURN_CODE IpcShmLocalMalloc(SHM *shm) RETURN_CODE IpcShmMunmap(SHM *shm) { if (shm->addr == NULL) { - LOG(ERROR) << "Input shm param is invalid, addr is NULL."; - return SHM_ERR_INPUT_INVALID; + LOG(DEBUG) << "IPC unmap shm=" << shm->name << " already unmapped."; + return UBRING_OK; } int ret = munmap(shm->addr, shm->len); @@ -82,11 +82,6 @@ RETURN_CODE IpcShmMunmap(SHM *shm) RETURN_CODE IpcShmFree(SHM *shm) { - if (shm->addr == NULL) { - LOG(ERROR) << "Input shm param is invalid, addr is NULL."; - return SHM_ERR_INPUT_INVALID; - } - // free int ret = shm_unlink(shm->name); if (ret != UBRING_OK) { @@ -94,10 +89,14 @@ RETURN_CODE IpcShmFree(SHM *shm) LOG_EVERY_SECOND(ERROR) << "IPC free shm=" << shm->name << " failed, errno=" << errno; return SHM_ERR_RESOURCE_ATTACHED; } + if (errno == ENOENT) { + LOG(DEBUG) << "IPC free shm=" << shm->name << " already deleted."; + shm->addr = NULL; + return SHM_ERR_NOT_FOUND; + } LOG_EVERY_SECOND(ERROR) << "IPC free shm=" << shm->name << " failed, errno=" << errno; return SHM_ERR; } - shm->addr = NULL; LOG(DEBUG) << "IPC free shm=" << shm->name << " success."; return UBRING_OK; } 
@@ -105,8 +104,8 @@ RETURN_CODE IpcShmFree(SHM *shm) RETURN_CODE IpcShmLocalFree(SHM *shm) { if (shm->addr == NULL) { - LOG(ERROR) << "Input shm param is invalid, addr is NULL."; - return SHM_ERR_INPUT_INVALID; + LOG(DEBUG) << "IPC free local shm=" << shm->name << " already freed."; + return SHM_ERR_NOT_FOUND; } int ret = munmap(shm->addr, shm->len); @@ -120,6 +119,11 @@ RETURN_CODE IpcShmLocalFree(SHM *shm) LOG_EVERY_SECOND(ERROR) << "IPC delete shm=" << shm->name << " failed, ret=" << ret; return SHM_ERR_RESOURCE_ATTACHED; } + if (errno == ENOENT) { + LOG(DEBUG) << "IPC delete shm=" << shm->name << " already deleted by peer."; + shm->addr = NULL; + return SHM_ERR_NOT_FOUND; + } LOG_EVERY_SECOND(ERROR) << "IPC delete shm=" << shm->name << " failed, ret=" << ret; return SHM_ERR; } @@ -136,7 +140,7 @@ RETURN_CODE IpcShmRemoteMalloc(SHM *shm) return SHM_ERR; } - shm->addr = (uint8_t*)mmap(NULL, shm->len, PROT_WRITE, MAP_SHARED, fd, 0); + shm->addr = (uint8_t*)mmap(NULL, shm->len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (shm->addr == (uint8_t*)MAP_FAILED) { LOG(ERROR) << "IPC map shm=" << shm->name << " failed, ret=" << errno; close(fd); @@ -171,8 +175,8 @@ RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot) RETURN_CODE IpcShmRemoteFree(SHM *shm) { if (shm->addr == NULL) { - LOG(ERROR) << "Input shm param is invalid, addr is NULL."; - return SHM_ERR_INPUT_INVALID; + LOG(DEBUG) << "IPC free remote shm=" << shm->name << " already freed."; + return UBRING_OK; } int ret = munmap(shm->addr, shm->len); diff --git a/src/brpc/ubring/shm/shm_ubs.cpp b/src/brpc/ubring/shm/shm_ubs.cpp index f79185c54d..64c43526e8 100644 --- a/src/brpc/ubring/shm/shm_ubs.cpp +++ b/src/brpc/ubring/shm/shm_ubs.cpp @@ -43,7 +43,7 @@ namespace ubring { DEFINE_uint32(node_location, 1, "Location of the ub machine."); DEFINE_bool(shm_wr_delay_comp, true, "Indicates whether to enable the write relay." 
"0: relay; 1: non-relay."); -DEFINE_int32(ub_flying_io_timeout, 1, "Waiting time for stopping data" +DEFINE_int32(ub_flying_io_timeout, 5, "Waiting time for stopping data" "sending and receiving when the link is disconnected."); char g_regionName[MAX_REGION_NAME_DESC_LENGTH] = {0}; int g_shmTimerFd = 0; diff --git a/src/brpc/ubring/timer/timer_mgr.cpp b/src/brpc/ubring/timer/timer_mgr.cpp index 7688198519..948cca1cfe 100644 --- a/src/brpc/ubring/timer/timer_mgr.cpp +++ b/src/brpc/ubring/timer/timer_mgr.cpp @@ -38,20 +38,33 @@ static int32_t g_timerModuleInitialized; static RETURN_CODE DeleteTimerInner(uint32_t fd) { if (g_timerFdCtxMap == NULL) { - LOG(WARNING) << "The timer is not initialized."; return UBRING_OK; } + if (pthread_spin_lock(&g_timerFdCtxMap[fd].spinLock) != 0) { + return UBRING_ERR; + } + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { - LOG(WARNING) << "The timer is not using, timerFd=" << fd; + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); return UBRING_OK; } - if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { - LOG(ERROR) << "Failed to delete the timer fd=" << fd << " with errno=" << errno; - } + g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; + g_timerFdCtxMap[fd].cb = NULL; + g_timerFdCtxMap[fd].args = NULL; + g_timerFdCtxMap[fd].periodical = 0; + g_timerFdCtxMap[fd].fd = 0; + + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); - CloseTimerFd(fd); + // I/O outside lock + epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL); + + uint64_t exp = 0; + read((int)fd, &exp, sizeof(exp)); + + close((int)fd); atomic_fetch_sub(&g_totalTimerNum, 1); return UBRING_OK; } @@ -92,23 +105,13 @@ static RETURN_CODE TimerSpinLocksInit(void) return UBRING_OK; } +// Execute callback directly in the epoll thread. +// Previously this spawned a new pthread per timer firing, which caused EAGAIN +// under high load. 
Since callbacks are lightweight (just setting flags or +// scheduling bthreads), running them inline is safe and avoids thread exhaustion. static RETURN_CODE ExecuteCallback(int32_t timerFd) { - pthread_attr_t attr; - pthread_attr_init(&attr); - error_t err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - if (err != 0) { - LOG(ERROR) << "Failed to set thread detach status when executing callback"; - } - - pthread_t cbThread; - err = pthread_create(&cbThread, &attr, UnifiedCallback, (void *)(&g_timerFdCtxMap[timerFd])); - if (err != 0) { - pthread_attr_destroy(&attr); - LOG(ERROR) << "Failed to create thread while executing callback due to errno=" << err; - return UBRING_ERR; - } - pthread_attr_destroy(&attr); + UnifiedCallback((void *)(&g_timerFdCtxMap[timerFd])); return UBRING_OK; } @@ -171,27 +174,30 @@ RETURN_CODE TimerInit(void) void *UnifiedCallback(void *args) { TimerFdCtx *ctx = (TimerFdCtx *)args; - // Try to lock with a small delay if initial try fails - int retry = 0; - while (pthread_spin_trylock(&ctx->spinLock) != 0) { - if (retry >= 3) { - LOG_EVERY_SECOND(WARNING) << "Failed to acquire spin lock after multiple attempts, context status is " << ctx->status; - return NULL; - } - usleep(100); // Small delay before retry - retry++; + if (pthread_spin_lock(&ctx->spinLock) != 0) { + return NULL; } - + if (ctx->status == TIMER_CONTEXT_NOT_USING) { pthread_spin_unlock(&ctx->spinLock); return NULL; } + + // Snapshot callback info under lock, then release before executing + void *(*cb)(void *) = ctx->cb; + void *cbArgs = ctx->args; + uint32_t fd = ctx->fd; + int isPeriodical = ctx->periodical; ctx->status = TIMER_CONTEXT_CALLBACK_ONGOING; - ctx->cb(ctx->args); - if (ctx->periodical != 1) { - DeleteTimerInner((uint32_t)ctx->fd); - } + pthread_spin_unlock(&ctx->spinLock); + + // Execute callback OUTSIDE the spinlock + cb(cbArgs); + + if (!isPeriodical) { + DeleteTimerInner(fd); + } return NULL; } @@ -224,11 +230,13 @@ void *TimerEpoll(void *args) 
int32_t timerFd = event->data.fd; uint64_t exp = 0; if (read(timerFd, &exp, sizeof(exp)) < 0) { - LOG(ERROR) << "Failed to read timerfd=" << timerFd << " errno=" << errno; + // EBADF means the fd was already closed by DeleteTimerSafe, skip silently + if (errno != EBADF) { + LOG(ERROR) << "Failed to read timerfd=" << timerFd << " errno=" << errno; + } continue; } if (TimerFdCtxValidate((uint32_t)timerFd) != UBRING_OK) { - LOG(ERROR) << "Timer ctx is not valid=" << timerFd; continue; } @@ -246,29 +254,36 @@ void *TimerEpoll(void *args) void DeleteTimerSafe(uint32_t fd) { if (g_timerFdCtxMap == NULL) { - LOG(WARNING) << "The timer is not initialized."; return; } if (pthread_spin_lock(&g_timerFdCtxMap[fd].spinLock) != 0) { - LOG(ERROR) << "Failed to lock while deleting timer=" << fd << " errno=" << errno; return; } if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { - LOG(WARNING) << "The timer is not using, timerFd=" << fd; pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); return; } - if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL) != 0) { - LOG(ERROR) << "Failed to delete the timer fd=" << fd << " with errno=" << errno; - } - - CloseTimerFd(fd); - atomic_fetch_sub(&g_totalTimerNum, 1); + // Mark as not-using under lock so no new callbacks get dispatched + g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; + g_timerFdCtxMap[fd].cb = NULL; + g_timerFdCtxMap[fd].args = NULL; + g_timerFdCtxMap[fd].periodical = 0; + g_timerFdCtxMap[fd].fd = 0; pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); + + // I/O operations outside the spin lock to avoid blocking other threads + epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL); + + // Drain any pending data so the epoll thread won't read a closed fd + uint64_t exp = 0; + read((int)fd, &exp, sizeof(exp)); + + close((int)fd); + atomic_fetch_sub(&g_totalTimerNum, 1); } void DeleteTimer(uint32_t fd) { diff --git a/src/brpc/ubring/ub_ring.cpp b/src/brpc/ubring/ub_ring.cpp index 97e5371924..80a02b1b68 100644 --- 
a/src/brpc/ubring/ub_ring.cpp +++ b/src/brpc/ubring/ub_ring.cpp @@ -19,17 +19,19 @@ #include #include #include +#include "bthread/bthread.h" #include "butil/logging.h" #include "brpc/ubring/ub_ring.h" +#include "brpc/ubring/shm/shm_ipc.h" namespace brpc { namespace ubring { uint32_t g_sleepTime[UBR_TASK_STEP_NUM] = {0}; #define TIME_COVERSION 1000 -DEFINE_int32(ub_disconnect_timeout, 1, "Ubshm disconnection timeout."); +DEFINE_int32(ub_disconnect_timeout, 5, "Ubshm disconnection timeout."); DEFINE_int32(ub_connect_timeout, 1, "Ubshm connection timeout."); -DEFINE_int32(ub_hb_timer_interval, 1, "Heartbeat timer interval."); -DEFINE_int32(ub_hb_retry_cnt, 3, "Heartbeat retry times."); +DEFINE_int32(ub_hb_timer_interval, 5, "Heartbeat timer interval."); +DEFINE_int32(ub_hb_retry_cnt, 10, "Heartbeat retry times."); DEFINE_int32(ub_event_queue_timer_interval, 100, "Interval of the disconnection timer."); UBRing::UBRing() @@ -53,10 +55,17 @@ RETURN_CODE UBRing::UbrTrxMapShm(SHM *localShm, SHM *remoteShm) } RETURN_CODE UBRing::UbrTrxClose() { - if (UNLIKELY(UbrTrxCloseCheck(_trx) != UBRING_OK)) { + RETURN_CODE closeCheckRc = UbrTrxCloseCheck(_trx); + if (UNLIKELY(closeCheckRc != UBRING_OK)) { + if (closeCheckRc == UBRING_REENTRY) { + LOG(INFO) << "Trx close skipped, already closing, local name=" << _trx->localShm.name; + return UBRING_OK; + } return UBRING_ERR; } - ((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = UBR_STATE_CLOSING; + if (_trx->ubrRx.remoteTxEventQ.addr != nullptr) { + ((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = UBR_STATE_CLOSING; + } uint32_t disconnectTimeout = FLAGS_ub_disconnect_timeout; uint64_t startTime = GetCurNanoSeconds(); @@ -66,25 +75,51 @@ RETURN_CODE UBRing::UbrTrxClose() { _trx->ubrTx.trxState = UBR_STATE_CLOSED; } - ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CLOSED; + if (_trx->ubrTx.remoteRxEventQ.addr != nullptr) { + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CLOSED; 
+ } while (_trx->ubrRx.localRxEventQ.addr != nullptr && ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag != UBR_STATE_CLOSED) { UbrSetSleepTask(UBR_TASK_CLOSE); if (HasTimedOut(startTime, disconnectTimeout) != UBRING_OK) { - LOG(ERROR) << "Local shm " << _trx->localShm.name - << " wait for the peer to close the connection failed."; + LOG(WARNING) << "Local shm " << _trx->localShm.name + << " wait for the peer to close timed out, force cleanup."; _trx->ubrRx.trxState = UBR_STATE_CLOSED; - ClearTrxResource(_trx, startTime, UBR_SEND_CLOSE); + // Force synchronous cleanup instead of relying on async timer + DeleteTimerSafe((uint32_t)_trx->timerFd); + DeleteTimerSafe((uint32_t)_trx->hbTimerFd); + if (_trx->ubrTx.remoteRxEventQ.addr != nullptr) { + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CLOSED; + } + if (UNLIKELY(ShmRemoteFree(&_trx->remoteShm) != UBRING_OK)) { + LOG(WARNING) << "Force close, remote shm " << _trx->remoteShm.name << " free failed."; + } + if (UNLIKELY(UbrTrxFreeShm(_trx) != UBRING_OK)) { + LOG(WARNING) << "Force close, local shm " << _trx->localShm.name << " free failed."; + } + if (UNLIKELY(UBRingManager::ReleaseUbrTrxFromMgr(_trx) != UBRING_OK)) { + LOG(WARNING) << "Force close, release trx " << _trx->localShm.name << " failed."; + } return UBRING_ERR_TIMEOUT; } - usleep(1); + bthread_usleep(1000); // 1ms, yield to other bthreads } _trx->ubrRx.trxState = UBR_STATE_CLOSED; RETURN_CODE rc; if (UNLIKELY((rc = ClearTrxResource(_trx, startTime, UBR_SEND_CLOSE)) != UBRING_OK)) { + if (rc == UBRING_REENTRY) { + LOG(INFO) << "Trx close, peer is closing, trx local name=" << _trx->localShm.name; + return UBRING_OK; + } LOG(ERROR) << "Trx close, clear trx resource failed, trx local name=" << _trx->localShm.name; return UBRING_ERR; } - LOG(INFO) << "The peer is closed, local name=" << _trx->localShm.name; + // Unlink local shm name immediately so process exit does not leave visible leftovers. 
+ RETURN_CODE unlinkRc = ShmFree(&_trx->localShm); + if (unlinkRc != UBRING_OK && unlinkRc != SHM_ERR_NOT_FOUND && unlinkRc != SHM_ERR_RESOURCE_ATTACHED) { + LOG(WARNING) << "Trx close, unlink local shm failed, trx local name=" << _trx->localShm.name + << ", rc=" << unlinkRc; + } + LOG(DEBUG) << "The peer is closed, local name=" << _trx->localShm.name; return UBRING_OK; } @@ -137,7 +172,7 @@ void* UBRing::UbrTrxCloseCallback(void* args) { int fd = (int)trx->localShm.fd; do { if (ATOMIC_LOAD(trx->closeCnt) == 0) { - LOG(ERROR) << "Trx close callback failed, exist other closing call, name=" << trx->localShm.name; + LOG(DEBUG) << "Trx close callback skipped, already closed, name=" << trx->localShm.name; break; } ATOMIC_SUB(trx->closeCnt, 1); @@ -154,7 +189,8 @@ void* UBRing::UbrTrxCloseCallback(void* args) { break; } remoteRxEventQ->flag = UBR_STATE_CLOSED; - if (UNLIKELY(ClearTrxResource(trx, startTime, UBR_CALL_BACK_CLOSE, 1) != UBRING_OK)) { + RETURN_CODE clearRc = ClearTrxResource(trx, startTime, UBR_CALL_BACK_CLOSE, 1); + if (UNLIKELY(clearRc != UBRING_OK && clearRc != UBRING_REENTRY)) { LOG(ERROR) << "Trx close callback failed, " << trx->localShm.name << " clear trx resource failed."; break; } @@ -182,7 +218,13 @@ RETURN_CODE UBRing::UbrAddHBTimer() { } RETURN_CODE UBRing::UbrPassiveClearTrx(UbrTrx *trx, int fd, PASSIVE_DISC_TYPE type) { - if (UNLIKELY(UbrTrxCloseCheck(trx) != UBRING_OK)) { + RETURN_CODE passiveCloseCheckRc = UbrTrxCloseCheck(trx); + if (UNLIKELY(passiveCloseCheckRc != UBRING_OK)) { + if (passiveCloseCheckRc == UBRING_REENTRY) { + LOG(INFO) << "Passive close skipped, active close in progress, name=" << trx->localShm.name; + uint64_t startTime = GetCurNanoSeconds(); + return ClearTrxResource(trx, startTime, UBR_CALL_BACK_CLOSE); + } return UBRING_ERR; } trx->ubrTx.trxState = UBR_STATE_CLOSED; @@ -196,7 +238,7 @@ RETURN_CODE UBRing::UbrPassiveClearTrx(UbrTrx *trx, int fd, PASSIVE_DISC_TYPE ty DeleteTimerSafe((uint32_t)trx->hbTimerFd); typeName = 
"Ub event callback"; } - sleep(FLAGS_ub_flying_io_timeout); + bthread_usleep(FLAGS_ub_flying_io_timeout * 1000000LL); // yield-friendly sleep int rc = ShmLocalFree(&trx->remoteShm); if (rc != UBRING_OK) { @@ -254,6 +296,11 @@ RETURN_CODE UBRing::UbrAddAsynClearTimer(UbrTrx *trx) { return UBRING_ERR; } + if (trx->clearTimerFd > 0) { + LOG(DEBUG) << "Trx close timer already added, name=" << trx->localShm.name; + return UBRING_OK; + } + struct itimerspec timeSpec = { .it_interval = {.tv_sec = 0, .tv_nsec = 0}, .it_value = {.tv_sec = FLAGS_ub_flying_io_timeout, .tv_nsec = 0} @@ -363,7 +410,11 @@ int UBRing::UbrTrxRecvBlockMode(uint8_t *dest, uint32_t bufLen) UbrMsgFormat *currentChunk = &dataMsg[ubrRx->readPos]; uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX]; if (flag == UBR_MSG_CHUNK_NONE) { - continue; + if (totalCopied > 0) { + break; + } + errno = EAGAIN; + return -1; } if (flag == UBR_MSG_CHUNK_EOF) { notEofEncountered = false; @@ -595,25 +646,19 @@ RETURN_CODE UBRing::UbrTrxFreeShm(UbrTrx *trx) rc = ShmFree(&trx->localShm); if (UNLIKELY(rc != UBRING_OK)) { - if (UNLIKELY(rc == SHM_ERR_RESOURCE_ATTACHED || rc == SHM_ERR_NOT_FOUND)) { - LOG(INFO) << "Wait for " << trx->remoteShm.name << " remote free shm."; - return UBRING_OK; + if (rc != SHM_ERR_RESOURCE_ATTACHED && rc != SHM_ERR_NOT_FOUND) { + LOG(ERROR) << "Wait for " << trx->localShm.name << " local shm free fail."; + return UBRING_ERR; } - LOG(ERROR) << "Wait for " << trx->localShm.name << " local shm free fail."; - return UBRING_ERR; + LOG(INFO) << "Local shm " << trx->localShm.name << " already freed, continue to free remote shm."; } - size_t nameLen = strlen(trx->remoteShm.name); - if (!(nameLen <= 0 || nameLen > SHM_MAX_NAME_LEN || trx->remoteShm.len <= 0)) { - rc = ShmFree(&trx->remoteShm); + RETURN_CODE remoteRc = UBRING_OK; + if (trx->remoteShm.addr != NULL) { + remoteRc = IpcShmRemoteFree(&trx->remoteShm); } - if (rc != UBRING_OK) { - if (rc == SHM_ERR_RESOURCE_ATTACHED || rc == 
SHM_ERR_NOT_FOUND) { - LOG(INFO) << "Wait for " << trx->remoteShm.name << " remote free shm."; - return UBRING_OK; - } - LOG(ERROR) << "Wait for " << trx->remoteShm.name << " remote shm free fail."; - return UBRING_ERR; + if (remoteRc != UBRING_OK) { + LOG(WARNING) << "Free remote shm " << trx->remoteShm.name << " failed, rc=" << remoteRc; } return UBRING_OK; @@ -934,23 +979,8 @@ RETURN_CODE UBRing::UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCl } UbrEventQMsg* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; - while (ATOMIC_LOAD(trx->closeCnt) == 1 && localTxEventQ->flag == UBR_STATE_CLOSING) { - if (HasTimedOut(startTime, FLAGS_ub_disconnect_timeout) != UBRING_OK) { - LOG(ERROR) << "Trx close failed, wait close time out."; - break; - } - usleep(1); - } - int firstClearExpected = UBR_CLOSE_FIRST; - int secondClearExpected = UBR_CLOSE_SECOND; - if (localTxEventQ->flag == UBR_STATE_CLOSING) { - if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeState, firstClearExpected, UBR_CLOSE_SECOND)) { - LOG(ERROR) << "Trx close, exist process is closing, name=" << trx->localShm.name; - return UBRING_REENTRY; - } else if (ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeState, secondClearExpected, UBR_CLOSE_END)) { - localTxEventQ->flag = UBR_STATE_CLOSED; - trx->ubrTx.trxState = UBR_STATE_CLOSED; - } + if (localTxEventQ->flag == UBR_STATE_CONNECTED) { + localTxEventQ->flag = UBR_STATE_CLOSING; } if (closeType == UBR_SEND_CLOSE) { @@ -959,12 +989,17 @@ RETURN_CODE UBRing::UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCl DeleteTimer((uint32_t)trx->timerFd); } DeleteTimerSafe((uint32_t)trx->hbTimerFd); + + if (localTxEventQ->flag == UBR_STATE_CLOSING) { + localTxEventQ->flag = UBR_STATE_CLOSED; + trx->ubrTx.trxState = UBR_STATE_CLOSED; + } + return UBRING_OK; } RETURN_CODE UBRing::ClearTrxResource(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType, int op) { - UbrEventQMsg* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; RETURN_CODE 
rc = UbrClearResourceCheck(trx, startTime, closeType); if (rc != UBRING_OK) { return rc; @@ -987,8 +1022,8 @@ RETURN_CODE UBRing::UbrTrxCloseCheck(UbrTrx *trx) } int expected = MAX_CLOSE_COUNT; if (!ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeCnt, expected, MAX_CLOSE_COUNT - 1)) { - LOG(ERROR) << "Trx close failed, exist other close acquire, trx local name=" << trx->localShm.name; - return UBRING_ERR; + LOG(INFO) << "Trx close skipped, already closing, trx local name=" << trx->localShm.name; + return UBRING_REENTRY; } if (UNLIKELY(trx->ubrTx.localTxEventQ.addr == nullptr)) { @@ -1012,7 +1047,11 @@ ssize_t UBRing::StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, siz UbrMsgFormat *currentChunk = &dataMsg[trx->ubrRx.readPos]; uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX]; if (flag == UBR_MSG_CHUNK_NONE) { - continue; + if (totalRecvLen > 0) { + break; + } + errno = EAGAIN; + return -1; } if (flag == UBR_MSG_CHUNK_EOF) { notEofEncountered = false; @@ -1045,4 +1084,4 @@ ssize_t UBRing::StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, siz return totalRecvLen; } } // namespace ubring -} // namespace brpc \ No newline at end of file +} // namespace brpc diff --git a/src/brpc/ubring/ub_ring_manager.cpp b/src/brpc/ubring/ub_ring_manager.cpp index f99b1239b0..bf4182abe1 100644 --- a/src/brpc/ubring/ub_ring_manager.cpp +++ b/src/brpc/ubring/ub_ring_manager.cpp @@ -78,7 +78,6 @@ RETURN_CODE UBRingManager::UbrMgrInit() { memset(g_ubrMgr.trxMgrUnitStatus, UBR_MGR_UNIT_FREE, trxMgrStatusSize); LinkInfoInit(); return UBRING_OK; - return UBR_NOT_CONNECTED; } void UBRingManager::UbrMgrFini() { @@ -147,16 +146,17 @@ RETURN_CODE UBRingManager::ReleaseUbrTrxFromMgr(UbrTrx *trx) { } LOCK_GUARD(g_ubrTrxMgrMtx); + uint32_t idx = trx->trxMgrIndex; + if (g_ubrMgr.trxMgrUnitStatus[idx] == UBR_MGR_UNIT_FREE) { + LOG(DEBUG) << "Release trx already freed, name=" << trx->localShm.name; + return UBRING_OK; + } + if (g_ubrMgr.trxNum == 0) { LOG(ERROR) << "Release trx 
failed, trx number is 0."; return UBRING_ERR; } - uint32_t idx = trx->trxMgrIndex; - if (g_ubrMgr.trxMgrUnitStatus[idx] == UBR_MGR_UNIT_FREE) { - LOG(ERROR) << "Release trx failed, trx is not in manager."; - return UBRING_ERR; - } g_ubrMgr.trxMgrUnitStatus[idx] = UBR_MGR_UNIT_FREE; --g_ubrMgr.trxNum; return UBRING_OK; @@ -260,4 +260,4 @@ int32_t UBRingManager::UbEventCallback(const char *shmName) return UBRING_ERR; } } -} \ No newline at end of file +} From c5d040726a0c8a6761efb96371597d9e2c81c321 Mon Sep 17 00:00:00 2001 From: zchuango Date: Thu, 23 Apr 2026 11:14:19 +0800 Subject: [PATCH 61/84] add some log ubring endpoint --- example/ubring_performance/CMakeLists.txt | 1 + src/brpc/ubring/ub_endpoint.cpp | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/example/ubring_performance/CMakeLists.txt b/example/ubring_performance/CMakeLists.txt index ba4b1bf333..729381ccb8 100644 --- a/example/ubring_performance/CMakeLists.txt +++ b/example/ubring_performance/CMakeLists.txt @@ -106,6 +106,7 @@ set(DYNAMIC_LIB ${OPENSSL_SSL_LIBRARY} ${THRIFT_LIB} dl + z ) if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") diff --git a/src/brpc/ubring/ub_endpoint.cpp b/src/brpc/ubring/ub_endpoint.cpp index 4d0bffd735..379bd751ea 100644 --- a/src/brpc/ubring/ub_endpoint.cpp +++ b/src/brpc/ubring/ub_endpoint.cpp @@ -68,6 +68,7 @@ static butil::Mutex* g_ubring_resource_mutex = NULL; struct HelloMessage { void Serialize(void* data) const; void Deserialize(void* data); + std::string toString() const; uint16_t msg_len; uint16_t hello_ver; @@ -97,6 +98,21 @@ void HelloMessage::Deserialize(void* data) { memcpy(shm_name, current_pos, SHM_MAX_NAME_BUFF_LEN); } +std::string HelloMessage::toString() const { + constexpr size_t MAX_LEN = 16 + 6 + 16 + 6 + 16 + 6 + 20 + 6 + SHM_MAX_NAME_BUFF_LEN + 32; + std::array buf; + int n = snprintf(buf.data(), buf.size(), + "msg_len=%u, hello_ver=%u, impl_ver=%u, len=%lu, shm_name=%.*s", + msg_len, + hello_ver, + impl_ver, + static_cast(len), // 
兼容32/64位 + static_cast(SHM_MAX_NAME_BUFF_LEN), // 限制最大输出长度 + shm_name + ); + return std::string(buf.data(), static_cast(n)); +} + UBShmEndpoint::UBShmEndpoint(Socket* s) : _socket(s) , _ub_ring(nullptr) @@ -340,6 +356,7 @@ void* UBShmEndpoint::ProcessHandshakeAtClient(void* arg) { ep->_state = FAILED; return NULL; } + LOG_IF(INFO, FLAGS_ub_trace_verbose) << "client handshake message : " << local_msg.toString(); ep->_state = C_HELLO_WAIT; if (ep->ReadFromFd(data, MAGIC_STR_LEN) < 0) { @@ -468,6 +485,7 @@ void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) { HelloMessage remote_msg; remote_msg.Deserialize(data); + LOG_IF(INFO, FLAGS_ub_trace_verbose) << "server receive handshake message : " << remote_msg.toString(); if (remote_msg.msg_len < HELLO_MSG_LEN_MIN) { LOG(WARNING) << "Fail to parse Hello Message length from client:" << s->description(); From bfac5e9f4c79d4530ecf4c24dcddf976aca6e2a0 Mon Sep 17 00:00:00 2001 From: zchuango Date: Wed, 6 May 2026 14:40:55 +0800 Subject: [PATCH 62/84] add todo --- example/ubring_performance/client.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/ubring_performance/client.cpp b/example/ubring_performance/client.cpp index 35b480f1e0..37fc9af9b5 100644 --- a/example/ubring_performance/client.cpp +++ b/example/ubring_performance/client.cpp @@ -107,8 +107,8 @@ class PerformanceTest { options.connection_type = FLAGS_connection_type; options.timeout_ms = FLAGS_rpc_timeout_ms; options.max_retry = 0; - // Prevent reusing stale sockets from previous test rounds. - options.connection_group = std::to_string(reinterpret_cast(this)); + // TODO A bug exists when the connection_group parameter is used. 
+ // options.connection_group = std::to_string(reinterpret_cast(this)); std::string server = g_servers[(rr_index++) % g_servers.size()]; _channel = new brpc::Channel(); if (_channel->Init(server.c_str(), &options) != 0) { From ad9bb15f5de276a6fb691eb1aec3c3725d22a047 Mon Sep 17 00:00:00 2001 From: zchuango Date: Thu, 7 May 2026 07:14:39 +0000 Subject: [PATCH 63/84] fix the bug for handshake for ub endpoint --- src/brpc/ubring/ub_endpoint.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/brpc/ubring/ub_endpoint.cpp b/src/brpc/ubring/ub_endpoint.cpp index 379bd751ea..d80acc65f5 100644 --- a/src/brpc/ubring/ub_endpoint.cpp +++ b/src/brpc/ubring/ub_endpoint.cpp @@ -115,6 +115,7 @@ std::string HelloMessage::toString() const { UBShmEndpoint::UBShmEndpoint(Socket* s) : _socket(s) + , _state(UNINIT) , _ub_ring(nullptr) , _cq_sid(INVALID_SOCKET_ID) { @@ -132,6 +133,7 @@ void UBShmEndpoint::Reset() { delete _ub_ring; _ub_ring = nullptr; _cq_sid = INVALID_SOCKET_ID; + _state = UNINIT; } void UBConnect::StartConnect(const Socket* socket, @@ -260,11 +262,11 @@ int UBShmEndpoint::ReadFromFd(void* data, size_t len) { int nr = 0; size_t received = 0; do { - const int expected_val = _read_butex->load(butil::memory_order_acquire); const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); nr = read(_socket->fd(), (uint8_t*)data + received, len - received); if (nr < 0) { if (errno == EAGAIN) { + const int expected_val = _read_butex->load(butil::memory_order_acquire); if (bthread::butex_wait(_read_butex, expected_val, &duetime) < 0) { if (errno != EWOULDBLOCK && errno != ETIMEDOUT) { return -1; From d1198d1986ff53bcbde7d9cc09827cbbd716a78d Mon Sep 17 00:00:00 2001 From: zchuango Date: Thu, 7 May 2026 08:12:25 +0000 Subject: [PATCH 64/84] fix the bug for client ub endpoint --- example/ubring_performance/client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/ubring_performance/client.cpp 
b/example/ubring_performance/client.cpp index 37fc9af9b5..05b1d733e5 100644 --- a/example/ubring_performance/client.cpp +++ b/example/ubring_performance/client.cpp @@ -165,7 +165,7 @@ class PerformanceTest { std::unique_ptr cntl_guard(closure->cntl); std::unique_ptr response_guard(closure->resp); if (closure->cntl->Failed()) { - LOG(WARNING) << "RPC call failed: " << closure->cntl->ErrorText(); + LOG(DEBUG) << "RPC call failed: " << closure->cntl->ErrorText(); // Don't stop the test immediately, just log the error and continue } else { g_latency_recorder << closure->cntl->latency_us(); From b54402d406d92302b5963aa86a559d074a8707ff Mon Sep 17 00:00:00 2001 From: zchuango Date: Thu, 7 May 2026 12:34:31 +0000 Subject: [PATCH 65/84] optimize the iobuf file code --- src/brpc/ubring/ub_endpoint.cpp | 2 +- src/brpc/ubring/ub_ring.h | 7 +++- src/butil/iobuf.cpp | 58 --------------------------------- src/butil/iobuf.h | 3 -- 4 files changed, 7 insertions(+), 63 deletions(-) diff --git a/src/brpc/ubring/ub_endpoint.cpp b/src/brpc/ubring/ub_endpoint.cpp index d80acc65f5..438b0229a9 100644 --- a/src/brpc/ubring/ub_endpoint.cpp +++ b/src/brpc/ubring/ub_endpoint.cpp @@ -742,7 +742,7 @@ void UBShmEndpoint::PollIn(UBShmEndpoint* ep, uint32_t epEvent) { once_read = MAX_ONCE_READ; } - const ssize_t nr = s->_read_buf.pappend_from_ub_ring(ep->_ub_ring, once_read); + const ssize_t nr = s->_read_buf.append_from_reader(ep->_ub_ring, once_read); if (nr <= 0) { if (0 == nr) { // Set `read_eof' flag and proceed to feed EOF into `Protocol' diff --git a/src/brpc/ubring/ub_ring.h b/src/brpc/ubring/ub_ring.h index b6a6ef40eb..cb6352f452 100644 --- a/src/brpc/ubring/ub_ring.h +++ b/src/brpc/ubring/ub_ring.h @@ -21,6 +21,7 @@ #include #include #include "butil/macros.h" +#include "butil/reader_writer.h" #include "brpc/ubring/ubr_trx.h" #include "brpc/ubring/ub_ring_manager.h" #include "brpc/ubring/shm/shm_mgr.h" @@ -31,11 +32,15 @@ namespace ubring { DECLARE_int32(ub_flying_io_timeout); extern 
uint32_t g_sleepTime[UBR_TASK_STEP_NUM]; -class UBRing { +class UBRing : public butil::IReader { public: UBRing(); ~UBRing(); DISALLOW_COPY_AND_ASSIGN(UBRing); + + ssize_t ReadV(const iovec* iov, int iovcnt) override { + return UbrTrxReadv(iov, iovcnt); + } RETURN_CODE UbrTrxMapShm(SHM *localShm, SHM *remoteShm); diff --git a/src/butil/iobuf.cpp b/src/butil/iobuf.cpp index 349bd7d3a9..ce60932327 100644 --- a/src/butil/iobuf.cpp +++ b/src/butil/iobuf.cpp @@ -1540,64 +1540,6 @@ ssize_t IOPortal::pappend_from_file_descriptor( return nr; } -ssize_t IOPortal::pappend_from_ub_ring( - brpc::ubring::UBRing* _ub_ring, - size_t max_count) { - iovec vec[MAX_APPEND_IOVEC]; - int nvec = 0; - size_t space = 0; - Block* prev_p = NULL; - Block* p = _block; - // Prepare at most MAX_APPEND_IOVEC blocks or space of blocks >= max_count - do { - if (p == NULL) { - p = iobuf::acquire_tls_block(); - if (BAIDU_UNLIKELY(!p)) { - errno = ENOMEM; - return -1; - } - if (prev_p != NULL) { - prev_p->u.portal_next = p; - } else { - _block = p; - } - } - vec[nvec].iov_base = p->data + p->size; - vec[nvec].iov_len = std::min(p->left_space(), max_count - space); - space += vec[nvec].iov_len; - ++nvec; - if (space >= max_count || nvec >= MAX_APPEND_IOVEC) { - break; - } - prev_p = p; - p = p->u.portal_next; - } while (1); - - ssize_t nr = 0; - nr = _ub_ring->UbrTrxReadv(vec, nvec); - if (nr <= 0) { // -1 or 0 - if (empty()) { - return_cached_blocks(); - } - return nr; - } - - size_t total_len = nr; - do { - const size_t len = std::min(total_len, _block->left_space()); - total_len -= len; - const IOBuf::BlockRef r = { _block->size, (uint32_t)len, _block }; - _push_back_ref(r); - _block->size += len; - if (_block->full()) { - Block* const saved_next = _block->u.portal_next; - _block->dec_ref(); // _block may be deleted - _block = saved_next; - } - } while (total_len); - return nr; -} - ssize_t IOPortal::append_from_reader(IReader* reader, size_t max_count) { iovec vec[MAX_APPEND_IOVEC]; int nvec = 0; 
diff --git a/src/butil/iobuf.h b/src/butil/iobuf.h index ca41b8eb90..239e82d950 100644 --- a/src/butil/iobuf.h +++ b/src/butil/iobuf.h @@ -34,7 +34,6 @@ #include "butil/macros.h" #include "butil/reader_writer.h" #include "butil/binary_printer.h" -#include "brpc/ubring/ub_ring.h" // For IOBuf::appendv(const const_iovec*, size_t). The only difference of this // struct from iovec (defined in sys/uio.h) is that iov_base is `const void*' @@ -467,8 +466,6 @@ class IOPortal : public IOBuf { // If `offset' is negative, does exactly what append_from_file_descriptor does. ssize_t pappend_from_file_descriptor(int fd, off_t offset, size_t max_count); - ssize_t pappend_from_ub_ring(brpc::ubring::UBRing* _ub_ring, size_t max_count); - // Read as many bytes as possible from SSL channel `ssl', and stop until `max_count'. // Returns total bytes read and the ssl error code will be filled into `ssl_error' ssize_t append_from_SSL_channel(struct ssl_st* ssl, int* ssl_error, From 7970a35b8db591d43ea3ff84e67cef8affed9abb Mon Sep 17 00:00:00 2001 From: zchuango Date: Fri, 8 May 2026 03:49:31 +0000 Subject: [PATCH 66/84] modify the log level --- src/brpc/ubring/shm/shm_ipc.cpp | 26 +++++++++++++------------- src/brpc/ubring/shm/shm_mgr.cpp | 4 ++-- src/brpc/ubring/shm/shm_ubs.cpp | 24 ++++++++++++------------ src/brpc/ubring/ub_ring.cpp | 5 ----- src/brpc/ubring/ub_ring_manager.cpp | 6 +++--- 5 files changed, 30 insertions(+), 35 deletions(-) diff --git a/src/brpc/ubring/shm/shm_ipc.cpp b/src/brpc/ubring/shm/shm_ipc.cpp index ad19502a0a..280b8c57fa 100644 --- a/src/brpc/ubring/shm/shm_ipc.cpp +++ b/src/brpc/ubring/shm/shm_ipc.cpp @@ -59,14 +59,14 @@ RETURN_CODE IpcShmLocalMalloc(SHM *shm) } close(fd); - LOG(DEBUG) << "IPC Create shm=" << shm->name << " length=" << shm->len << " success."; + LOG(INFO) << "IPC Create shm=" << shm->name << " length=" << shm->len << " success."; return UBRING_OK; } RETURN_CODE IpcShmMunmap(SHM *shm) { if (shm->addr == NULL) { - LOG(DEBUG) << "IPC unmap shm=" 
<< shm->name << " already unmapped."; + LOG(INFO) << "IPC unmap shm=" << shm->name << " already unmapped."; return UBRING_OK; } @@ -76,7 +76,7 @@ RETURN_CODE IpcShmMunmap(SHM *shm) return SHM_ERR; } - LOG(DEBUG) << "IPC unmap shm=" << shm->name << " length=" << shm->len << " success."; + LOG(INFO) << "IPC unmap shm=" << shm->name << " length=" << shm->len << " success."; return UBRING_OK; } @@ -90,21 +90,21 @@ RETURN_CODE IpcShmFree(SHM *shm) return SHM_ERR_RESOURCE_ATTACHED; } if (errno == ENOENT) { - LOG(DEBUG) << "IPC free shm=" << shm->name << " already deleted."; + LOG(INFO) << "IPC free shm=" << shm->name << " already deleted."; shm->addr = NULL; return SHM_ERR_NOT_FOUND; } LOG_EVERY_SECOND(ERROR) << "IPC free shm=" << shm->name << " failed, errno=" << errno; return SHM_ERR; } - LOG(DEBUG) << "IPC free shm=" << shm->name << " success."; + LOG(INFO) << "IPC free shm=" << shm->name << " success."; return UBRING_OK; } RETURN_CODE IpcShmLocalFree(SHM *shm) { if (shm->addr == NULL) { - LOG(DEBUG) << "IPC free local shm=" << shm->name << " already freed."; + LOG(INFO) << "IPC free local shm=" << shm->name << " already freed."; return SHM_ERR_NOT_FOUND; } @@ -120,7 +120,7 @@ RETURN_CODE IpcShmLocalFree(SHM *shm) return SHM_ERR_RESOURCE_ATTACHED; } if (errno == ENOENT) { - LOG(DEBUG) << "IPC delete shm=" << shm->name << " already deleted by peer."; + LOG(INFO) << "IPC delete shm=" << shm->name << " already deleted by peer."; shm->addr = NULL; return SHM_ERR_NOT_FOUND; } @@ -128,7 +128,7 @@ RETURN_CODE IpcShmLocalFree(SHM *shm) return SHM_ERR; } shm->addr = NULL; - LOG(DEBUG) << "IPC free local shm=" << shm->name << " success."; + LOG(INFO) << "IPC free local shm=" << shm->name << " success."; return UBRING_OK; } @@ -148,7 +148,7 @@ RETURN_CODE IpcShmRemoteMalloc(SHM *shm) } close(fd); - LOG(DEBUG) << "IPC malloc remote shm=" << shm->name << " length=" << shm->len << " success."; + LOG(INFO) << "IPC malloc remote shm=" << shm->name << " length=" << shm->len << " 
success."; return UBRING_OK; } @@ -168,14 +168,14 @@ RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot) } close(fd); - LOG(DEBUG) << "IPC mmap remote shm=" << shm->name << " length=" << shm->len << " success."; + LOG(INFO) << "IPC mmap remote shm=" << shm->name << " length=" << shm->len << " success."; return UBRING_OK; } RETURN_CODE IpcShmRemoteFree(SHM *shm) { if (shm->addr == NULL) { - LOG(DEBUG) << "IPC free remote shm=" << shm->name << " already freed."; + LOG(INFO) << "IPC free remote shm=" << shm->name << " already freed."; return UBRING_OK; } @@ -185,8 +185,8 @@ RETURN_CODE IpcShmRemoteFree(SHM *shm) return SHM_ERR; } - LOG(DEBUG) << "IPC free remote shm=" << shm->name << " success."; + LOG(INFO) << "IPC free remote shm=" << shm->name << " success."; return UBRING_OK; } } -} \ No newline at end of file +} diff --git a/src/brpc/ubring/shm/shm_mgr.cpp b/src/brpc/ubring/shm/shm_mgr.cpp index a295c3da2b..bef282b674 100644 --- a/src/brpc/ubring/shm/shm_mgr.cpp +++ b/src/brpc/ubring/shm/shm_mgr.cpp @@ -71,7 +71,7 @@ RETURN_CODE ShmMgrInit(void) return UBRING_ERR; } } - LOG(DEBUG) << "shm mgr init success, shm type=" << g_shmType; + LOG(INFO) << "shm mgr init success, shm type=" << g_shmType; return UBRING_OK; } @@ -83,7 +83,7 @@ void ShmMgrFini(void) return; } } - LOG(DEBUG) << "shm mgr fini success, shm type=" << g_shmType; + LOG(INFO) << "shm mgr fini success, shm type=" << g_shmType; } void SetShmType(SHM_TYPE type) diff --git a/src/brpc/ubring/shm/shm_ubs.cpp b/src/brpc/ubring/shm/shm_ubs.cpp index 64c43526e8..1f5b2b1b98 100644 --- a/src/brpc/ubring/shm/shm_ubs.cpp +++ b/src/brpc/ubring/shm/shm_ubs.cpp @@ -163,7 +163,7 @@ do { // 通过MXE获取memid shm->memid = 1; // 暂时打桩 - LOG(DEBUG) << "Ubs malloc local shm=" << shm->name << " length=" << shm->len << " memid=" << shm->memid << " success."; + LOG(INFO) << "Ubs malloc local shm=" << shm->name << " length=" << shm->len << " memid=" << shm->memid << " success."; return UBRING_OK; } @@ -186,7 +186,7 @@ RETURN_CODE 
UbsShmMunmap(SHM *shm) return SHM_ERR; } - LOG(DEBUG) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " success."; + LOG(INFO) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " success."; return UBRING_OK; } @@ -201,17 +201,17 @@ RETURN_CODE UbsShmFree(SHM *shm) int ret = ubsmem_shmem_deallocate(shm->name); if (ret != UBSM_OK) { if (ret == UBSM_ERR_IN_USING) { - LOG(DEBUG) << "Ubs free shm=" << shm->name << " failed, resource attached=" << ret; + LOG(INFO) << "Ubs free shm=" << shm->name << " failed, resource attached=" << ret; return SHM_ERR_RESOURCE_ATTACHED; } else if (ret == UBSM_ERR_NOT_FOUND) { - LOG(DEBUG) << "Ubs free shm=" << shm->name << " failed, resource not found=" << ret; + LOG(INFO) << "Ubs free shm=" << shm->name << " failed, resource not found=" << ret; return SHM_ERR_NOT_FOUND; } LOG(ERROR) << "Ubs free shm="<< shm->name << " failed, ret=" << ret; return SHM_ERR; } shm->addr = NULL; - LOG(DEBUG) << "Ubs free shm=" << shm->name << " length=" << shm->len << " success."; + LOG(INFO) << "Ubs free shm=" << shm->name << " length=" << shm->len << " success."; return UBRING_OK; } @@ -244,7 +244,7 @@ RETURN_CODE UbsShmLocalFree(SHM *shm) return SHM_ERR; } shm->addr = NULL; - LOG(DEBUG) << "Ubs free local shm=" << shm->name << " length=" << shm->len << " success."; + LOG(INFO) << "Ubs free local shm=" << shm->name << " length=" << shm->len << " success."; return UBRING_OK; } @@ -268,7 +268,7 @@ RETURN_CODE UbsShmLocalMmap(SHM *shm, int prot) return SHM_ERR; } - LOG(DEBUG) << "Ubs mmap remote shm=" << shm->name << " length=" << shm->len << " success."; + LOG(INFO) << "Ubs mmap remote shm=" << shm->name << " length=" << shm->len << " success."; return UBRING_OK; } @@ -291,7 +291,7 @@ RETURN_CODE UbsShmRemoteFree(SHM *shm) return SHM_ERR; } - LOG(DEBUG) << "Ubs free Remote shm=" << shm->name << " length=" << shm->len << " success."; + LOG(INFO) << "Ubs free Remote shm=" << shm->name << " length=" << shm->len << " success."; return 
UBRING_OK; } @@ -400,7 +400,7 @@ static void DeleteShmToList(ShmList* shmList) } else { shmList->tail = NULL; } - LOG(DEBUG) << "Delete shm to list, name=" << curNode->shm.name << " size=" << shmList->size; + LOG(INFO) << "Delete shm to list, name=" << curNode->shm.name << " size=" << shmList->size; FREE_PTR(curNode); shmList->size--; } @@ -429,7 +429,7 @@ void *UbsShmCallback(void* args) LOG(ERROR) << "Ubs unmap shm=" << shm.name << " length=" << shm.len << " failed, ret=" << ret; return NULL; } - LOG(DEBUG) << "Ubs unmap shm=" << shm.name << " length=" << shm.len << " success."; + LOG(INFO) << "Ubs unmap shm=" << shm.name << " length=" << shm.len << " success."; ret = ubsmem_shmem_deallocate(shm.name); if (ret != UBSM_OK) { @@ -438,7 +438,7 @@ void *UbsShmCallback(void* args) return NULL; } DeleteShmToList(shmList); - LOG(DEBUG) << "Ubs free local shm=" << shm.name << " length=" << shm.len << " success."; + LOG(INFO) << "Ubs free local shm=" << shm.name << " length=" << shm.len << " success."; } return NULL; @@ -554,7 +554,7 @@ RETURN_CODE AddShmToList(ShmList *shmList, SHM *shm) shmList->tail = newShmNode; } shmList->size++; - LOG(DEBUG) << "Add shm to list success, shm name=" << shm->name << " size=" << shmList->size; + LOG(INFO) << "Add shm to list success, shm name=" << shm->name << " size=" << shmList->size; return UBRING_OK; } } diff --git a/src/brpc/ubring/ub_ring.cpp b/src/brpc/ubring/ub_ring.cpp index 80a02b1b68..1945d91139 100644 --- a/src/brpc/ubring/ub_ring.cpp +++ b/src/brpc/ubring/ub_ring.cpp @@ -119,7 +119,6 @@ RETURN_CODE UBRing::UbrTrxClose() { LOG(WARNING) << "Trx close, unlink local shm failed, trx local name=" << _trx->localShm.name << ", rc=" << unlinkRc; } - LOG(DEBUG) << "The peer is closed, local name=" << _trx->localShm.name; return UBRING_OK; } @@ -172,7 +171,6 @@ void* UBRing::UbrTrxCloseCallback(void* args) { int fd = (int)trx->localShm.fd; do { if (ATOMIC_LOAD(trx->closeCnt) == 0) { - LOG(DEBUG) << "Trx close callback skipped, already 
closed, name=" << trx->localShm.name; break; } ATOMIC_SUB(trx->closeCnt, 1); @@ -297,7 +295,6 @@ RETURN_CODE UBRing::UbrAddAsynClearTimer(UbrTrx *trx) { } if (trx->clearTimerFd > 0) { - LOG(DEBUG) << "Trx close timer already added, name=" << trx->localShm.name; return UBRING_OK; } @@ -722,7 +719,6 @@ RETURN_CODE UBRing::UbrTrxMapLocalShm(SHM *localShm) _trx->ubrTx.localDataStatusQ.addr = localShm->addr + DATASTATUSQ_ADDR_OFFSET; _trx->ubrTx.localDataStatusQ.len = UBR_DATASTATUSQ_LEN; size_t addrAlignedOffset = Aligned64Offset(localShm->addr + DATAQ_ADDR_OFFSET); - LOG(DEBUG) << "UbrRx's localDataQ address will aligned with offset=" << addrAlignedOffset; _trx->ubrRx.localDataQ.addr = localShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; _trx->ubrRx.localDataQ.len = localShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; return UBRING_OK; @@ -746,7 +742,6 @@ RETURN_CODE UBRing::UbrTrxMapRemoteShm(SHM *remoteShm) _trx->ubrRx.remoteDataStatusQ.addr = remoteShm->addr + DATASTATUSQ_ADDR_OFFSET; _trx->ubrRx.remoteDataStatusQ.len = UBR_DATASTATUSQ_LEN; size_t addrAlignedOffset = Aligned64Offset(remoteShm->addr + DATAQ_ADDR_OFFSET); - LOG(DEBUG) << "UbrTx's remoteDataQ will aligned with offset=" << addrAlignedOffset; _trx->ubrTx.remoteDataQ.addr = remoteShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; _trx->ubrTx.remoteDataQ.len = remoteShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; return UBRING_OK; diff --git a/src/brpc/ubring/ub_ring_manager.cpp b/src/brpc/ubring/ub_ring_manager.cpp index bf4182abe1..794fa4c845 100644 --- a/src/brpc/ubring/ub_ring_manager.cpp +++ b/src/brpc/ubring/ub_ring_manager.cpp @@ -148,7 +148,7 @@ RETURN_CODE UBRingManager::ReleaseUbrTrxFromMgr(UbrTrx *trx) { LOCK_GUARD(g_ubrTrxMgrMtx); uint32_t idx = trx->trxMgrIndex; if (g_ubrMgr.trxMgrUnitStatus[idx] == UBR_MGR_UNIT_FREE) { - LOG(DEBUG) << "Release trx already freed, name=" << trx->localShm.name; + LOG(INFO) << "Release trx already freed, name=" << trx->localShm.name; return UBRING_OK; } 
@@ -242,7 +242,7 @@ int32_t UBRingManager::UbEventCallback(const char *shmName) LOG(ERROR) << "Ub event callback failed, trx mgr is null."; return UBRING_ERR; } - LOG(DEBUG) << "Ub event callback is processing. shm_name=" << shmName; + LOG(INFO) << "Ub event callback is processing. shm_name=" << shmName; for (uint32_t i = 0; i < g_ubrMgr.trxCap; ++i) { if (g_ubrMgr.trxMgrUnitStatus[i] == UBR_MGR_UNIT_FREE) { @@ -253,7 +253,7 @@ int32_t UBRingManager::UbEventCallback(const char *shmName) strcmp(g_ubrMgr.trxMgr[i].remoteShm.name, shmName) == 0) { // 故障链路为该trx的对端shm ++g_ubEventCnt; int fd = (int)g_ubrMgr.trxMgr[i].localShm.fd; - LOG(INFO) << "Ub event callback, the fd of the faulty link is " << fd; + LOG(WARNING) << "Ub event callback, the fd of the faulty link is " << fd; return UBRing::UbrPassiveClearTrx(&g_ubrMgr.trxMgr[i], fd, UBR_UB_EVENT); } } From 2a2383955a2257fc49ca1049a984b7be66fbb42d Mon Sep 17 00:00:00 2001 From: zchuango Date: Fri, 8 May 2026 06:46:06 +0000 Subject: [PATCH 67/84] fix the declare_shm_ubs define not found bug --- ...declare_shm_ubs.h.tmpl => declare_shm_ubs.h} | 17 +++++++++++++++++ src/brpc/ubring/shm/shm_ubs.cpp | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) rename src/brpc/ubring/rack_mem/{declare_shm_ubs.h.tmpl => declare_shm_ubs.h} (64%) diff --git a/src/brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl b/src/brpc/ubring/rack_mem/declare_shm_ubs.h similarity index 64% rename from src/brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl rename to src/brpc/ubring/rack_mem/declare_shm_ubs.h index 79dd2dbaf3..b09b2bf943 100644 --- a/src/brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl +++ b/src/brpc/ubring/rack_mem/declare_shm_ubs.h @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #ifndef UBRING_MK_UBSM #error Do not include this file unless you know what you are doing. #endif diff --git a/src/brpc/ubring/shm/shm_ubs.cpp b/src/brpc/ubring/shm/shm_ubs.cpp index 1f5b2b1b98..3c183b6087 100644 --- a/src/brpc/ubring/shm/shm_ubs.cpp +++ b/src/brpc/ubring/shm/shm_ubs.cpp @@ -37,7 +37,7 @@ namespace brpc { namespace ubring { #define UBRING_MK_UBSM(ret, fn, args) ret (*fn) args = NULL -#include "brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl" +#include "brpc/ubring/rack_mem/declare_shm_ubs.h" #define SHM_RIGHT_MODE 0666 #define UBRING_REGION_NAME_PREFIX "UbrONE2ALLRegion" DEFINE_uint32(node_location, 1, "Location of the ub machine."); @@ -77,7 +77,7 @@ RETURN_CODE UbsShmInterfacesLoad(void) return UBRING_ERR; \ } \ } while (0) -#include "brpc/ubring/rack_mem/declare_shm_ubs.h.tmpl" +#include "brpc/ubring/rack_mem/declare_shm_ubs.h" dlclose(dlhandler); dlhandler = NULL; From b767d54d9168e1767b25cac08b139616267d919d Mon Sep 17 00:00:00 2001 From: zchuango Date: Fri, 8 May 2026 08:37:17 +0000 Subject: [PATCH 68/84] add the timer_mgr support for macos and format code style --- src/brpc/ubring/common/common.h | 27 +++-- src/brpc/ubring/shm/shm_ipc.cpp | 3 - src/brpc/ubring/timer/timer_mgr.cpp | 169 +++++++++++++++++++--------- src/brpc/ubring/timer/timer_mgr.h | 11 +- src/brpc/ubring/ub_helper.h | 41 ++++--- 5 files changed, 157 insertions(+), 94 deletions(-) diff --git 
a/src/brpc/ubring/common/common.h b/src/brpc/ubring/common/common.h index b2ab945b7f..a4f0ca0b3d 100644 --- a/src/brpc/ubring/common/common.h +++ b/src/brpc/ubring/common/common.h @@ -51,7 +51,8 @@ using AtomicUintFast8 = std::atomic; #define ATOMIC_LOAD(var) var.load() #define ATOMIC_ADD(var, value) var.fetch_add(value) #define ATOMIC_SUB(var, value) var.fetch_sub(value) -#define ATOMIC_COMPARE_EXCHANGE_STRONG(var, expected, desired) var.compare_exchange_strong((expected), (desired)) +#define ATOMIC_COMPARE_EXCHANGE_STRONG(var, expected, desired) \ + var.compare_exchange_strong((expected), (desired)) #else #include typedef atomic_int AtomicInt; @@ -66,6 +67,7 @@ typedef atomic_uint_fast8_t AtomicUintFast8; #define ATOMIC_COMPARE_EXCHANGE_STRONG(var, expected, desired) \ atomic_compare_exchange_strong(&(var), &(expected), (desired)) #endif + #define ISB() __asm__ __volatile__("isb" ::: "memory") #define DSB() __asm__ __volatile__("dsb sy" ::: "memory") @@ -75,11 +77,12 @@ typedef int errno_t; #ifndef EOK #define EOK 0 #endif + #define MAX_NODE_NUM 8 #define IPV4_FIRST_BYTE_OFFSET 24 #define COPY_ALIGNED_DATA_BYTES 64 -static inline int Copy64Byte(int8_t *dst, int8_t *src) -{ + +static inline int Copy64Byte(int8_t *dst, int8_t *src) { #ifdef LS64 asm volatile ( "mov x12, %0\n" @@ -110,8 +113,8 @@ static inline int Copy64Byte(int8_t *dst, int8_t *src) #define MSEC_TO_SEC 1000 #define MAX_IP_PORT_STR_LEN 23 #define DECIMAL_BASE 10 -static inline uint64_t GetCurNanoSeconds(void) -{ + +static inline uint64_t GetCurNanoSeconds(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); uint64_t timestamp = (uint64_t)ts.tv_sec * SEC_TO_NSEC + (uint64_t)ts.tv_nsec; @@ -132,15 +135,12 @@ typedef enum { UBRING_RETRY = -2, UBRING_REENTRY = -3, UBRING_ERR_TIMEOUT = -4, - // SHM Module SHM_ERR = -100, SHM_ERR_INPUT_INVALID = -101, SHM_ERR_EXIST = -102, SHM_ERR_RESOURCE_ATTACHED = -103, SHM_ERR_NOT_FOUND = -104, SHM_ERR_UBSM_NET_ERR = -105, - - // MPA模块 MPA_UDP_ERR = -200, 
MPA_UDP_NO_TRX = -201, MPA_UDP_STATUS_NOT_JOINED = -202, @@ -152,20 +152,18 @@ typedef enum { MPA_UDP_STATUS_ALREADY_CONNECTED = -208, MPA_UDP_OLD_RDLIST = -209, MPA_UDP_RDLIST_FULL = -210, - // ubr模块 UBR_NOT_CONNECTED = -300, UBR_ERR_ADDR_IN_USE = -301, } RETURN_CODE; #define ALIGN_BYTES 0x40 #define CHECKED_ALIGN_BITS (ALIGN_BYTES - 1) -static inline size_t Aligned64Offset(uint8_t *addr) -{ + +static inline size_t Aligned64Offset(uint8_t *addr) { return ((ALIGN_BYTES - (((size_t)(addr)) & CHECKED_ALIGN_BITS)) & CHECKED_ALIGN_BITS); } -static inline RETURN_CODE HasTimedOut(const uint64_t startTime, const uint32_t timeout) -{ +static inline RETURN_CODE HasTimedOut(const uint64_t startTime, const uint32_t timeout) { uint64_t endTime = startTime + (uint64_t)timeout * SEC_TO_NSEC; if (GetCurNanoSeconds() > endTime) { LOG(ERROR) << "task time out " << timeout << " seconds."; @@ -173,4 +171,5 @@ static inline RETURN_CODE HasTimedOut(const uint64_t startTime, const uint32_t t } return UBRING_OK; } -#endif //BRPC_COMMON_H \ No newline at end of file + +#endif // BRPC_COMMON_H \ No newline at end of file diff --git a/src/brpc/ubring/shm/shm_ipc.cpp b/src/brpc/ubring/shm/shm_ipc.cpp index 280b8c57fa..878ca093ea 100644 --- a/src/brpc/ubring/shm/shm_ipc.cpp +++ b/src/brpc/ubring/shm/shm_ipc.cpp @@ -59,7 +59,6 @@ RETURN_CODE IpcShmLocalMalloc(SHM *shm) } close(fd); - LOG(INFO) << "IPC Create shm=" << shm->name << " length=" << shm->len << " success."; return UBRING_OK; } @@ -97,7 +96,6 @@ RETURN_CODE IpcShmFree(SHM *shm) LOG_EVERY_SECOND(ERROR) << "IPC free shm=" << shm->name << " failed, errno=" << errno; return SHM_ERR; } - LOG(INFO) << "IPC free shm=" << shm->name << " success."; return UBRING_OK; } @@ -148,7 +146,6 @@ RETURN_CODE IpcShmRemoteMalloc(SHM *shm) } close(fd); - LOG(INFO) << "IPC malloc remote shm=" << shm->name << " length=" << shm->len << " success."; return UBRING_OK; } diff --git a/src/brpc/ubring/timer/timer_mgr.cpp b/src/brpc/ubring/timer/timer_mgr.cpp 
index 948cca1cfe..8995332af5 100644 --- a/src/brpc/ubring/timer/timer_mgr.cpp +++ b/src/brpc/ubring/timer/timer_mgr.cpp @@ -28,6 +28,7 @@ namespace brpc { namespace ubring { + int32_t g_epollFd = -1; std::atomic g_totalTimerNum; TimerFdCtx *g_timerFdCtxMap = NULL; @@ -35,8 +36,14 @@ uint32_t maxSystemFd; static pthread_t g_epollExecuteThread; static int32_t g_timerModuleInitialized; -static RETURN_CODE DeleteTimerInner(uint32_t fd) -{ +#if defined(OS_MACOSX) +static int timerfd_create_macosx(int clockid, int flags); +static int timerfd_settime_macosx(int fd, int flags, + const struct itimerspec *new_value, + struct itimerspec *old_value); +#endif + +static RETURN_CODE DeleteTimerInner(uint32_t fd) { if (g_timerFdCtxMap == NULL) { return UBRING_OK; } @@ -58,8 +65,13 @@ static RETURN_CODE DeleteTimerInner(uint32_t fd) pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); - // I/O outside lock +#if defined(OS_LINUX) epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL); +#elif defined(OS_MACOSX) + struct kevent evt; + EV_SET(&evt, fd, EVFILT_TIMER, EV_DELETE, 0, 0, NULL); + kevent(g_epollFd, &evt, 1, NULL, 0, NULL); +#endif uint64_t exp = 0; read((int)fd, &exp, sizeof(exp)); @@ -69,11 +81,14 @@ static RETURN_CODE DeleteTimerInner(uint32_t fd) return UBRING_OK; } -static RETURN_CODE StartTimeEpoll(void) -{ +static RETURN_CODE StartTimeEpoll(void) { +#if defined(OS_LINUX) g_epollFd = epoll_create1(0); +#elif defined(OS_MACOSX) + g_epollFd = kqueue(); +#endif if (UNLIKELY(g_epollFd == -1)) { - LOG(ERROR) << "Failed to create epoll. errno=" << errno; + LOG(ERROR) << "Failed to create epoll/kqueue. 
errno=" << errno; return UBRING_ERR; } @@ -85,15 +100,15 @@ static RETURN_CODE StartTimeEpoll(void) return UBRING_OK; } -static RETURN_CODE TimerSpinLocksInit(void) -{ +static RETURN_CODE TimerSpinLocksInit(void) { if (g_timerFdCtxMap == NULL) { LOG(ERROR) << "Timer module is not fully initialized."; return UBRING_ERR; } for (uint32_t fd = 0; fd < maxSystemFd; fd++) { - int ret = pthread_spin_init(&g_timerFdCtxMap[fd].spinLock, PTHREAD_PROCESS_PRIVATE); + int ret = pthread_spin_init(&g_timerFdCtxMap[fd].spinLock, + PTHREAD_PROCESS_PRIVATE); if (ret != EOK) { LOG(ERROR) << "Failed to initialize spin lock for fd=" << fd; for (uint32_t cleanupFd = 0; cleanupFd < fd; cleanupFd++) { @@ -105,20 +120,13 @@ static RETURN_CODE TimerSpinLocksInit(void) return UBRING_OK; } -// Execute callback directly in the epoll thread. -// Previously this spawned a new pthread per timer firing, which caused EAGAIN -// under high load. Since callbacks are lightweight (just setting flags or -// scheduling bthreads), running them inline is safe and avoids thread exhaustion. 
-static RETURN_CODE ExecuteCallback(int32_t timerFd) -{ +static RETURN_CODE ExecuteCallback(int32_t timerFd) { UnifiedCallback((void *)(&g_timerFdCtxMap[timerFd])); return UBRING_OK; } -static RETURN_CODE TimerCtxMapCompletion(void) -{ - memset(g_timerFdCtxMap, 0, - sizeof(TimerFdCtx) * maxSystemFd); +static RETURN_CODE TimerCtxMapCompletion(void) { + memset(g_timerFdCtxMap, 0, sizeof(TimerFdCtx) * maxSystemFd); RETURN_CODE ret = TimerSpinLocksInit(); if (ret != UBRING_OK) { @@ -128,8 +136,7 @@ static RETURN_CODE TimerCtxMapCompletion(void) return UBRING_OK; } -RETURN_CODE TimerInit(void) -{ +RETURN_CODE TimerInit(void) { if (g_timerModuleInitialized > 0) { return UBRING_OK; } @@ -171,8 +178,7 @@ RETURN_CODE TimerInit(void) return UBRING_OK; } -void *UnifiedCallback(void *args) -{ +void *UnifiedCallback(void *args) { TimerFdCtx *ctx = (TimerFdCtx *)args; if (pthread_spin_lock(&ctx->spinLock) != 0) { return NULL; @@ -183,7 +189,6 @@ void *UnifiedCallback(void *args) return NULL; } - // Snapshot callback info under lock, then release before executing void *(*cb)(void *) = ctx->cb; void *cbArgs = ctx->args; uint32_t fd = ctx->fd; @@ -192,7 +197,6 @@ void *UnifiedCallback(void *args) pthread_spin_unlock(&ctx->spinLock); - // Execute callback OUTSIDE the spinlock cb(cbArgs); if (!isPeriodical) { @@ -201,36 +205,52 @@ void *UnifiedCallback(void *args) return NULL; } -void *TimerEpoll(void *args) -{ +void *TimerEpoll(void *args) { UNREFERENCE_PARAM(args); +#if defined(OS_LINUX) struct epoll_event readyEvents[MAX_TIMER]; +#elif defined(OS_MACOSX) + struct kevent readyEvents[MAX_TIMER]; +#endif + while (1) { if (g_timerModuleInitialized <= 0) { LOG(ERROR) << "The Timer module is not initialized."; break; } - - int32_t readyNum = epoll_wait(g_epollFd, readyEvents, MAX_TIMER, TIMER_EPOLL_WAIT_TIMEOUT); + +#if defined(OS_LINUX) + int32_t readyNum = epoll_wait(g_epollFd, readyEvents, MAX_TIMER, + TIMER_EPOLL_WAIT_TIMEOUT); +#elif defined(OS_MACOSX) + struct timespec timeout = 
{0, TIMER_EPOLL_WAIT_TIMEOUT * 1000000}; + int32_t readyNum = kevent(g_epollFd, NULL, 0, readyEvents, MAX_TIMER, &timeout); +#endif + if (UNLIKELY(readyNum == -1)) { error_t err = errno; if (err == EINTR) { - LOG_EVERY_SECOND(WARNING) << "Epoll wait was interrupted. errno=" << err; + LOG_EVERY_SECOND(WARNING) << "Epoll/Kqueue wait was interrupted. errno=" << err; continue; } else if (err == EBADF) { LOG(WARNING) << "The Timer module is destroyed."; break; } - LOG(ERROR) << "Epoll wait internal error. errno=" << err; + LOG(ERROR) << "Epoll/Kqueue wait internal error. errno=" << err; break; } for (int32_t i = 0; i < readyNum; i++) { +#if defined(OS_LINUX) struct epoll_event *event = &readyEvents[i]; int32_t timerFd = event->data.fd; +#elif defined(OS_MACOSX) + struct kevent *event = &readyEvents[i]; + int32_t timerFd = event->ident; +#endif + uint64_t exp = 0; if (read(timerFd, &exp, sizeof(exp)) < 0) { - // EBADF means the fd was already closed by DeleteTimerSafe, skip silently if (errno != EBADF) { LOG(ERROR) << "Failed to read timerfd=" << timerFd << " errno=" << errno; } @@ -251,8 +271,7 @@ void *TimerEpoll(void *args) return NULL; } -void DeleteTimerSafe(uint32_t fd) -{ +void DeleteTimerSafe(uint32_t fd) { if (g_timerFdCtxMap == NULL) { return; } @@ -266,7 +285,6 @@ void DeleteTimerSafe(uint32_t fd) return; } - // Mark as not-using under lock so no new callbacks get dispatched g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; g_timerFdCtxMap[fd].cb = NULL; g_timerFdCtxMap[fd].args = NULL; @@ -275,18 +293,22 @@ void DeleteTimerSafe(uint32_t fd) pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); - // I/O operations outside the spin lock to avoid blocking other threads +#if defined(OS_LINUX) epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL); +#elif defined(OS_MACOSX) + struct kevent evt; + EV_SET(&evt, fd, EVFILT_TIMER, EV_DELETE, 0, 0, NULL); + kevent(g_epollFd, &evt, 1, NULL, 0, NULL); +#endif - // Drain any pending data so the epoll thread won't read a 
closed fd uint64_t exp = 0; read((int)fd, &exp, sizeof(exp)); close((int)fd); atomic_fetch_sub(&g_totalTimerNum, 1); } -void DeleteTimer(uint32_t fd) -{ + +void DeleteTimer(uint32_t fd) { if (g_timerFdCtxMap == NULL) { LOG(WARNING) << "The timer is not initialized."; return; @@ -295,14 +317,18 @@ void DeleteTimer(uint32_t fd) g_timerFdCtxMap[fd].periodical = 0; } -int32_t TimerStart(const struct itimerspec *time, void *(*cb)(void *), void *args) -{ +int32_t TimerStart(const struct itimerspec *time, void *(*cb)(void *), void *args) { if (g_epollFd == -1) { - LOG(ERROR) << "Timer epoll encountered internal error."; + LOG(ERROR) << "Timer epoll/kqueue encountered internal error."; return -1; } +#if defined(OS_LINUX) int timerFd = timerfd_create(CLOCK_MONOTONIC, 0); +#elif defined(OS_MACOSX) + int timerFd = timerfd_create_macosx(CLOCK_MONOTONIC, 0); +#endif + if (UNLIKELY(timerFd >= (int)maxSystemFd || timerFd == -1)) { LOG(ERROR) << "Failed to create timerfd=" << timerFd << " errno=" << errno; return -1; @@ -312,28 +338,49 @@ int32_t TimerStart(const struct itimerspec *time, void *(*cb)(void *), void *arg g_timerFdCtxMap[timerFd].cb = cb; g_timerFdCtxMap[timerFd].args = args; g_timerFdCtxMap[timerFd].fd = (uint32_t)timerFd; - + if (LIKELY(time->it_interval.tv_sec > 0 || time->it_interval.tv_nsec > 0)) { g_timerFdCtxMap[timerFd].periodical = 1; } +#if defined(OS_LINUX) struct epoll_event event = { .events = EPOLLIN, .data = {.fd = timerFd} }; int32_t ret = epoll_ctl(g_epollFd, EPOLL_CTL_ADD, timerFd, &event); +#elif defined(OS_MACOSX) + struct kevent event; + uint64_t timeout_nsec = time->it_value.tv_sec * 1000000000ULL + time->it_value.tv_nsec; + uint64_t interval_nsec = time->it_interval.tv_sec * 1000000000ULL + time->it_interval.tv_nsec; + EV_SET(&event, timerFd, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, + timeout_nsec / 1000000, NULL); + int32_t ret = kevent(g_epollFd, &event, 1, NULL, 0, NULL); +#endif + if (UNLIKELY(ret != 0)) { CloseTimerFd((uint32_t)timerFd); - 
LOG(ERROR) << "Failed to add event to epoll. errno=" << errno; + LOG(ERROR) << "Failed to add event to epoll/kqueue. errno=" << errno; return -1; } atomic_fetch_add(&g_totalTimerNum, 1); +#if defined(OS_LINUX) ret = timerfd_settime(timerFd, 0, time, NULL); +#elif defined(OS_MACOSX) + ret = timerfd_settime_macosx(timerFd, 0, time, NULL); +#endif + if (UNLIKELY(ret != 0)) { +#if defined(OS_LINUX) if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, timerFd, NULL) != 0) { +#elif defined(OS_MACOSX) + struct kevent evt; + EV_SET(&evt, timerFd, EVFILT_TIMER, EV_DELETE, 0, 0, NULL); + if (kevent(g_epollFd, &evt, 1, NULL, 0, NULL) != 0) { +#endif LOG(ERROR) << "Failed to delete the timer fd=" << timerFd << " with errno=" << errno; } CloseTimerFd((uint32_t)timerFd); @@ -345,13 +392,11 @@ int32_t TimerStart(const struct itimerspec *time, void *(*cb)(void *), void *arg return timerFd; } -uint32_t GetActiveTimerNum(void) -{ +uint32_t GetActiveTimerNum(void) { return atomic_load(&g_totalTimerNum); } -void CloseTimerFd(uint32_t fd) -{ +void CloseTimerFd(uint32_t fd) { g_timerFdCtxMap[fd].cb = NULL; g_timerFdCtxMap[fd].args = NULL; g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; @@ -363,8 +408,7 @@ void CloseTimerFd(uint32_t fd) } } -void TimerModuleDestroy(void) -{ +void TimerModuleDestroy(void) { uint32_t maxFd = maxSystemFd; if (g_timerFdCtxMap) { for (uint32_t fd = 0; fd < maxFd; fd++) { @@ -384,8 +428,7 @@ void TimerModuleDestroy(void) } } -RETURN_CODE TimerFdCtxValidate(uint32_t fd) -{ +RETURN_CODE TimerFdCtxValidate(uint32_t fd) { if (fd >= maxSystemFd) { LOG(ERROR) << "TimerFd=" << fd << " is out of range=" << maxSystemFd; return UBRING_ERR; @@ -401,5 +444,25 @@ RETURN_CODE TimerFdCtxValidate(uint32_t fd) return UBRING_OK; } + +#if defined(OS_MACOSX) +static int timerfd_create_macosx(int clockid, int flags) { + int pipefd[2]; + if (pipe(pipefd) == -1) { + return -1; + } + return pipefd[0]; } -} \ No newline at end of file + +static int timerfd_settime_macosx(int fd, int flags, + 
const struct itimerspec *new_value, + struct itimerspec *old_value) { + if (old_value != NULL) { + memset(old_value, 0, sizeof(struct itimerspec)); + } + return 0; +} +#endif + +} // namespace ubring +} // namespace brpc \ No newline at end of file diff --git a/src/brpc/ubring/timer/timer_mgr.h b/src/brpc/ubring/timer/timer_mgr.h index 0ef1532715..ad62b23989 100644 --- a/src/brpc/ubring/timer/timer_mgr.h +++ b/src/brpc/ubring/timer/timer_mgr.h @@ -17,11 +17,18 @@ #ifndef BRPC_TIMER_MGR_H #define BRPC_TIMER_MGR_H -#include -#include #include #include "brpc/ubring/common/common.h" +#if defined(OS_LINUX) +#include +#include +#elif defined(OS_MACOSX) +#include +#include +#include +#endif + #define MAX_TIMER 1024 #define TIMER_EPOLL_WAIT_TIMEOUT 1000 diff --git a/src/brpc/ubring/ub_helper.h b/src/brpc/ubring/ub_helper.h index 4de3ead291..6ad9ebe3eb 100644 --- a/src/brpc/ubring/ub_helper.h +++ b/src/brpc/ubring/ub_helper.h @@ -24,42 +24,39 @@ #include #include "bthread/types.h" - namespace brpc { - namespace ubring { +namespace ubring { - void GlobalRelease(); +void GlobalRelease(); - void GlobalUBInitializeOrDie(); +void GlobalUBInitializeOrDie(); - bool InitPollingModeWithTag(bthread_tag_t tag, - std::function callback = nullptr, - std::function init_fn = nullptr, - std::function release_fn = nullptr); +bool InitPollingModeWithTag(bthread_tag_t tag, + std::function callback = nullptr, + std::function init_fn = nullptr, + std::function release_fn = nullptr); - // If the UB environment is available - bool IsUBAvailable(); +bool IsUBAvailable(); - // Disable UB in the remaining lifetime of the process - void GlobalDisableUb(); +void GlobalDisableUb(); - // If the given protocol supported by UB - bool SupportedByUB(std::string protocol); +bool SupportedByUB(std::string protocol); - } // namespace ubring +} // namespace ubring } // namespace brpc + #else + namespace brpc { - namespace ubring { +namespace ubring { - void GlobalRelease(); +void GlobalRelease(); - // 
Initialize UB environment - // Exit if failed - void GlobalUBInitializeOrDie(); +void GlobalUBInitializeOrDie(); - } // namespace ubring +} // namespace ubring } // namespace brpc + #endif // if BRPC_WITH_UBRING -#endif //BRPC_UB_HELPER_H +#endif // BRPC_UB_HELPER_H \ No newline at end of file From f5f2dfba52b615d3e94a50cf6b7c09c4ae66ef2e Mon Sep 17 00:00:00 2001 From: zchuango Date: Fri, 8 May 2026 11:16:06 +0000 Subject: [PATCH 69/84] fix the bug for macos epoll --- src/brpc/ubring/common/common.h | 4 +++ src/brpc/ubring/shm/shm_def.h | 42 +++++++++++++------------- src/brpc/ubring/shm/shm_ipc.h | 19 ++++++------ src/brpc/ubring/shm/shm_mgr.cpp | 46 +++++++++++------------------ src/brpc/ubring/shm/shm_ubs.cpp | 6 +++- src/brpc/ubring/timer/timer_mgr.cpp | 2 +- src/brpc/ubring/timer/timer_mgr.h | 1 + 7 files changed, 58 insertions(+), 62 deletions(-) diff --git a/src/brpc/ubring/common/common.h b/src/brpc/ubring/common/common.h index a4f0ca0b3d..4390726954 100644 --- a/src/brpc/ubring/common/common.h +++ b/src/brpc/ubring/common/common.h @@ -82,6 +82,10 @@ typedef int errno_t; #define IPV4_FIRST_BYTE_OFFSET 24 #define COPY_ALIGNED_DATA_BYTES 64 +#if defined(OS_MACOSX) +#define EPOLLET 0x80000000 +#endif + static inline int Copy64Byte(int8_t *dst, int8_t *src) { #ifdef LS64 asm volatile ( diff --git a/src/brpc/ubring/shm/shm_def.h b/src/brpc/ubring/shm/shm_def.h index 230943877e..0c28084b96 100644 --- a/src/brpc/ubring/shm/shm_def.h +++ b/src/brpc/ubring/shm/shm_def.h @@ -35,29 +35,29 @@ #define SHM_ALLOC_UNIT_SIZE (4 * 1024 * 1024) // 4MB namespace brpc { - namespace ubring { - typedef enum { SHM_TYPE_UB, SHM_TYPE_IPC, SHM_TYPE_UBS, SHM_TYPE_UNSUPPORT } SHM_TYPE; +namespace ubring { +typedef enum { SHM_TYPE_UB, SHM_TYPE_IPC, SHM_TYPE_UBS, SHM_TYPE_UNSUPPORT } SHM_TYPE; - typedef struct { - uint8_t *addr; - size_t len; - uint64_t memid; - char name[SHM_MAX_NAME_BUFF_LEN]; - uint32_t fd; - } SHM; +typedef struct { + uint8_t *addr; + size_t len; + uint64_t memid; + 
char name[SHM_MAX_NAME_BUFF_LEN]; + uint32_t fd; +} SHM; - typedef struct ShmListNode { - SHM shm; - struct ShmListNode *next; - struct ShmListNode *prev; - } ShmListNode; +typedef struct ShmListNode { + SHM shm; + struct ShmListNode *next; + struct ShmListNode *prev; +} ShmListNode; - typedef struct { - ShmListNode* head; - ShmListNode* tail; - size_t size; - pthread_mutex_t shmLock; - } ShmList; - } +typedef struct { + ShmListNode* head; + ShmListNode* tail; + size_t size; + pthread_mutex_t shmLock; +} ShmList; +} } #endif //BRPC_SHM_DEF_H \ No newline at end of file diff --git a/src/brpc/ubring/shm/shm_ipc.h b/src/brpc/ubring/shm/shm_ipc.h index 29d88307fe..34e8307bb8 100644 --- a/src/brpc/ubring/shm/shm_ipc.h +++ b/src/brpc/ubring/shm/shm_ipc.h @@ -18,21 +18,20 @@ #ifndef BRPC_SHM_IPC_H #define BRPC_SHM_IPC_H - #include "shm_def.h" #define SHM_IPC_MODE 0666 namespace brpc { - namespace ubring { - RETURN_CODE IpcShmLocalMalloc(SHM *shm); - RETURN_CODE IpcShmMunmap(SHM *shm); - RETURN_CODE IpcShmFree(SHM *shm); - RETURN_CODE IpcShmLocalFree(SHM *shm); - RETURN_CODE IpcShmRemoteMalloc(SHM *shm); - RETURN_CODE IpcShmRemoteFree(SHM *shm); - RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot); - } +namespace ubring { + RETURN_CODE IpcShmLocalMalloc(SHM *shm); + RETURN_CODE IpcShmMunmap(SHM *shm); + RETURN_CODE IpcShmFree(SHM *shm); + RETURN_CODE IpcShmLocalFree(SHM *shm); + RETURN_CODE IpcShmRemoteMalloc(SHM *shm); + RETURN_CODE IpcShmRemoteFree(SHM *shm); + RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot); +} } #endif //BRPC_SHM_IPC_H \ No newline at end of file diff --git a/src/brpc/ubring/shm/shm_mgr.cpp b/src/brpc/ubring/shm/shm_mgr.cpp index bef282b674..74e722d344 100644 --- a/src/brpc/ubring/shm/shm_mgr.cpp +++ b/src/brpc/ubring/shm/shm_mgr.cpp @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. 
- #include #include #include @@ -31,8 +30,7 @@ namespace ubring { DEFINE_int32(ub_shm_type, 1, "shm type: 1-ipc; 2-ub_ring"); static SHM_TYPE g_shmType; -static bool CheckInputShmParam(SHM *shm) -{ +static bool CheckInputShmParam(SHM *shm) { if (shm == NULL) { LOG(ERROR) << "Input Param shm is NULL."; return false; @@ -40,7 +38,8 @@ static bool CheckInputShmParam(SHM *shm) size_t nameLen = strlen(shm->name); if (nameLen <= 0 || nameLen > SHM_MAX_NAME_LEN) { - LOG(ERROR) << "Shm name=" << shm->name << ", length=" << shm->len << ", which is not between 1 and " << SHM_MAX_NAME_LEN; + LOG(ERROR) << "Shm name=" << shm->name << ", length=" << shm->len + << ", which is not between 1 and " << SHM_MAX_NAME_LEN; return false; } @@ -57,8 +56,7 @@ static bool CheckInputShmParam(SHM *shm) return true; } -RETURN_CODE ShmMgrInit(void) -{ +RETURN_CODE ShmMgrInit(void) { if (UNLIKELY(FLAGS_ub_shm_type >= (uint32_t)SHM_TYPE_UNSUPPORT)) { LOG(ERROR) << "Shm type config=" << FLAGS_ub_shm_type << " is not supported."; return UBRING_ERR; @@ -75,8 +73,7 @@ RETURN_CODE ShmMgrInit(void) return UBRING_OK; } -void ShmMgrFini(void) -{ +void ShmMgrFini(void) { if (g_shmType == SHM_TYPE_UBS) { if (UbsShmFini() != UBRING_OK) { LOG(ERROR) << "Fini beiming ubs shm failed."; @@ -86,13 +83,11 @@ void ShmMgrFini(void) LOG(INFO) << "shm mgr fini success, shm type=" << g_shmType; } -void SetShmType(SHM_TYPE type) -{ +void SetShmType(SHM_TYPE type) { g_shmType = type; } -RETURN_CODE ShmLocalMalloc(SHM *shm) -{ +RETURN_CODE ShmLocalMalloc(SHM *shm) { if (UNLIKELY(!CheckInputShmParam(shm))) { LOG(ERROR) << "Input param shm is invalid."; return SHM_ERR_INPUT_INVALID; @@ -113,8 +108,7 @@ RETURN_CODE ShmLocalMalloc(SHM *shm) return rc; } -RETURN_CODE ShmLocalCalloc(SHM *shm) -{ +RETURN_CODE ShmLocalCalloc(SHM *shm) { RETURN_CODE rc = ShmLocalMalloc(shm); if (UNLIKELY(rc != UBRING_OK)) { LOG(ERROR) << "Failed to alloc local shm."; @@ -124,8 +118,7 @@ RETURN_CODE ShmLocalCalloc(SHM *shm) return UBRING_OK; } 
-RETURN_CODE ShmLocalFree(SHM *shm) -{ +RETURN_CODE ShmLocalFree(SHM *shm) { if (UNLIKELY(!CheckInputShmParam(shm))) { LOG(ERROR) << "Input param shm is invalid."; return SHM_ERR_INPUT_INVALID; @@ -146,8 +139,7 @@ RETURN_CODE ShmLocalFree(SHM *shm) return rc; } -RETURN_CODE ShmRemoteMalloc(SHM *shm) -{ +RETURN_CODE ShmRemoteMalloc(SHM *shm) { if (UNLIKELY(!CheckInputShmParam(shm))) { LOG(ERROR) << "Input param shm is invalid."; return SHM_ERR_INPUT_INVALID; @@ -168,8 +160,7 @@ RETURN_CODE ShmRemoteMalloc(SHM *shm) return rc; } -RETURN_CODE ShmRemoteFree(SHM *shm) -{ +RETURN_CODE ShmRemoteFree(SHM *shm) { if (UNLIKELY(!CheckInputShmParam(shm))) { LOG(ERROR) << "Input param shm is invalid."; return SHM_ERR_INPUT_INVALID; @@ -190,8 +181,7 @@ RETURN_CODE ShmRemoteFree(SHM *shm) return rc; } -RETURN_CODE ShmLocalMmap(SHM *shm, int prot) -{ +RETURN_CODE ShmLocalMmap(SHM *shm, int prot) { if (UNLIKELY(!CheckInputShmParam(shm))) { LOG(ERROR) << "Input param shm is invalid."; return SHM_ERR_INPUT_INVALID; @@ -212,13 +202,12 @@ RETURN_CODE ShmLocalMmap(SHM *shm, int prot) return rc; } -RETURN_CODE ShmMunmap(SHM *shm) -{ +RETURN_CODE ShmMunmap(SHM *shm) { if (UNLIKELY(!CheckInputShmParam(shm))) { LOG(ERROR) << "Input param shm is invalid."; return SHM_ERR_INPUT_INVALID; } - + RETURN_CODE rc = UBRING_OK; switch (g_shmType) { case SHM_TYPE_IPC: @@ -233,14 +222,13 @@ RETURN_CODE ShmMunmap(SHM *shm) } return rc; } - -RETURN_CODE ShmFree(SHM *shm) -{ + +RETURN_CODE ShmFree(SHM *shm) { if (UNLIKELY(!CheckInputShmParam(shm))) { LOG(ERROR) << "Input param shm is invalid."; return SHM_ERR_INPUT_INVALID; } - + RETURN_CODE rc = UBRING_OK; switch (g_shmType) { case SHM_TYPE_IPC: diff --git a/src/brpc/ubring/shm/shm_ubs.cpp b/src/brpc/ubring/shm/shm_ubs.cpp index 3c183b6087..0ef725686d 100644 --- a/src/brpc/ubring/shm/shm_ubs.cpp +++ b/src/brpc/ubring/shm/shm_ubs.cpp @@ -55,9 +55,13 @@ RETURN_CODE UbsShmInterfacesLoad(void) { #ifndef UT const char *ubsmSdkLocation = 
"/usr/local/ubs_mem/lib/libubsm_sdk.so"; +#if defined(OS_LINUX) void* dlhandler = dlmopen(LM_ID_NEWLM, ubsmSdkLocation, RTLD_NOW | RTLD_LOCAL | RTLD_NODELETE | RTLD_DEEPBIND); +#elif defined(OS_MACOSX) + void* dlhandler = dlopen(ubsmSdkLocation, RTLD_NOW | RTLD_LOCAL | RTLD_NODELETE); +#endif if (dlhandler == NULL) { - LOG(ERROR) << "Dlmopen libubsm_sdk.so in " << ubsmSdkLocation << " failed, error:" << dlerror(); + LOG(ERROR) << "Dlopen libubsm_sdk.so in " << ubsmSdkLocation << " failed, error:" << dlerror(); return UBRING_ERR; } diff --git a/src/brpc/ubring/timer/timer_mgr.cpp b/src/brpc/ubring/timer/timer_mgr.cpp index 8995332af5..d62b321e60 100644 --- a/src/brpc/ubring/timer/timer_mgr.cpp +++ b/src/brpc/ubring/timer/timer_mgr.cpp @@ -228,7 +228,7 @@ void *TimerEpoll(void *args) { #endif if (UNLIKELY(readyNum == -1)) { - error_t err = errno; + errno_t err = errno; if (err == EINTR) { LOG_EVERY_SECOND(WARNING) << "Epoll/Kqueue wait was interrupted. errno=" << err; continue; diff --git a/src/brpc/ubring/timer/timer_mgr.h b/src/brpc/ubring/timer/timer_mgr.h index ad62b23989..a925c0bf58 100644 --- a/src/brpc/ubring/timer/timer_mgr.h +++ b/src/brpc/ubring/timer/timer_mgr.h @@ -18,6 +18,7 @@ #ifndef BRPC_TIMER_MGR_H #define BRPC_TIMER_MGR_H #include +#include #include "brpc/ubring/common/common.h" #if defined(OS_LINUX) From f63342be953447f1b976588abdf06ab8b7fdeec5 Mon Sep 17 00:00:00 2001 From: zchuango Date: Fri, 8 May 2026 11:44:25 +0000 Subject: [PATCH 70/84] fix the timespece bug --- src/brpc/ubring/shm/shm_ubs.cpp | 2 +- src/brpc/ubring/timer/timer_mgr.cpp | 12 ++++++------ src/brpc/ubring/timer/timer_mgr.h | 2 +- src/brpc/ubring/ub_ring.cpp | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/brpc/ubring/shm/shm_ubs.cpp b/src/brpc/ubring/shm/shm_ubs.cpp index 0ef725686d..096b1a8678 100644 --- a/src/brpc/ubring/shm/shm_ubs.cpp +++ b/src/brpc/ubring/shm/shm_ubs.cpp @@ -451,7 +451,7 @@ void *UbsShmCallback(void* args) RETURN_CODE 
UbsShmAddTimer(ShmList *shmList) { uint32_t timerInterval = FLAGS_ub_flying_io_timeout; - struct itimerspec timeSpec = { + ::itimerspec timeSpec = { .it_interval = {.tv_sec = timerInterval, .tv_nsec = 0}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; diff --git a/src/brpc/ubring/timer/timer_mgr.cpp b/src/brpc/ubring/timer/timer_mgr.cpp index d62b321e60..5fb4f84f95 100644 --- a/src/brpc/ubring/timer/timer_mgr.cpp +++ b/src/brpc/ubring/timer/timer_mgr.cpp @@ -39,8 +39,8 @@ static int32_t g_timerModuleInitialized; #if defined(OS_MACOSX) static int timerfd_create_macosx(int clockid, int flags); static int timerfd_settime_macosx(int fd, int flags, - const struct itimerspec *new_value, - struct itimerspec *old_value); + const ::itimerspec *new_value, + ::itimerspec *old_value); #endif static RETURN_CODE DeleteTimerInner(uint32_t fd) { @@ -317,7 +317,7 @@ void DeleteTimer(uint32_t fd) { g_timerFdCtxMap[fd].periodical = 0; } -int32_t TimerStart(const struct itimerspec *time, void *(*cb)(void *), void *args) { +int32_t TimerStart(const ::itimerspec *time, void *(*cb)(void *), void *args) { if (g_epollFd == -1) { LOG(ERROR) << "Timer epoll/kqueue encountered internal error."; return -1; @@ -455,10 +455,10 @@ static int timerfd_create_macosx(int clockid, int flags) { } static int timerfd_settime_macosx(int fd, int flags, - const struct itimerspec *new_value, - struct itimerspec *old_value) { + const ::itimerspec *new_value, + ::itimerspec *old_value) { if (old_value != NULL) { - memset(old_value, 0, sizeof(struct itimerspec)); + memset(old_value, 0, sizeof(::itimerspec)); } return 0; } diff --git a/src/brpc/ubring/timer/timer_mgr.h b/src/brpc/ubring/timer/timer_mgr.h index a925c0bf58..a0169b3003 100644 --- a/src/brpc/ubring/timer/timer_mgr.h +++ b/src/brpc/ubring/timer/timer_mgr.h @@ -54,7 +54,7 @@ RETURN_CODE TimerInit(void); void TimerModuleDestroy(void); void *UnifiedCallback(void *args); void *TimerEpoll(void *args); -int32_t TimerStart(const struct itimerspec *time, void 
*(*cb)(void *), void *args); +int32_t TimerStart(const ::itimerspec *time, void *(*cb)(void *), void *args); uint32_t GetActiveTimerNum(void); void CloseTimerFd(uint32_t fd); diff --git a/src/brpc/ubring/ub_ring.cpp b/src/brpc/ubring/ub_ring.cpp index 1945d91139..f7a6bb25f4 100644 --- a/src/brpc/ubring/ub_ring.cpp +++ b/src/brpc/ubring/ub_ring.cpp @@ -129,7 +129,7 @@ RETURN_CODE UBRing::UbrAddCloseTimer() { } uint32_t eventQTimerInterval = FLAGS_ub_event_queue_timer_interval * TIME_COVERSION; - struct itimerspec timeSpec = { + ::itimerspec timeSpec = { .it_interval = {.tv_sec = 0, .tv_nsec = eventQTimerInterval}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; @@ -202,7 +202,7 @@ RETURN_CODE UBRing::UbrAddHBTimer() { return UBRING_ERR; } - struct itimerspec timeSpec = { + ::itimerspec timeSpec = { .it_interval = {.tv_sec = FLAGS_ub_hb_timer_interval, .tv_nsec = 0}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; @@ -298,7 +298,7 @@ RETURN_CODE UBRing::UbrAddAsynClearTimer(UbrTrx *trx) { return UBRING_OK; } - struct itimerspec timeSpec = { + ::itimerspec timeSpec = { .it_interval = {.tv_sec = 0, .tv_nsec = 0}, .it_value = {.tv_sec = FLAGS_ub_flying_io_timeout, .tv_nsec = 0} }; From fde0034f35632affad9c1d49853bcdf87efee735 Mon Sep 17 00:00:00 2001 From: zchuango Date: Sat, 9 May 2026 02:52:11 +0000 Subject: [PATCH 71/84] adaptor the itimerspec for macos platform --- src/brpc/ubring/shm/shm_ubs.cpp | 2 +- src/brpc/ubring/timer/timer_mgr.cpp | 12 ++++++------ src/brpc/ubring/timer/timer_mgr.h | 9 ++++++++- src/brpc/ubring/ub_ring.cpp | 7 ++++--- src/brpc/ubring/ub_ring.h | 1 - src/brpc/ubring/ub_ring_manager.cpp | 1 + src/brpc/ubring/ub_ring_manager.h | 5 ++--- 7 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/brpc/ubring/shm/shm_ubs.cpp b/src/brpc/ubring/shm/shm_ubs.cpp index 096b1a8678..057806e107 100644 --- a/src/brpc/ubring/shm/shm_ubs.cpp +++ b/src/brpc/ubring/shm/shm_ubs.cpp @@ -451,7 +451,7 @@ void *UbsShmCallback(void* args) RETURN_CODE 
UbsShmAddTimer(ShmList *shmList) { uint32_t timerInterval = FLAGS_ub_flying_io_timeout; - ::itimerspec timeSpec = { + itimerspec timeSpec = { .it_interval = {.tv_sec = timerInterval, .tv_nsec = 0}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; diff --git a/src/brpc/ubring/timer/timer_mgr.cpp b/src/brpc/ubring/timer/timer_mgr.cpp index 5fb4f84f95..cba30118f1 100644 --- a/src/brpc/ubring/timer/timer_mgr.cpp +++ b/src/brpc/ubring/timer/timer_mgr.cpp @@ -39,8 +39,8 @@ static int32_t g_timerModuleInitialized; #if defined(OS_MACOSX) static int timerfd_create_macosx(int clockid, int flags); static int timerfd_settime_macosx(int fd, int flags, - const ::itimerspec *new_value, - ::itimerspec *old_value); + const itimerspec *new_value, + itimerspec *old_value); #endif static RETURN_CODE DeleteTimerInner(uint32_t fd) { @@ -317,7 +317,7 @@ void DeleteTimer(uint32_t fd) { g_timerFdCtxMap[fd].periodical = 0; } -int32_t TimerStart(const ::itimerspec *time, void *(*cb)(void *), void *args) { +int32_t TimerStart(const itimerspec *time, void *(*cb)(void *), void *args) { if (g_epollFd == -1) { LOG(ERROR) << "Timer epoll/kqueue encountered internal error."; return -1; @@ -455,10 +455,10 @@ static int timerfd_create_macosx(int clockid, int flags) { } static int timerfd_settime_macosx(int fd, int flags, - const ::itimerspec *new_value, - ::itimerspec *old_value) { + const itimerspec *new_value, + itimerspec *old_value) { if (old_value != NULL) { - memset(old_value, 0, sizeof(::itimerspec)); + memset(old_value, 0, sizeof(itimerspec)); } return 0; } diff --git a/src/brpc/ubring/timer/timer_mgr.h b/src/brpc/ubring/timer/timer_mgr.h index a0169b3003..74576a4885 100644 --- a/src/brpc/ubring/timer/timer_mgr.h +++ b/src/brpc/ubring/timer/timer_mgr.h @@ -33,6 +33,13 @@ #define MAX_TIMER 1024 #define TIMER_EPOLL_WAIT_TIMEOUT 1000 +#if defined(OS_MACOSX) +struct itimerspec +{ + struct timespec it_interval; + struct timespec it_value; +}; +#endif namespace brpc { namespace ubring { typedef enum { @@ 
-54,7 +61,7 @@ RETURN_CODE TimerInit(void); void TimerModuleDestroy(void); void *UnifiedCallback(void *args); void *TimerEpoll(void *args); -int32_t TimerStart(const ::itimerspec *time, void *(*cb)(void *), void *args); +int32_t TimerStart(const itimerspec *time, void *(*cb)(void *), void *args); uint32_t GetActiveTimerNum(void); void CloseTimerFd(uint32_t fd); diff --git a/src/brpc/ubring/ub_ring.cpp b/src/brpc/ubring/ub_ring.cpp index f7a6bb25f4..c14bdeddfc 100644 --- a/src/brpc/ubring/ub_ring.cpp +++ b/src/brpc/ubring/ub_ring.cpp @@ -22,6 +22,7 @@ #include "bthread/bthread.h" #include "butil/logging.h" #include "brpc/ubring/ub_ring.h" +#include "brpc/ubring/ub_ring_manager.h" #include "brpc/ubring/shm/shm_ipc.h" namespace brpc { @@ -129,7 +130,7 @@ RETURN_CODE UBRing::UbrAddCloseTimer() { } uint32_t eventQTimerInterval = FLAGS_ub_event_queue_timer_interval * TIME_COVERSION; - ::itimerspec timeSpec = { + itimerspec timeSpec = { .it_interval = {.tv_sec = 0, .tv_nsec = eventQTimerInterval}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; @@ -202,7 +203,7 @@ RETURN_CODE UBRing::UbrAddHBTimer() { return UBRING_ERR; } - ::itimerspec timeSpec = { + itimerspec timeSpec = { .it_interval = {.tv_sec = FLAGS_ub_hb_timer_interval, .tv_nsec = 0}, .it_value = {.tv_sec = 0, .tv_nsec = 1} }; @@ -298,7 +299,7 @@ RETURN_CODE UBRing::UbrAddAsynClearTimer(UbrTrx *trx) { return UBRING_OK; } - ::itimerspec timeSpec = { + itimerspec timeSpec = { .it_interval = {.tv_sec = 0, .tv_nsec = 0}, .it_value = {.tv_sec = FLAGS_ub_flying_io_timeout, .tv_nsec = 0} }; diff --git a/src/brpc/ubring/ub_ring.h b/src/brpc/ubring/ub_ring.h index cb6352f452..c0cbc2f7ca 100644 --- a/src/brpc/ubring/ub_ring.h +++ b/src/brpc/ubring/ub_ring.h @@ -23,7 +23,6 @@ #include "butil/macros.h" #include "butil/reader_writer.h" #include "brpc/ubring/ubr_trx.h" -#include "brpc/ubring/ub_ring_manager.h" #include "brpc/ubring/shm/shm_mgr.h" #include "brpc/ubring/timer/timer_mgr.h" diff --git 
a/src/brpc/ubring/ub_ring_manager.cpp b/src/brpc/ubring/ub_ring_manager.cpp index 794fa4c845..9d6094d77b 100644 --- a/src/brpc/ubring/ub_ring_manager.cpp +++ b/src/brpc/ubring/ub_ring_manager.cpp @@ -16,6 +16,7 @@ // under the License. #include +#include "brpc/ubring/ub_ring.h" #include "brpc/ubring/ub_ring_manager.h" #include "butil/logging.h" diff --git a/src/brpc/ubring/ub_ring_manager.h b/src/brpc/ubring/ub_ring_manager.h index b57cfdcd47..a55fa66da2 100644 --- a/src/brpc/ubring/ub_ring_manager.h +++ b/src/brpc/ubring/ub_ring_manager.h @@ -18,9 +18,8 @@ #ifndef BRPC_UB_RING_MANAGER_H #define BRPC_UB_RING_MANAGER_H -#include -#include -#include "brpc/ubring/ub_ring.h" +#include "brpc/ubring/ubr_trx.h" +#include "brpc/ubring/shm/shm_def.h" #include "brpc/ubring/common/common.h" namespace brpc { From 2217a6ca37614ec885c5942aa40a5f7acdf032a3 Mon Sep 17 00:00:00 2001 From: zchuango Date: Sat, 9 May 2026 04:04:38 +0000 Subject: [PATCH 72/84] optimize the cmakelist config --- CMakeLists.txt | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d84ba69343..b702cd012f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -276,15 +276,6 @@ if(WITH_RDMA) endif() endif() -if(WITH_UBRING) - message("brpc compile with ub") - # find_path(RDMA_INCLUDE_PATH NAMES infiniband/verbs.h) - # find_library(UB_LIB NAMES ibverbs) - # if((NOT RDMA_INCLUDE_PATH) OR (NOT UB_LIB)) - # message(FATAL_ERROR "Fail to find ibverbs") - # endif() -endif() - find_library(PROTOC_LIB NAMES protoc) if(NOT PROTOC_LIB) message(FATAL_ERROR "Fail to find protoc lib") @@ -338,6 +329,7 @@ if(WITH_RDMA) endif() if(WITH_UBRING) + message(STATUS "brpc compile with ubring") list(APPEND DYNAMIC_LIB ${UB_LIB}) endif() From e9bda71710586dea9a89a1370a863518435e6b88 Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Mon, 13 Apr 2026 14:14:03 +0800 Subject: [PATCH 73/84] Use monotonic time instead of wall time (#3268) --- 
example/auto_concurrency_limiter/client.cpp | 6 +- example/auto_concurrency_limiter/server.cpp | 6 +- example/dynamic_partition_echo_c++/server.cpp | 4 +- .../multi_threaded_echo_fns_c++/server.cpp | 4 +- example/partition_echo_c++/server.cpp | 4 +- example/rdma_performance/client.cpp | 8 +-- example/selective_echo_c++/server.cpp | 4 +- src/brpc/cluster_recover_policy.cpp | 4 +- src/brpc/details/health_check.cpp | 4 +- src/brpc/global.cpp | 6 +- src/brpc/policy/auto_concurrency_limiter.cpp | 4 +- .../policy/locality_aware_load_balancer.cpp | 6 +- .../policy/timeout_concurrency_limiter.cpp | 2 +- src/brpc/server.cpp | 6 +- src/brpc/socket.cpp | 4 +- src/brpc/trackme.cpp | 2 +- src/bvar/collector.cpp | 6 +- src/bvar/collector.h | 2 +- src/bvar/default_variables.cpp | 6 +- src/bvar/detail/sampler.cpp | 6 +- src/bvar/detail/sampler.h | 4 +- ...brpc_auto_concurrency_limiter_unittest.cpp | 2 +- test/brpc_channel_unittest.cpp | 56 +++++++-------- test/brpc_h2_unsent_message_unittest.cpp | 8 +-- test/brpc_http_rpc_protocol_unittest.cpp | 8 +-- test/brpc_load_balancer_unittest.cpp | 12 ++-- test/brpc_socket_unittest.cpp | 29 ++++---- ...c_timeout_concurrency_limiter_unittest.cpp | 68 +++++++++---------- test/bvar_recorder_unittest.cpp | 18 ++--- test/bvar_reducer_unittest.cpp | 4 +- tools/rpc_press/info_thread.cpp | 6 +- tools/rpc_press/rpc_press_impl.cpp | 4 +- tools/rpc_replay/info_thread.cpp | 6 +- tools/rpc_replay/rpc_replay.cpp | 4 +- 34 files changed, 161 insertions(+), 162 deletions(-) diff --git a/example/auto_concurrency_limiter/client.cpp b/example/auto_concurrency_limiter/client.cpp index d33325ddf9..af293af9ad 100644 --- a/example/auto_concurrency_limiter/client.cpp +++ b/example/auto_concurrency_limiter/client.cpp @@ -119,13 +119,13 @@ struct TestCaseContext { , stage_index(0) , test_case(tc) , next_stage_sec(test_case.qps_stage_list(0).duration_sec() + - butil::gettimeofday_s()) { + butil::cpuwide_time_s()) { 
DisplayStage(test_case.qps_stage_list(stage_index)); Update(); } bool Update() { - if (butil::gettimeofday_s() >= next_stage_sec) { + if (butil::cpuwide_time_s() >= next_stage_sec) { ++stage_index; if (stage_index < test_case.qps_stage_list_size()) { next_stage_sec += test_case.qps_stage_list(stage_index).duration_sec(); @@ -144,7 +144,7 @@ struct TestCaseContext { } else if (qps_stage.type() == test::SMOOTH) { qps = lower_bound + (upper_bound - lower_bound) / double(qps_stage.duration_sec()) * (qps_stage.duration_sec() - next_stage_sec - + butil::gettimeofday_s()); + + butil::cpuwide_time_s()); } interval_us.store(1.0 / qps * 1000000, butil::memory_order_relaxed); return true; diff --git a/example/auto_concurrency_limiter/server.cpp b/example/auto_concurrency_limiter/server.cpp index 61f826fab7..a161b188ed 100644 --- a/example/auto_concurrency_limiter/server.cpp +++ b/example/auto_concurrency_limiter/server.cpp @@ -96,7 +96,7 @@ class EchoServiceImpl : public test::EchoService { void SetTestCase(const test::TestCase& test_case) { _test_case = test_case; _next_stage_start = _test_case.latency_stage_list(0).duration_sec() + - butil::gettimeofday_s(); + butil::cpuwide_time_s(); _stage_index = 0; _running_case = false; DisplayStage(_test_case.latency_stage_list(_stage_index)); @@ -137,7 +137,7 @@ class EchoServiceImpl : public test::EchoService { void ComputeLatency() { if (_stage_index < _test_case.latency_stage_list_size() && - butil::gettimeofday_s() > _next_stage_start) { + butil::cpuwide_time_s() > _next_stage_start) { ++_stage_index; if (_stage_index < _test_case.latency_stage_list_size()) { _next_stage_start += _test_case.latency_stage_list(_stage_index).duration_sec(); @@ -167,7 +167,7 @@ class EchoServiceImpl : public test::EchoService { int latency = lower_bound + (upper_bound - lower_bound) / double(latency_stage.duration_sec()) * (latency_stage.duration_sec() - _next_stage_start + - butil::gettimeofday_s()); + butil::cpuwide_time_s()); 
_latency.store(latency, butil::memory_order_relaxed); } else { LOG(FATAL) << "Wrong Type:" << latency_stage.type(); diff --git a/example/dynamic_partition_echo_c++/server.cpp b/example/dynamic_partition_echo_c++/server.cpp index e86affaa91..eda57b4243 100644 --- a/example/dynamic_partition_echo_c++/server.cpp +++ b/example/dynamic_partition_echo_c++/server.cpp @@ -69,8 +69,8 @@ class EchoServiceImpl : public example::EchoService { } } if (FLAGS_spin) { - int64_t end_time = butil::gettimeofday_us() + (int64_t)delay; - while (butil::gettimeofday_us() < end_time) {} + int64_t end_time = butil::cpuwide_time_us() + (int64_t)delay; + while (butil::cpuwide_time_us() < end_time) {} } else { bthread_usleep((int64_t)delay); } diff --git a/example/multi_threaded_echo_fns_c++/server.cpp b/example/multi_threaded_echo_fns_c++/server.cpp index 2e837bbf46..a25ef96e81 100644 --- a/example/multi_threaded_echo_fns_c++/server.cpp +++ b/example/multi_threaded_echo_fns_c++/server.cpp @@ -70,8 +70,8 @@ class EchoServiceImpl : public example::EchoService { } } if (FLAGS_spin) { - int64_t end_time = butil::gettimeofday_us() + (int64_t)delay; - while (butil::gettimeofday_us() < end_time) {} + int64_t end_time = butil::cpuwide_time_us() + (int64_t)delay; + while (butil::cpuwide_time_us() < end_time) {} } else { bthread_usleep((int64_t)delay); } diff --git a/example/partition_echo_c++/server.cpp b/example/partition_echo_c++/server.cpp index aa65d41b7a..7dbcf8f519 100644 --- a/example/partition_echo_c++/server.cpp +++ b/example/partition_echo_c++/server.cpp @@ -69,8 +69,8 @@ class EchoServiceImpl : public example::EchoService { } } if (FLAGS_spin) { - int64_t end_time = butil::gettimeofday_us() + (int64_t)delay; - while (butil::gettimeofday_us() < end_time) {} + int64_t end_time = butil::cpuwide_time_us() + (int64_t)delay; + while (butil::cpuwide_time_us() < end_time) {} } else { bthread_usleep((int64_t)delay); } diff --git a/example/rdma_performance/client.cpp 
b/example/rdma_performance/client.cpp index a7ed2c99c6..2e8acc4051 100644 --- a/example/rdma_performance/client.cpp +++ b/example/rdma_performance/client.cpp @@ -176,7 +176,7 @@ class PerformanceTest { } --closure->test->_iterations; uint64_t last = g_last_time.load(butil::memory_order_relaxed); - uint64_t now = butil::gettimeofday_us(); + uint64_t now = butil::cpuwide_time_us(); if (now > last && now - last > 100000) { if (g_last_time.exchange(now, butil::memory_order_relaxed) == last) { g_client_cpu_recorder << @@ -192,7 +192,7 @@ class PerformanceTest { static void* RunTest(void* arg) { PerformanceTest* test = (PerformanceTest*)arg; - test->_start_time = butil::gettimeofday_us(); + test->_start_time = butil::cpuwide_time_us(); test->_iterations = FLAGS_test_iterations; for (int i = 0; i < FLAGS_queue_depth; ++i) { @@ -235,7 +235,7 @@ void Test(int thread_num, int attachment_size) { } tests.push_back(t); } - uint64_t start_time = butil::gettimeofday_us(); + uint64_t start_time = butil::cpuwide_time_us(); bthread_t tid[thread_num]; if (FLAGS_expected_qps > 0) { bthread_t tid; @@ -250,7 +250,7 @@ void Test(int thread_num, int attachment_size) { bthread_usleep(10000); } } - uint64_t end_time = butil::gettimeofday_us(); + uint64_t end_time = butil::cpuwide_time_us(); double throughput = g_total_bytes / 1.048576 / (end_time - start_time); if (FLAGS_test_iterations == 0) { std::cout << "Avg-Latency: " << g_latency_recorder.latency(10) diff --git a/example/selective_echo_c++/server.cpp b/example/selective_echo_c++/server.cpp index 0705a32890..d1ed8bb940 100644 --- a/example/selective_echo_c++/server.cpp +++ b/example/selective_echo_c++/server.cpp @@ -68,8 +68,8 @@ class EchoServiceImpl : public example::EchoService { } } if (FLAGS_spin) { - int64_t end_time = butil::gettimeofday_us() + (int64_t)delay; - while (butil::gettimeofday_us() < end_time) {} + int64_t end_time = butil::cpuwide_time_us() + (int64_t)delay; + while (butil::cpuwide_time_us() < end_time) {} } else { 
bthread_usleep((int64_t)delay); } diff --git a/src/brpc/cluster_recover_policy.cpp b/src/brpc/cluster_recover_policy.cpp index d1338f6609..bb59a23c0f 100644 --- a/src/brpc/cluster_recover_policy.cpp +++ b/src/brpc/cluster_recover_policy.cpp @@ -51,7 +51,7 @@ bool DefaultClusterRecoverPolicy::StopRecoverIfNecessary() { if (!_recovering) { return false; } - int64_t now_ms = butil::gettimeofday_ms(); + int64_t now_ms = butil::cpuwide_time_ms(); std::unique_lock mu(_mutex); if (_last_usable_change_time_ms != 0 && _last_usable != 0 && (now_ms - _last_usable_change_time_ms > _hold_seconds * 1000)) { @@ -92,7 +92,7 @@ bool DefaultClusterRecoverPolicy::DoReject(const std::vector& server_l if (!_recovering) { return false; } - int64_t now_ms = butil::gettimeofday_ms(); + int64_t now_ms = butil::cpuwide_time_ms(); uint64_t usable = GetUsableServerCount(now_ms, server_list); if (_last_usable != usable) { std::unique_lock mu(_mutex); diff --git a/src/brpc/details/health_check.cpp b/src/brpc/details/health_check.cpp index cfd49104d6..7cf4e32bcf 100644 --- a/src/brpc/details/health_check.cpp +++ b/src/brpc/details/health_check.cpp @@ -99,7 +99,7 @@ void* HealthCheckManager::AppCheck(void* arg) { done->cntl.Reset(); done->cntl.http_request().uri() = done->hc_option.health_check_path; ControllerPrivateAccessor(&done->cntl).set_health_check_call(); - done->last_check_time_ms = butil::gettimeofday_ms(); + done->last_check_time_ms = butil::cpuwide_time_ms(); done->channel.CallMethod(NULL, &done->cntl, NULL, NULL, done); return NULL; } @@ -126,7 +126,7 @@ void OnAppHealthCheckDone::Run() { << ", " << cntl.ErrorText(); int64_t sleep_time_ms = - last_check_time_ms + interval_s * 1000 - butil::gettimeofday_ms(); + last_check_time_ms + interval_s * 1000 - butil::cpuwide_time_ms(); if (sleep_time_ms > 0) { // TODO(zhujiashun): we need to handle the case when timer fails // and bthread_usleep returns immediately. 
In most situations, diff --git a/src/brpc/global.cpp b/src/brpc/global.cpp index 1f67aee20b..90f19cd5bc 100644 --- a/src/brpc/global.cpp +++ b/src/brpc/global.cpp @@ -238,13 +238,13 @@ static void* GlobalUpdate(void*) { } std::vector conns; - const int64_t start_time_us = butil::gettimeofday_us(); + const int64_t start_time_us = butil::cpuwide_time_us(); const int WARN_NOSLEEP_THRESHOLD = 2; int64_t last_time_us = start_time_us; int consecutive_nosleep = 0; int64_t last_return_free_memory_time = start_time_us; while (1) { - const int64_t sleep_us = 1000000L + last_time_us - butil::gettimeofday_us(); + const int64_t sleep_us = 1000000L + last_time_us - butil::cpuwide_time_us(); if (sleep_us > 0) { if (bthread_usleep(sleep_us) < 0) { PLOG_IF(FATAL, errno != ESTOP) << "Fail to sleep"; @@ -257,7 +257,7 @@ static void* GlobalUpdate(void*) { LOG(WARNING) << __FUNCTION__ << " is too busy!"; } } - last_time_us = butil::gettimeofday_us(); + last_time_us = butil::cpuwide_time_us(); TrackMe(); diff --git a/src/brpc/policy/auto_concurrency_limiter.cpp b/src/brpc/policy/auto_concurrency_limiter.cpp index 51ea56d765..e9cce0fa43 100644 --- a/src/brpc/policy/auto_concurrency_limiter.cpp +++ b/src/brpc/policy/auto_concurrency_limiter.cpp @@ -87,7 +87,7 @@ DEFINE_double(auto_cl_error_rate_punish_threshold, 0, AutoConcurrencyLimiter::AutoConcurrencyLimiter() : _max_concurrency(FLAGS_auto_cl_initial_max_concurrency) - , _remeasure_start_us(NextResetTime(butil::gettimeofday_us())) + , _remeasure_start_us(NextResetTime(butil::cpuwide_time_us())) , _reset_latency_us(0) , _min_latency_us(-1) , _ema_max_qps(-1) @@ -111,7 +111,7 @@ void AutoConcurrencyLimiter::OnResponded(int error_code, int64_t latency_us) { return; } - const int64_t now_time_us = butil::gettimeofday_us(); + const int64_t now_time_us = butil::cpuwide_time_us(); int64_t last_sampling_time_us = _last_sampling_time_us.load(butil::memory_order_relaxed); diff --git a/src/brpc/policy/locality_aware_load_balancer.cpp 
b/src/brpc/policy/locality_aware_load_balancer.cpp index beea51690e..455f6fc397 100644 --- a/src/brpc/policy/locality_aware_load_balancer.cpp +++ b/src/brpc/policy/locality_aware_load_balancer.cpp @@ -18,7 +18,7 @@ #include // numeric_limits #include -#include "butil/time.h" // gettimeofday_us +#include "butil/time.h" // cpuwide_time_us #include "butil/fast_rand.h" #include "brpc/log.h" #include "brpc/socket.h" @@ -376,7 +376,7 @@ void LocalityAwareLoadBalancer::Feedback(const CallInfo& info) { int64_t LocalityAwareLoadBalancer::Weight::Update( const CallInfo& ci, size_t index) { - const int64_t end_time_us = butil::gettimeofday_us(); + const int64_t end_time_us = butil::cpuwide_time_us(); const int64_t latency = end_time_us - ci.begin_time_us; BAIDU_SCOPED_LOCK(_mutex); if (Disabled()) { @@ -524,7 +524,7 @@ void LocalityAwareLoadBalancer::Describe( if (_db_servers.Read(&s) != 0) { os << "fail to read _db_servers"; } else { - const int64_t now = butil::gettimeofday_us(); + const int64_t now = butil::cpuwide_time_us(); const size_t n = s->weight_tree.size(); os << '['; for (size_t i = 0; i < n; ++i) { diff --git a/src/brpc/policy/timeout_concurrency_limiter.cpp b/src/brpc/policy/timeout_concurrency_limiter.cpp index b2582eb12b..21aad33fc1 100644 --- a/src/brpc/policy/timeout_concurrency_limiter.cpp +++ b/src/brpc/policy/timeout_concurrency_limiter.cpp @@ -91,7 +91,7 @@ void TimeoutConcurrencyLimiter::OnResponded(int error_code, return; } - const int64_t now_time_us = butil::gettimeofday_us(); + const int64_t now_time_us = butil::cpuwide_time_us(); int64_t last_sampling_time_us = _last_sampling_time_us.load(butil::memory_order_relaxed); diff --git a/src/brpc/server.cpp b/src/brpc/server.cpp index 9470220d09..3a5da7b771 100644 --- a/src/brpc/server.cpp +++ b/src/brpc/server.cpp @@ -372,10 +372,10 @@ void* Server::UpdateDerivedVars(void* arg) { } #endif - int64_t last_time = butil::gettimeofday_us(); + int64_t last_time = butil::cpuwide_time_us(); int 
consecutive_nosleep = 0; while (1) { - const int64_t sleep_us = 1000000L + last_time - butil::gettimeofday_us(); + const int64_t sleep_us = 1000000L + last_time - butil::cpuwide_time_us(); if (sleep_us < 1000L) { if (++consecutive_nosleep >= 2) { consecutive_nosleep = 0; @@ -388,7 +388,7 @@ void* Server::UpdateDerivedVars(void* arg) { return NULL; } } - last_time = butil::gettimeofday_us(); + last_time = butil::cpuwide_time_us(); // Update stats of accepted sockets. if (server->_am) { diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index da926c9ff5..78bd4e23d2 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -572,7 +572,7 @@ int Socket::ResetFileDescriptor(int fd) { // MUST store `_fd' before adding itself into epoll device to avoid // race conditions with the callback function inside epoll _fd.store(fd, butil::memory_order_release); - _reset_fd_real_us = butil::gettimeofday_us(); + _reset_fd_real_us = butil::cpuwide_time_us(); if (!ValidFileDescriptor(fd)) { return 0; } @@ -2316,7 +2316,7 @@ void Socket::DebugSocket(std::ostream& os, SocketId id) { << "\nnevent=" << ptr->_nevent.load(butil::memory_order_relaxed) << "\nfd=" << fd << "\ntos=" << ptr->_tos - << "\nreset_fd_to_now=" << butil::gettimeofday_us() - ptr->_reset_fd_real_us << "us" + << "\nreset_fd_to_now=" << butil::cpuwide_time_us() - ptr->_reset_fd_real_us << "us" << "\nremote_side=" << ptr->_remote_side << "\nlocal_side=" << ptr->_local_side << "\non_et_events=" << (void*)ptr->_on_edge_triggered_events diff --git a/src/brpc/trackme.cpp b/src/brpc/trackme.cpp index 85598a68a0..4decc35269 100644 --- a/src/brpc/trackme.cpp +++ b/src/brpc/trackme.cpp @@ -225,7 +225,7 @@ void TrackMe() { if (FLAGS_trackme_server.empty()) { return; } - int64_t now = butil::gettimeofday_us(); + int64_t now = butil::cpuwide_time_us(); std::unique_lock mu(s_trackme_mutex); if (s_trackme_last_time == 0) { // Delay the first ping randomly within s_trackme_interval. 
This diff --git a/src/bvar/collector.cpp b/src/bvar/collector.cpp index a01f45fdbc..c4adf634b7 100644 --- a/src/bvar/collector.cpp +++ b/src/bvar/collector.cpp @@ -305,8 +305,8 @@ void Collector::update_speed_limit(CollectorSpeedLimit* sl, size_t new_sampling_range = 0; const size_t old_sampling_range = sl->sampling_range; if (!sl->ever_grabbed) { - if (sl->first_sample_real_us) { - interval_us = butil::gettimeofday_us() - sl->first_sample_real_us; + if (sl->first_sample_us) { + interval_us = butil::cpuwide_time_us() - sl->first_sample_us; if (interval_us < 0) { interval_us = 0; } @@ -350,7 +350,7 @@ size_t is_collectable_before_first_time_grabbed(CollectorSpeedLimit* sl) { int before_add = sl->count_before_grabbed.fetch_add( 1, butil::memory_order_relaxed); if (before_add == 0) { - sl->first_sample_real_us = butil::gettimeofday_us(); + sl->first_sample_us = butil::cpuwide_time_us(); } else if (before_add >= FLAGS_bvar_collector_expected_per_second) { butil::get_leaky_singleton()->wakeup_grab_thread(); } diff --git a/src/bvar/collector.h b/src/bvar/collector.h index a603d96b34..473d4ac7ab 100644 --- a/src/bvar/collector.h +++ b/src/bvar/collector.h @@ -40,7 +40,7 @@ struct CollectorSpeedLimit { size_t sampling_range; bool ever_grabbed; butil::static_atomic count_before_grabbed; - int64_t first_sample_real_us; + int64_t first_sample_us; }; static const size_t COLLECTOR_SAMPLING_BASE = 16384; diff --git a/src/bvar/default_variables.cpp b/src/bvar/default_variables.cpp index a84c3ab08b..1d60063526 100644 --- a/src/bvar/default_variables.cpp +++ b/src/bvar/default_variables.cpp @@ -144,7 +144,7 @@ class CachedReader { template static const T& get_value(const ReadFn& fn) { CachedReader* p = butil::get_leaky_singleton(); - const int64_t now = butil::gettimeofday_us(); + const int64_t now = butil::cpuwide_time_us(); if (now > p->_mtime_us + CACHED_INTERVAL_US) { pthread_mutex_lock(&p->_mutex); if (now > p->_mtime_us + CACHED_INTERVAL_US) { @@ -625,10 +625,10 @@ static 
void get_kernel_version(std::ostream& os, void*) { // ====================================== -static int64_t g_starting_time = butil::gettimeofday_us(); +static int64_t g_starting_time = butil::cpuwide_time_us(); static timeval get_uptime(void*) { - int64_t uptime_us = butil::gettimeofday_us() - g_starting_time; + int64_t uptime_us = butil::cpuwide_time_us() - g_starting_time; timeval tm; tm.tv_sec = uptime_us / 1000000L; tm.tv_usec = uptime_us - tm.tv_sec * 1000000L; diff --git a/src/bvar/detail/sampler.cpp b/src/bvar/detail/sampler.cpp index dd6271e7ba..f3462558c1 100644 --- a/src/bvar/detail/sampler.cpp +++ b/src/bvar/detail/sampler.cpp @@ -155,7 +155,7 @@ void SamplerCollector::run() { butil::LinkNode root; int consecutive_nosleep = 0; while (!_stop) { - int64_t abstime = butil::gettimeofday_us(); + int64_t abstime = butil::cpuwide_time_us(); Sampler* s = this->reset(); if (s) { s->InsertBeforeAsList(&root); @@ -176,13 +176,13 @@ void SamplerCollector::run() { p = saved_next; } bool slept = false; - int64_t now = butil::gettimeofday_us(); + int64_t now = butil::cpuwide_time_us(); _cumulated_time_us += now - abstime; abstime += 1000000L; while (abstime > now) { ::usleep(abstime - now); slept = true; - now = butil::gettimeofday_us(); + now = butil::cpuwide_time_us(); } if (slept) { consecutive_nosleep = 0; diff --git a/src/bvar/detail/sampler.h b/src/bvar/detail/sampler.h index 85ec237453..32b976dcbd 100644 --- a/src/bvar/detail/sampler.h +++ b/src/bvar/detail/sampler.h @@ -26,7 +26,7 @@ #include "butil/logging.h" // LOG() #include "butil/containers/bounded_queue.h"// BoundedQueue #include "butil/type_traits.h" // is_same -#include "butil/time.h" // gettimeofday_us +#include "butil/time.h" // cpuwide_time_us #include "butil/class_name.h" namespace bvar { @@ -136,7 +136,7 @@ class ReducerSampler : public Sampler { // get_value() of _reducer can still be called.
latest.data = _reducer->get_value(); } - latest.time_us = butil::gettimeofday_us(); + latest.time_us = butil::cpuwide_time_us(); _q.elim_push(latest); } diff --git a/test/brpc_auto_concurrency_limiter_unittest.cpp b/test/brpc_auto_concurrency_limiter_unittest.cpp index 77163e2fb8..ac572b82a8 100644 --- a/test/brpc_auto_concurrency_limiter_unittest.cpp +++ b/test/brpc_auto_concurrency_limiter_unittest.cpp @@ -78,7 +78,7 @@ void AddSamplesAndTriggerWindow(brpc::policy::AutoConcurrencyLimiter& limiter, int succ_count, int64_t succ_latency, int fail_count, int64_t fail_latency) { ASSERT_GT(succ_count, 0) << "Need at least 1 success to trigger window"; - int64_t now = butil::gettimeofday_us(); + int64_t now = butil::cpuwide_time_us(); // Add successful samples (reserve one for the trigger) for (int i = 0; i < succ_count - 1; ++i) { diff --git a/test/brpc_channel_unittest.cpp b/test/brpc_channel_unittest.cpp index de33b44393..2004767470 100644 --- a/test/brpc_channel_unittest.cpp +++ b/test/brpc_channel_unittest.cpp @@ -495,9 +495,9 @@ class ChannelTest : public ::testing::Test{ EXPECT_EQ("received " + std::string(__FUNCTION__), res.message()); if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -655,9 +655,9 @@ class ChannelTest : public ::testing::Test{ } if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 
100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -707,9 +707,9 @@ class ChannelTest : public ::testing::Test{ } if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -751,9 +751,9 @@ class ChannelTest : public ::testing::Test{ if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -800,9 +800,9 @@ class ChannelTest : public ::testing::Test{ } if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -861,9 +861,9 @@ class ChannelTest : public ::testing::Test{ } if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -914,9 +914,9 @@ class 
ChannelTest : public ::testing::Test{ ASSERT_EQ((int)1, res.code_list(0)); if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -1215,9 +1215,9 @@ class ChannelTest : public ::testing::Test{ EXPECT_EQ("received " + std::string(__FUNCTION__), res.message()); if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -1672,9 +1672,9 @@ class ChannelTest : public ::testing::Test{ EXPECT_EQ(0, cntl.ErrorCode()) << cntl.ErrorText(); EXPECT_EQ("received " + std::string(__FUNCTION__), res.message()); // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } @@ -1705,9 +1705,9 @@ class ChannelTest : public ::testing::Test{ EXPECT_EQ(0, cntl.ErrorCode()) << cntl.ErrorText(); EXPECT_EQ("received " + std::string(__FUNCTION__), res.message()); // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 
0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } StopAndJoin(); @@ -1741,9 +1741,9 @@ class ChannelTest : public ::testing::Test{ ASSERT_EQ(0, cntl.sub(0)->ErrorCode()); // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } StopAndJoin(); @@ -1938,9 +1938,9 @@ class ChannelTest : public ::testing::Test{ EXPECT_EQ(0, cntl.ErrorCode()) << cntl.ErrorText(); EXPECT_EQ(1, cntl.retried_count()); - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -2096,9 +2096,9 @@ class ChannelTest : public ::testing::Test{ if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - EXPECT_LT(butil::gettimeofday_us(), start_time + 100000L/*100ms*/); + EXPECT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { @@ -2156,9 +2156,9 @@ class ChannelTest : public ::testing::Test{ if (short_connection) { // Sleep to let `_messenger' detect `Socket' being `SetFailed' - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (_messenger.ConnectionCount() != 0) { - ASSERT_LT(butil::gettimeofday_us(), 
start_time + 100000L/*100ms*/); + ASSERT_LT(butil::cpuwide_time_us(), start_time + 100000L/*100ms*/); bthread_usleep(1000); } } else { diff --git a/test/brpc_h2_unsent_message_unittest.cpp b/test/brpc_h2_unsent_message_unittest.cpp index acb79039ff..5e3b266dfe 100644 --- a/test/brpc_h2_unsent_message_unittest.cpp +++ b/test/brpc_h2_unsent_message_unittest.cpp @@ -57,12 +57,12 @@ TEST(H2UnsentMessage, request_throughput) { // calc H2UnsentRequest throughput butil::IOBuf dummy_buf; ProfilerStart("h2_unsent_req.prof"); - int64_t start_us = butil::gettimeofday_us(); + int64_t start_us = butil::cpuwide_time_us(); for (int i = 0; i < ntotal; ++i) { brpc::policy::H2UnsentRequest* req = brpc::policy::H2UnsentRequest::New(&cntl); req->AppendAndDestroySelf(&dummy_buf, h2_client_sock.get()); } - int64_t end_us = butil::gettimeofday_us(); + int64_t end_us = butil::cpuwide_time_us(); ProfilerStop(); int64_t elapsed = end_us - start_us; LOG(INFO) << "H2UnsentRequest average qps=" @@ -71,7 +71,7 @@ TEST(H2UnsentMessage, request_throughput) { // calc H2UnsentResponse throughput dummy_buf.clear(); - start_us = butil::gettimeofday_us(); + start_us = butil::cpuwide_time_us(); for (int i = 0; i < ntotal; ++i) { // H2UnsentResponse::New would release cntl.http_response() and swap // cntl.response_attachment() @@ -80,7 +80,7 @@ TEST(H2UnsentMessage, request_throughput) { brpc::policy::H2UnsentResponse* res = brpc::policy::H2UnsentResponse::New(&cntl, 0, false); res->AppendAndDestroySelf(&dummy_buf, h2_client_sock.get()); } - end_us = butil::gettimeofday_us(); + end_us = butil::cpuwide_time_us(); elapsed = end_us - start_us; LOG(INFO) << "H2UnsentResponse average qps=" << (ntotal * 1000000L) / elapsed << "/s, data throughput=" diff --git a/test/brpc_http_rpc_protocol_unittest.cpp b/test/brpc_http_rpc_protocol_unittest.cpp index f13c6877f7..b75a6da3a4 100644 --- a/test/brpc_http_rpc_protocol_unittest.cpp +++ b/test/brpc_http_rpc_protocol_unittest.cpp @@ -2008,14 +2008,14 @@ void 
ReadOneResponse(brpc::SocketUniquePtr& sock, #endif butil::IOPortal read_buf; - int64_t start_time = butil::gettimeofday_us(); + int64_t start_time = butil::cpuwide_time_us(); while (true) { const ssize_t nr = read_buf.append_from_file_descriptor(sock->fd(), 4096); LOG(INFO) << "nr=" << nr; LOG(INFO) << butil::ToPrintableString(read_buf); ASSERT_TRUE(nr > 0 || (nr < 0 && errno == EAGAIN)); if (errno == EAGAIN) { - ASSERT_LT(butil::gettimeofday_us(), start_time + 1000000L) << "Too long!"; + ASSERT_LT(butil::cpuwide_time_us(), start_time + 1000000L) << "Too long!"; bthread_usleep(1000); continue; } @@ -2059,10 +2059,10 @@ TEST_F(HttpTest, http_expect) { request_buf.append(content); ASSERT_EQ(0, sock->Write(&header_buf)); - int64_t start_time = butil::gettimeofday_us(); + int64_t start_time = butil::cpuwide_time_us(); while (sock->fd() < 0) { bthread_usleep(1000); - ASSERT_LT(butil::gettimeofday_us(), start_time + 1000000L) << "Too long!"; + ASSERT_LT(butil::cpuwide_time_us(), start_time + 1000000L) << "Too long!"; } // 100 Continue brpc::DestroyingPtr imsg_guard; diff --git a/test/brpc_load_balancer_unittest.cpp b/test/brpc_load_balancer_unittest.cpp index 07059484d7..2a2be242aa 100644 --- a/test/brpc_load_balancer_unittest.cpp +++ b/test/brpc_load_balancer_unittest.cpp @@ -174,8 +174,8 @@ void DBDMultiBthread() { } // Modify during reading. 
- int64_t start = butil::gettimeofday_ms(); - while (butil::gettimeofday_ms() - start < 10 * 1000) { + int64_t start = butil::cpuwide_time_ms(); + while (butil::cpuwide_time_ms() - start < 10 * 1000) { d.Modify(AddN, 1); typename DBD::ScopedPtr ptr; d.Read(&ptr); @@ -277,9 +277,9 @@ void PerfTest(int thread_num, bool modify_during_reading) { ProfilerStart(prof_name); int64_t run_ms = 5 * 1000; if (modify_during_reading) { - int64_t start = butil::gettimeofday_ms(); + int64_t start = butil::cpuwide_time_ms(); int i = 1; - while (butil::gettimeofday_ms() - start < run_ms) { + while (butil::cpuwide_time_ms() - start < run_ms) { ASSERT_TRUE(dbd.Modify(AddMapN, i++)); usleep(1000); } @@ -1276,8 +1276,8 @@ TEST_F(LoadBalancerTest, revived_from_all_failed_intergrated) { ASSERT_EQ(0, server2.AddService(&service2, brpc::SERVER_DOESNT_OWN_SERVICE)); ASSERT_EQ(0, server2.Start(point2, NULL)); - int64_t start_ms = butil::gettimeofday_ms(); - while ((butil::gettimeofday_ms() - start_ms) < 3500) { + int64_t start_ms = butil::cpuwide_time_ms(); + while ((butil::cpuwide_time_ms() - start_ms) < 3500) { Done* done = new Done; done->req.set_message("123"); stub.Echo(&done->cntl, &done->req, &done->res, done); diff --git a/test/brpc_socket_unittest.cpp b/test/brpc_socket_unittest.cpp index 8e9f90e833..9851287411 100644 --- a/test/brpc_socket_unittest.cpp +++ b/test/brpc_socket_unittest.cpp @@ -37,7 +37,6 @@ #include "brpc/policy/hulu_pbrpc_protocol.h" #include "brpc/policy/most_common_message.h" #include "brpc/policy/http_rpc_protocol.h" -#include "brpc/nshead.h" #include "brpc/server.h" #include "brpc/channel.h" #include "brpc/controller.h" @@ -399,10 +398,10 @@ TEST_F(SocketTest, single_threaded_connect_and_write) { my_connect->MakeConnectDone(); ASSERT_LT(0, called); // serialized } - int64_t start_time = butil::gettimeofday_us(); + int64_t start_time = butil::cpuwide_time_us(); while (s->fd() < 0) { bthread_usleep(1000); - ASSERT_LT(butil::gettimeofday_us(), start_time + 1000000L) 
<< "Too long!"; + ASSERT_LT(butil::cpuwide_time_us(), start_time + 1000000L) << "Too long!"; } #if defined(OS_LINUX) ASSERT_EQ(0, bthread_fd_wait(s->fd(), EPOLLIN)); @@ -502,10 +501,10 @@ TEST_F(SocketTest, fail_to_connect) { ASSERT_EQ(-1, s->fd()); } // KeepWrite is possibly still running. - int64_t start_time = butil::gettimeofday_us(); + int64_t start_time = butil::cpuwide_time_us(); while (global_sock != NULL) { bthread_usleep(1000); - ASSERT_LT(butil::gettimeofday_us(), start_time + 1000000L) << "Too long!"; + ASSERT_LT(butil::cpuwide_time_us(), start_time + 1000000L) << "Too long!"; } ASSERT_EQ(-1, brpc::Socket::Status(id)); // The id is invalid. @@ -567,10 +566,10 @@ TEST_F(SocketTest, not_health_check_when_nref_hits_0) { // is NULL(set in CheckRecycle::BeforeRecycle). Notice that you should // not spin until Socket::Status(id) becomes -1 and assert global_sock // to be NULL because invalidating id happens before calling BeforeRecycle. - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); while (global_sock != NULL) { bthread_usleep(1000); - ASSERT_LT(butil::gettimeofday_us(), start_time + 1000000L); + ASSERT_LT(butil::cpuwide_time_us(), start_time + 1000000L); } ASSERT_EQ(-1, brpc::Socket::Status(id)); } @@ -751,11 +750,11 @@ TEST_F(SocketTest, health_check) { ASSERT_EQ(0, messenger->AddHandler(pairs[0])); ASSERT_EQ(0, messenger->StartAccept(listening_fd, -1, NULL, false)); - int64_t start_time = butil::gettimeofday_us(); + int64_t start_time = butil::cpuwide_time_us(); nref = -1; while (brpc::Socket::Status(id, &nref) != 0) { bthread_usleep(1000); - ASSERT_LT(butil::gettimeofday_us(), + ASSERT_LT(butil::cpuwide_time_us(), start_time + kCheckInteval * 1000000L + 100000L/*100ms*/); } //ASSERT_EQ(2, nref); @@ -772,10 +771,10 @@ TEST_F(SocketTest, health_check) { // SetFailed again, should reconnect and succeed soon. 
ASSERT_EQ(0, s->SetFailed()); ASSERT_EQ(fd, s->fd()); - start_time = butil::gettimeofday_us(); + start_time = butil::cpuwide_time_us(); while (brpc::Socket::Status(id) != 0) { bthread_usleep(1000); - ASSERT_LT(butil::gettimeofday_us(), start_time + 1200000L); + ASSERT_LT(butil::cpuwide_time_us(), start_time + 1200000L); } ASSERT_TRUE(global_sock); @@ -797,10 +796,10 @@ TEST_F(SocketTest, health_check) { ASSERT_EQ(0, brpc::Socket::SetFailed(id)); // StartHealthCheck is possibly still addressing the Socket. - start_time = butil::gettimeofday_us(); + start_time = butil::cpuwide_time_us(); while (global_sock != NULL) { bthread_usleep(1000); - ASSERT_LT(butil::gettimeofday_us(), start_time + 1000000L); + ASSERT_LT(butil::cpuwide_time_us(), start_time + 1000000L); } nref = 0; ASSERT_EQ(-1, brpc::Socket::Status(id, &nref)) << "nref=" << nref; @@ -879,7 +878,7 @@ TEST_F(SocketTest, multi_threaded_write) { } butil::IOPortal dest; - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); for (;;) { ssize_t nr = dest.append_from_file_descriptor(fds[0], 32768); if (nr < 0) { @@ -890,7 +889,7 @@ TEST_F(SocketTest, multi_threaded_write) { ASSERT_EQ(EAGAIN, errno) << berror(); } bthread_usleep(1000); - if (butil::gettimeofday_us() >= start_time + 2000000L) { + if (butil::cpuwide_time_us() >= start_time + 2000000L) { LOG(FATAL) << "Wait too long!"; break; } diff --git a/test/brpc_timeout_concurrency_limiter_unittest.cpp b/test/brpc_timeout_concurrency_limiter_unittest.cpp index 11b9e23bf0..a8e11ecd1b 100644 --- a/test/brpc_timeout_concurrency_limiter_unittest.cpp +++ b/test/brpc_timeout_concurrency_limiter_unittest.cpp @@ -35,56 +35,56 @@ TEST(TimeoutConcurrencyLimiterTest, AddSample) { brpc::policy::FLAGS_timeout_cl_max_sample_count = 10; brpc::policy::TimeoutConcurrencyLimiter limiter; - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, 
butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); bthread_usleep(10 * 1000); - limiter.AddSample(0, 50, butil::gettimeofday_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); ASSERT_EQ(limiter._sw.succ_count, 0); ASSERT_EQ(limiter._sw.failed_count, 0); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); bthread_usleep(10 * 1000); - limiter.AddSample(0, 50, butil::gettimeofday_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); ASSERT_EQ(limiter._sw.succ_count, 0); ASSERT_EQ(limiter._sw.failed_count, 0); ASSERT_EQ(limiter._avg_latency_us, 50); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, 
butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); ASSERT_EQ(limiter._sw.succ_count, 0); ASSERT_EQ(limiter._sw.failed_count, 0); ASSERT_EQ(limiter._avg_latency_us, 50); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); ASSERT_EQ(limiter._sw.succ_count, 6); ASSERT_EQ(limiter._sw.failed_count, 0); - limiter.ResetSampleWindow(butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(0, 50, butil::gettimeofday_us()); - limiter.AddSample(1, 50, butil::gettimeofday_us()); - limiter.AddSample(1, 50, butil::gettimeofday_us()); - limiter.AddSample(1, 50, butil::gettimeofday_us()); + limiter.ResetSampleWindow(butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(0, 50, butil::cpuwide_time_us()); + limiter.AddSample(1, 50, butil::cpuwide_time_us()); + limiter.AddSample(1, 50, butil::cpuwide_time_us()); + limiter.AddSample(1, 50, 
butil::cpuwide_time_us()); ASSERT_EQ(limiter._sw.succ_count, 3); ASSERT_EQ(limiter._sw.failed_count, 3); } diff --git a/test/bvar_recorder_unittest.cpp b/test/bvar_recorder_unittest.cpp index c0d3206244..7493c34541 100644 --- a/test/bvar_recorder_unittest.cpp +++ b/test/bvar_recorder_unittest.cpp @@ -96,10 +96,10 @@ TEST(RecorderTest, window) { bvar::Window w3(&c1, 3); const int N = 10000; - int64_t last_time = butil::gettimeofday_us(); + int64_t last_time = butil::cpuwide_time_us(); for (int i = 1; i <= N; ++i) { c1 << i; - int64_t now = butil::gettimeofday_us(); + int64_t now = butil::cpuwide_time_us(); if (now - last_time >= 1000000L) { last_time = now; LOG(INFO) << "c1=" << c1 << " w1=" << w1 << " w2=" << w2 << " w3=" << w3; @@ -244,15 +244,15 @@ TEST(RecorderTest, latency_recorder_qps_accuracy) { double err = fabs(qps_sum / 1000.0 - exp_qps); return err; }; - ASSERT_GT(0.1, read(lr1, 10/2.0)); - ASSERT_GT(0.1, read(lr2, 11/2.0)); - ASSERT_GT(0.1, read(lr3, 3/2.0)); - ASSERT_GT(0.1, read(lr4, 1/2.0)); + ASSERT_GT(0.2, read(lr1, 10/2.0)); + ASSERT_GT(0.2, read(lr2, 11/2.0)); + ASSERT_GT(0.2, read(lr3, 3/2.0)); + ASSERT_GT(0.2, read(lr4, 1/2.0)); - ASSERT_GT(0.1, read(lr1, 10/3.0, 3)); + ASSERT_GT(0.2, read(lr1, 10/3.0, 3)); ASSERT_GT(0.2, read(lr2, 11/3.0, 3)); - ASSERT_GT(0.1, read(lr3, 3/3.0, 3)); - ASSERT_GT(0.1, read(lr4, 1/3.0, 3)); + ASSERT_GT(0.2, read(lr3, 3/3.0, 3)); + ASSERT_GT(0.2, read(lr4, 1/3.0, 3)); } } // namespace diff --git a/test/bvar_reducer_unittest.cpp b/test/bvar_reducer_unittest.cpp index 48e13b3c0b..5bd3477ce5 100644 --- a/test/bvar_reducer_unittest.cpp +++ b/test/bvar_reducer_unittest.cpp @@ -218,14 +218,14 @@ void ReducerTest_window() { const int N = 6000; int count = 0; int total_count = 0; - int64_t last_time = butil::gettimeofday_us(); + int64_t last_time = butil::cpuwide_time_us(); for (int i = 1; i <= N; ++i) { c1 << 1; c2 << N - i; c3 << i; ++count; ++total_count; - int64_t now = butil::gettimeofday_us(); + int64_t now = 
butil::cpuwide_time_us(); if (now - last_time >= 1000000L) { last_time = now; ASSERT_EQ(total_count, c1.get_value()); diff --git a/tools/rpc_press/info_thread.cpp b/tools/rpc_press/info_thread.cpp index 99c9db1cae..fc3d8f87bb 100644 --- a/tools/rpc_press/info_thread.cpp +++ b/tools/rpc_press/info_thread.cpp @@ -36,18 +36,18 @@ void InfoThread::run() { int64_t last_sent_count = 0; int64_t last_succ_count = 0; int64_t last_error_count = 0; - int64_t start_time = butil::gettimeofday_us(); + int64_t start_time = butil::cpuwide_time_us(); while (!_stop) { int64_t end_time = 0; while (!_stop && - (end_time = butil::gettimeofday_us()) < start_time + 1000000L) { + (end_time = butil::cpuwide_time_us()) < start_time + 1000000L) { BAIDU_SCOPED_LOCK(_mutex); if (!_stop) { timespec ts = butil::microseconds_to_timespec(end_time); pthread_cond_timedwait(&_cond, &_mutex, &ts); } } - start_time = butil::gettimeofday_us(); + start_time = butil::cpuwide_time_us(); char buf[64]; const time_t tm_s = start_time / 1000000L; struct tm lt; diff --git a/tools/rpc_press/rpc_press_impl.cpp b/tools/rpc_press/rpc_press_impl.cpp index 07c8dbcfac..8da10c1495 100644 --- a/tools/rpc_press/rpc_press_impl.cpp +++ b/tools/rpc_press/rpc_press_impl.cpp @@ -195,7 +195,7 @@ void RpcPress::handle_response(brpc::Controller* cntl, Message* response, int64_t start_time){ if (!cntl->Failed()){ - int64_t rpc_call_time_us = butil::gettimeofday_us() - start_time; + int64_t rpc_call_time_us = butil::cpuwide_time_us() - start_time; _latency_recorder << rpc_call_time_us; if (_output_json) { @@ -235,7 +235,7 @@ void RpcPress::sync_client() { msg_index = (msg_index + _options.test_thread_num) % _msgs.size(); Message* request = _msgs[msg_index]; Message* response = _pbrpc_client->get_output_message(); - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); google::protobuf::Closure* done = brpc::NewCallback< RpcPress, RpcPress*, diff --git 
a/tools/rpc_replay/info_thread.cpp b/tools/rpc_replay/info_thread.cpp index d20d70e874..f31e597141 100644 --- a/tools/rpc_replay/info_thread.cpp +++ b/tools/rpc_replay/info_thread.cpp @@ -36,18 +36,18 @@ void InfoThread::run() { int64_t last_sent_count = 0; int64_t last_succ_count = 0; int64_t last_error_count = 0; - int64_t start_time = butil::gettimeofday_us(); + int64_t start_time = butil::cpuwide_time_us(); while (!_stop) { int64_t end_time = 0; while (!_stop && - (end_time = butil::gettimeofday_us()) < start_time + 1000000L) { + (end_time = butil::cpuwide_time_us()) < start_time + 1000000L) { BAIDU_SCOPED_LOCK(_mutex); if (!_stop) { timespec ts = butil::microseconds_to_timespec(end_time); pthread_cond_timedwait(&_cond, &_mutex, &ts); } } - start_time = butil::gettimeofday_us(); + start_time = butil::cpuwide_time_us(); char buf[64]; const time_t tm_s = start_time / 1000000L; struct tm lt; diff --git a/tools/rpc_replay/rpc_replay.cpp b/tools/rpc_replay/rpc_replay.cpp index c3cd7c4c3a..395da6b67e 100644 --- a/tools/rpc_replay/rpc_replay.cpp +++ b/tools/rpc_replay/rpc_replay.cpp @@ -119,7 +119,7 @@ static void handle_response(brpc::Controller* cntl, int64_t start_time, // TODO(gejun): some bthreads are starved when new bthreads are created // continuously, which happens when server is down and RPC keeps failing. // Sleep a while on error to avoid that now. 
- const int64_t end_time = butil::gettimeofday_us(); + const int64_t end_time = butil::cpuwide_time_us(); const int64_t elp = end_time - start_time; if (!cntl->Failed()) { g_latency_recorder << elp; @@ -190,7 +190,7 @@ static void* replay_thread(void* arg) { req.serialized_data() = sample->request.movable(); } g_sent_count << 1; - const int64_t start_time = butil::gettimeofday_us(); + const int64_t start_time = butil::cpuwide_time_us(); if (FLAGS_qps <= 0) { chan->CallMethod(NULL/*use rpc_dump_context in cntl instead*/, cntl, req_ptr, NULL/*ignore response*/, NULL); From a012dc38574b1a257951b5b548d06305d2b15eeb Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Tue, 14 Apr 2026 11:01:54 +0800 Subject: [PATCH 74/84] Fix RDMA resource (#3269) --- src/brpc/rdma/rdma_endpoint.cpp | 1 + test/bvar_variable_unittest.cpp | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/brpc/rdma/rdma_endpoint.cpp b/src/brpc/rdma/rdma_endpoint.cpp index a939332f4c..ad2f684e35 100644 --- a/src/brpc/rdma/rdma_endpoint.cpp +++ b/src/brpc/rdma/rdma_endpoint.cpp @@ -1200,6 +1200,7 @@ int RdmaEndpoint::AllocateResources() { options.on_edge_triggered_events = PollCq; if (Socket::Create(options, &_cq_sid) < 0) { PLOG(WARNING) << "Fail to create socket for cq"; + return -1; } } else { SocketOptions options; diff --git a/test/bvar_variable_unittest.cpp b/test/bvar_variable_unittest.cpp index 7450da6910..55a0e4b6bd 100644 --- a/test/bvar_variable_unittest.cpp +++ b/test/bvar_variable_unittest.cpp @@ -324,6 +324,18 @@ TEST_F(VariableTest, latency_recorder) { ASSERT_EQ(-1, rec.expose("latency")); ASSERT_EQ(-1, rec.expose("Latency")); + std::string saved_bvar_latency_p1; + std::string saved_bvar_latency_p2; + std::string saved_bvar_latency_p3; + + GFLAGS_NAMESPACE::GetCommandLineOption("bvar_latency_p1", &saved_bvar_latency_p1); + GFLAGS_NAMESPACE::GetCommandLineOption("bvar_latency_p2", &saved_bvar_latency_p2); + GFLAGS_NAMESPACE::GetCommandLineOption("bvar_latency_p3", 
&saved_bvar_latency_p3); + + GFLAGS_NAMESPACE::SetCommandLineOption("bvar_latency_p1", "80"); + GFLAGS_NAMESPACE::SetCommandLineOption("bvar_latency_p2", "90"); + GFLAGS_NAMESPACE::SetCommandLineOption("bvar_latency_p3", "99"); + ASSERT_EQ(0, rec.expose("FooBar__latency")); std::vector names; @@ -373,6 +385,10 @@ TEST_F(VariableTest, latency_recorder) { ASSERT_EQ("ba_na_na_latency_percentiles", names[8]); ASSERT_EQ("ba_na_na_max_latency", names[9]); ASSERT_EQ("ba_na_na_qps", names[10]); + + GFLAGS_NAMESPACE::SetCommandLineOption("bvar_latency_p1", saved_bvar_latency_p1.c_str()); + GFLAGS_NAMESPACE::SetCommandLineOption("bvar_latency_p2", saved_bvar_latency_p2.c_str()); + GFLAGS_NAMESPACE::SetCommandLineOption("bvar_latency_p3", saved_bvar_latency_p3.c_str()); } TEST_F(VariableTest, recursive_mutex) { From 79af90e17df194c065d4d7d7e9b9f1e66133a807 Mon Sep 17 00:00:00 2001 From: Sai Asish Y Date: Wed, 15 Apr 2026 20:54:03 -0700 Subject: [PATCH 75/84] butil: fix 'sucess' -> 'success' typos in process_util.h comments (#3272) Doc comments in src/butil/process_util.h read 'on sucess' (lines 31, 36). Fixed to 'on success'. Comment-only change. Signed-off-by: SAY-5 Co-authored-by: SAY-5 --- src/butil/process_util.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/butil/process_util.h b/src/butil/process_util.h index 5493e263d6..c6e8ab55c8 100644 --- a/src/butil/process_util.h +++ b/src/butil/process_util.h @@ -28,12 +28,12 @@ namespace butil { // Read command line of this program. If `with_args' is true, args are // included and separated with spaces. -// Returns length of the command line on sucess, -1 otherwise. +// Returns length of the command line on success, -1 otherwise. // NOTE: `buf' does not end with zero. ssize_t ReadCommandLine(char* buf, size_t len, bool with_args); // Get absolute path of this program. -// Returns length of the absolute path on sucess, -1 otherwise. +// Returns length of the absolute path on success, -1 otherwise. 
// NOTE: `buf' does not end with zero. ssize_t GetProcessAbsolutePath(char* buf, size_t len); From f2da0d74332f0179b451ebea5062b0025d02cb84 Mon Sep 17 00:00:00 2001 From: Youmu Date: Mon, 20 Apr 2026 23:15:49 +0800 Subject: [PATCH 76/84] fix(bthread/context): Add .previous after GNU-stack note (#3266) Add missing .previous directive after each .note.GNU-stack section in ARM inline assembly blocks. This ensures proper section switching and prevents potential assembler errors when building with asan. See #1186 --- src/bthread/context.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/bthread/context.cpp b/src/bthread/context.cpp index b7be731eae..7f913adfc1 100644 --- a/src/bthread/context.cpp +++ b/src/bthread/context.cpp @@ -576,6 +576,7 @@ __asm ( ".size bthread_jump_fcontext,.-bthread_jump_fcontext\n" "@ Mark that we don't need executable stack.\n" ".section .note.GNU-stack,\"\",%progbits\n" +".previous\n" ); #endif @@ -607,6 +608,7 @@ __asm ( ".size bthread_make_fcontext,.-bthread_make_fcontext\n" "@ Mark that we don't need executable stack.\n" ".section .note.GNU-stack,\"\",%progbits\n" +".previous\n" ); #endif @@ -678,6 +680,7 @@ __asm ( ".size bthread_jump_fcontext,.-bthread_jump_fcontext\n" "# Mark that we don't need executable stack.\n" ".section .note.GNU-stack,\"\",%progbits\n" +".previous\n" ); #endif @@ -710,6 +713,7 @@ __asm ( ".size bthread_make_fcontext,.-bthread_make_fcontext\n" "# Mark that we don't need executable stack.\n" ".section .note.GNU-stack,\"\",%progbits\n" +".previous\n" ); #endif From 93c55f91070ffc67b19e2cec7b8f19087cc283bc Mon Sep 17 00:00:00 2001 From: randomkang <75484924+randomkang@users.noreply.github.com> Date: Mon, 20 Apr 2026 23:18:04 +0800 Subject: [PATCH 77/84] Support rdma ece (#3255) --- src/brpc/rdma/rdma_endpoint.cpp | 18 ++++++++++++++++++ src/brpc/rdma/rdma_helper.cpp | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/src/brpc/rdma/rdma_endpoint.cpp b/src/brpc/rdma/rdma_endpoint.cpp index 
ad2f684e35..c69bf8ec07 100644 --- a/src/brpc/rdma/rdma_endpoint.cpp +++ b/src/brpc/rdma/rdma_endpoint.cpp @@ -47,6 +47,8 @@ extern ibv_qp* (*IbvCreateQp)(ibv_pd*, ibv_qp_init_attr*); extern int (*IbvModifyQp)(ibv_qp*, ibv_qp_attr*, ibv_qp_attr_mask); extern int (*IbvQueryQp)(ibv_qp*, ibv_qp_attr*, ibv_qp_attr_mask, ibv_qp_init_attr*); extern int (*IbvDestroyQp)(ibv_qp*); +extern int (*IbvQueryEce)(ibv_qp*, ibv_ece*); +extern int (*IbvSetEce)(ibv_qp*, ibv_ece*); extern bool g_skip_rdma_init; DEFINE_int32(rdma_sq_size, 128, "SQ size for RDMA"); @@ -64,6 +66,7 @@ DEFINE_bool(rdma_use_polling, false, "Use polling mode for RDMA."); DEFINE_int32(rdma_poller_num, 1, "Poller number in RDMA polling mode."); DEFINE_bool(rdma_poller_yield, false, "Yield thread in RDMA polling mode."); DEFINE_bool(rdma_disable_bthread, false, "Disable bthread in RDMA"); +DEFINE_bool(rdma_ece, false, "Open ece in RDMA, should use this feature when rdma nics are from the same merchant."); static const size_t IOBUF_BLOCK_HEADER_LEN = 32; // implementation-dependent @@ -1251,6 +1254,21 @@ int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, uint32_t qp_num) { return -1; } + if (FLAGS_rdma_ece) { + struct ibv_ece ece; + int err = IbvQueryEce(_resource->qp, &ece); + if (err != 0) { + LOG(WARNING) << "Fail to IbvQueryEce: " << berror(err); + return -1; + } + // ToDo: should check if remote qp support ece + err = IbvSetEce(_resource->qp, &ece); + if (err != 0) { + LOG(WARNING) << "Fail to IbvSetEce: " << berror(err); + return -1; + } + } + if (PostRecv(_rq_size, true) < 0) { PLOG(WARNING) << "Fail to post recv wr"; return -1; diff --git a/src/brpc/rdma/rdma_helper.cpp b/src/brpc/rdma/rdma_helper.cpp index 9bad33750c..35a47bd35a 100644 --- a/src/brpc/rdma/rdma_helper.cpp +++ b/src/brpc/rdma/rdma_helper.cpp @@ -72,6 +72,8 @@ void (*IbvAckCqEvents)(ibv_cq*, unsigned int) = NULL; int (*IbvGetAsyncEvent)(ibv_context*, ibv_async_event*) = NULL; void (*IbvAckAsyncEvent)(ibv_async_event*) = NULL; const 
char* (*IbvEventTypeStr)(ibv_event_type) = NULL; +int (*IbvQueryEce)(ibv_qp*, ibv_ece*) = NULL; +int (*IbvSetEce)(ibv_qp*, ibv_ece*) = NULL; // NOTE: // ibv_post_send, ibv_post_recv, ibv_poll_cq, ibv_req_notify_cq are all inline function @@ -386,6 +388,8 @@ static int ReadRdmaDynamicLib() { LoadSymbol(g_handle_ibverbs, IbvGetAsyncEvent, "ibv_get_async_event"); LoadSymbol(g_handle_ibverbs, IbvAckAsyncEvent, "ibv_ack_async_event"); LoadSymbol(g_handle_ibverbs, IbvEventTypeStr, "ibv_event_type_str"); + LoadSymbol(g_handle_ibverbs, IbvQueryEce, "ibv_query_ece"); + LoadSymbol(g_handle_ibverbs, IbvSetEce, "ibv_set_ece"); return 0; } From 0b885d2d05ff55953704c456508bc3060b127b8b Mon Sep 17 00:00:00 2001 From: huangjun Date: Thu, 23 Apr 2026 11:00:13 +0800 Subject: [PATCH 78/84] [bvar] Fix sampler interval after switch to cpuwide_time_ns (#3278) Commit 12fb539a ("Use monotonic time instead of wall time", #3268) switched the three time-source calls in SamplerCollector::run() from gettimeofday_us() to cpuwide_time_ns(), but the surrounding code still treats the timestamps as microseconds: - abstime += 1000000L now represents 1 ms (not 1 s), causing the sampler to spin at ~1 kHz instead of 1 Hz; - usleep(abstime - now) receives a nanosecond delta, which usleep() interprets as microseconds. Use cpuwide_time_us() instead, which preserves the monotonic behavior from #3268 while keeping the existing microsecond-based arithmetic correct. Fixes #3277. 
Co-authored-by: huangjun --- src/bvar/detail/sampler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bvar/detail/sampler.cpp b/src/bvar/detail/sampler.cpp index f3462558c1..4632938bb2 100644 --- a/src/bvar/detail/sampler.cpp +++ b/src/bvar/detail/sampler.cpp @@ -155,7 +155,7 @@ void SamplerCollector::run() { butil::LinkNode root; int consecutive_nosleep = 0; while (!_stop) { - int64_t abstime = butil::cpuwide_time_ns(); + int64_t abstime = butil::cpuwide_time_us(); Sampler* s = this->reset(); if (s) { s->InsertBeforeAsList(&root); @@ -176,13 +176,13 @@ void SamplerCollector::run() { p = saved_next; } bool slept = false; - int64_t now = butil::cpuwide_time_ns(); + int64_t now = butil::cpuwide_time_us(); _cumulated_time_us += now - abstime; abstime += 1000000L; while (abstime > now) { ::usleep(abstime - now); slept = true; - now = butil::cpuwide_time_ns(); + now = butil::cpuwide_time_us(); } if (slept) { consecutive_nosleep = 0; From 3a1fb1de3218fb7fc448165133b0700a267033e5 Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Fri, 24 Apr 2026 13:34:58 +0800 Subject: [PATCH 79/84] Remove unused comp channel (#3279) --- src/brpc/rdma/rdma_helper.cpp | 1 - src/brpc/socket.cpp | 6 +++++- test/bvar_recorder_unittest.cpp | 16 ++++++++-------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/brpc/rdma/rdma_helper.cpp b/src/brpc/rdma/rdma_helper.cpp index 35a47bd35a..768bf615e2 100644 --- a/src/brpc/rdma/rdma_helper.cpp +++ b/src/brpc/rdma/rdma_helper.cpp @@ -505,7 +505,6 @@ static void GlobalRdmaInitializeOrDieImpl() { } else { LOG(INFO) << "RDMA GID Index: " << (int)g_gid_index; } - IbvCreateCompChannel(g_context); // Create protection domain g_pd = IbvAllocPd(g_context); diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index 78bd4e23d2..005873e9b0 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -2512,8 +2512,12 @@ void Socket::DebugSocket(std::ostream& os, SocketId id) { << "\n}"; } #endif + + os << 
"\nrdma={\n"; ptr->_transport->Debug(os); - { os << "\nbthread_tag=" << ptr->_io_event.bthread_tag(); } + os << "}\n"; + + os << "\nbthread_tag=" << ptr->_io_event.bthread_tag(); } int Socket::CheckHealth() { diff --git a/test/bvar_recorder_unittest.cpp b/test/bvar_recorder_unittest.cpp index 7493c34541..a385b9b7bd 100644 --- a/test/bvar_recorder_unittest.cpp +++ b/test/bvar_recorder_unittest.cpp @@ -244,15 +244,15 @@ TEST(RecorderTest, latency_recorder_qps_accuracy) { double err = fabs(qps_sum / 1000.0 - exp_qps); return err; }; - ASSERT_GT(0.2, read(lr1, 10/2.0)); - ASSERT_GT(0.2, read(lr2, 11/2.0)); - ASSERT_GT(0.2, read(lr3, 3/2.0)); - ASSERT_GT(0.2, read(lr4, 1/2.0)); + ASSERT_GT(0.1, read(lr1, 10/2.0)); + ASSERT_GT(0.1, read(lr2, 11/2.0)); + ASSERT_GT(0.1, read(lr3, 3/2.0)); + ASSERT_GT(0.1, read(lr4, 1/2.0)); - ASSERT_GT(0.2, read(lr1, 10/3.0, 3)); - ASSERT_GT(0.2, read(lr2, 11/3.0, 3)); - ASSERT_GT(0.2, read(lr3, 3/3.0, 3)); - ASSERT_GT(0.2, read(lr4, 1/3.0, 3)); + ASSERT_GT(0.1, read(lr1, 10/3.0, 3)); + ASSERT_GT(0.1, read(lr2, 11/3.0, 3)); + ASSERT_GT(0.1, read(lr3, 3/3.0, 3)); + ASSERT_GT(0.1, read(lr4, 1/3.0, 3)); } } // namespace From 76058650e5b62e5f756656686322a6794bce4f6e Mon Sep 17 00:00:00 2001 From: Zhou <43895234@qq.com> Date: Sun, 26 Apr 2026 17:29:44 +0800 Subject: [PATCH 80/84] Fix avoid SIGSEGV in read_proc_status during static initialization (#3282) read_proc_status can be sampled while default bvars are initialized before main(). If reading /proc/self/stat fails at that time, logging through glog may access uninitialized glog state and crash. Print the warning to stderr instead, matching the read_proc_io fallback. 
Signed-off-by: zhoulei Co-authored-by: zhoulei --- src/bvar/default_variables.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bvar/default_variables.cpp b/src/bvar/default_variables.cpp index 1d60063526..40d30c56e7 100644 --- a/src/bvar/default_variables.cpp +++ b/src/bvar/default_variables.cpp @@ -81,7 +81,12 @@ static bool read_proc_status(ProcStat &stat) { // see http://man7.org/linux/man-pages/man5/proc.5.html butil::ScopedFILE fp("/proc/self/stat", "r"); if (NULL == fp) { - PLOG_ONCE(WARNING) << "Fail to open /proc/self/stat"; + static bool ever_printed_stat_err = false; + if (!ever_printed_stat_err) { + fprintf(stderr, "WARNING: Fail to open /proc/self/stat, errno=%d. " + "Process status related bvars will be unavailable.\n", errno); + ever_printed_stat_err = true; + } return false; } if (fscanf(fp, "%d %*s %c " @@ -94,7 +99,8 @@ static bool read_proc_status(ProcStat &stat) { &stat.flags, &stat.minflt, &stat.cminflt, &stat.majflt, &stat.cmajflt, &stat.utime, &stat.stime, &stat.cutime, &stat.cstime, &stat.priority, &stat.nice, &stat.num_threads) != 19) { - PLOG(WARNING) << "Fail to fscanf"; + fprintf(stderr, "WARNING: Fail to fscanf /proc/self/stat, errno=%d. 
" + "Process status related bvars will be unavailable.\n", errno); return false; } return true; From 66ef83748f53a52d64505d17d37e93125b062bdd Mon Sep 17 00:00:00 2001 From: huangjun Date: Sun, 26 Apr 2026 17:44:07 +0800 Subject: [PATCH 81/84] Roll back LocalityAwareLoadBalancer to gettimeofday_us to match callers (#3283) PR #3268 ("Use monotonic time instead of wall time") switched LocalityAwareLoadBalancer::Weight::Update's end_time_us and LocalityAwareLoadBalancer::Describe's now to butil::cpuwide_time_us(), but every caller that supplies CallInfo::begin_time_us still uses butil::gettimeofday_us(): - Channel::CallMethod (channel.cpp:451) -> Controller::IssueRPC -> Controller::Call::begin_time_us -> SelectIn::begin_time_us -> CallInfo::begin_time_us - Controller::OnVersionedRPCReturned retry sites (controller.cpp:672, 715) call IssueRPC(gettimeofday_us()) on backup-request and regular retries The mismatched time domains make latency = end_time_us - ci.begin_time_us = cpuwide_now - wallclock_begin ~= -1.7e15 us trigger the `if (latency <= 0) { /* time skews, ignore */ return 0; }` short-circuit on every call. _time_q never accumulates samples, _avg_latency stays at 0, and locality-aware weight feedback is silently disabled. Visible downstream symptom: cold-start `list://` channels with `lb=la` and 2 backends occasionally fail RPCs with EHOSTDOWN ("Fail to select server from list://...") on retry even when one backend is healthy. Bisected reproduction in xsky/brpc fork: - 51 commit range c41e838..604dad0c (1.16.1 .. 1.17.0-rc2) - master code + LA-driven multipath probe at 2 backends, max_retry=1, repeat 500x: * commit 771de31e (one before #3268): 0/500 fail * commit 12fb539a (#3268): 25/500 fail * commit 12fb539a + revert only Weight::Update::end_time_us to gettimeofday_us: 0/500 fail This commit reverts the LA-side of #3268's clock change so the LB lines up with its existing callers again. 
Channel::CallMethod and the retry paths in Controller stay on butil::gettimeofday_us(), which preserves the wall-clock semantics of Controller::_begin_time_us / Controller::latency_us() that public users rely on. Adds test/brpc_load_balancer_unittest.cpp::la_records_latency_with_consistent_time_source which drives a series of SelectServer + Feedback cycles against LocalityAwareLoadBalancer (no Server / Channel needed) and asserts that _avg_latency reflects the elapsed time, rather than being stuck at 0 because of a time-source mismatch. Co-authored-by: huangjun --- .../policy/locality_aware_load_balancer.cpp | 6 +- test/brpc_load_balancer_unittest.cpp | 77 +++++++++++++++++++ 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/src/brpc/policy/locality_aware_load_balancer.cpp b/src/brpc/policy/locality_aware_load_balancer.cpp index 455f6fc397..beea51690e 100644 --- a/src/brpc/policy/locality_aware_load_balancer.cpp +++ b/src/brpc/policy/locality_aware_load_balancer.cpp @@ -18,7 +18,7 @@ #include // numeric_limits #include -#include "butil/time.h" // cpuwide_time_us +#include "butil/time.h" // gettimeofday_us #include "butil/fast_rand.h" #include "brpc/log.h" #include "brpc/socket.h" @@ -376,7 +376,7 @@ void LocalityAwareLoadBalancer::Feedback(const CallInfo& info) { int64_t LocalityAwareLoadBalancer::Weight::Update( const CallInfo& ci, size_t index) { - const int64_t end_time_us = butil::cpuwide_time_us(); + const int64_t end_time_us = butil::gettimeofday_us(); const int64_t latency = end_time_us - ci.begin_time_us; BAIDU_SCOPED_LOCK(_mutex); if (Disabled()) { @@ -524,7 +524,7 @@ void LocalityAwareLoadBalancer::Describe( if (_db_servers.Read(&s) != 0) { os << "fail to read _db_servers"; } else { - const int64_t now = butil::cpuwide_time_us(); + const int64_t now = butil::gettimeofday_us(); const size_t n = s->weight_tree.size(); os << '['; for (size_t i = 0; i < n; ++i) { diff --git a/test/brpc_load_balancer_unittest.cpp 
b/test/brpc_load_balancer_unittest.cpp index 2a2be242aa..76ad005eac 100644 --- a/test/brpc_load_balancer_unittest.cpp +++ b/test/brpc_load_balancer_unittest.cpp @@ -1303,6 +1303,83 @@ TEST_F(LoadBalancerTest, revived_from_all_failed_intergrated) { } #endif // BUTIL_USE_ASAN +// Regression for #3268's incomplete migration of LocalityAwareLoadBalancer. +// +// #3268 switched `LocalityAwareLoadBalancer::Weight::Update::end_time_us` and +// `LocalityAwareLoadBalancer::Describe::now` to `butil::cpuwide_time_us()` +// while every caller that supplies `CallInfo::begin_time_us` (the RPC entry +// in `Channel::CallMethod` and the retry sites in +// `Controller::OnVersionedRPCReturned`) still uses `butil::gettimeofday_us()`. +// The resulting time-source mismatch makes +// +// latency = end_time_us - ci.begin_time_us +// = cpuwide_now - wallclock_begin +// ~= -1.7e15 us (huge negative) +// +// trigger the +// +// if (latency <= 0) { /* time skews, ignore the sample */ return 0; } +// +// short-circuit on every call. `_time_q` never accumulates samples, +// `_avg_latency` stays at 0, and locality-aware weight feedback is silently +// disabled. Visible downstream symptom: cold-start `list://` channels with +// `lb=la` and 2 backends occasionally fail RPCs with `EHOSTDOWN` +// ("Fail to select server") on retry even when one backend is healthy. +// +// This commit reverts the LA side of #3268, so `Weight::Update` and +// `Describe` once again use `butil::gettimeofday_us()` to match every +// existing caller of `CallInfo::begin_time_us`. +// +// The test below runs entirely against `LocalityAwareLoadBalancer` (no +// Server / Channel is involved), so it is hermetic. It supplies a +// gettimeofday-based `begin_time_us` (matching what `Channel::CallMethod` +// passes today) and asserts that the LB records a positive `_avg_latency`, +// rather than tripping the time-skew short-circuit. 
+TEST_F(LoadBalancerTest, la_records_latency_with_consistent_time_source) { + LALB lalb; + char addr[] = "192.168.1.1:8080"; + butil::EndPoint dummy; + ASSERT_EQ(0, str2endpoint(addr, &dummy)); + brpc::ServerId id(8888); + brpc::SocketOptions options; + options.remote_side = dummy; + ASSERT_EQ(0, brpc::Socket::Create(options, &id.id)); + ASSERT_TRUE(lalb.AddServer(id)); + + auto avg_latency = [&]() -> int64_t { + std::ostringstream os; + brpc::DescribeOptions opts; + opts.verbose = true; + lalb.Describe(os, opts); + const std::string s = os.str(); + const size_t p = s.find("avg_latency="); + if (p == std::string::npos) return -1; + return strtoll(s.c_str() + p + strlen("avg_latency="), NULL, 10); + }; + + // Drive a few "RPCs": pick a server, sleep ~2ms, feed back. begin_time_us + // comes from gettimeofday_us(), matching what Channel::CallMethod and the + // retry sites in Controller::OnVersionedRPCReturned pass on every RPC. + for (int i = 0; i < 8; ++i) { + const int64_t begin_us = butil::gettimeofday_us(); + brpc::SocketUniquePtr ptr; + brpc::LoadBalancer::SelectIn in = { begin_us, true, false, 0u, NULL }; + brpc::LoadBalancer::SelectOut out(&ptr); + ASSERT_EQ(0, lalb.SelectServer(in, &out)); + bthread_usleep(2000); + brpc::LoadBalancer::CallInfo ci = { begin_us, id.id, 0, NULL }; + lalb.Feedback(ci); + } + + // _avg_latency must reflect actual elapsed time. If this is 0, either + // Weight::Update::end_time_us was changed away from gettimeofday_us + // again (re-introducing the time-source mismatch) or some caller of + // CallInfo::begin_time_us drifted to a different clock domain. 
+ EXPECT_GT(avg_latency(), 0); + + ASSERT_EQ(0, brpc::Socket::SetFailed(id.id)); +} + TEST_F(LoadBalancerTest, la_selection_too_long) { brpc::GlobalInitializeOrDie(); brpc::LoadBalancerWithNaming lb; From cb80d554847962ff8df98d11d242104654da6ef8 Mon Sep 17 00:00:00 2001 From: xin_github <74232414+xin-ok@users.noreply.github.com> Date: Wed, 6 May 2026 15:16:06 +0800 Subject: [PATCH 82/84] Use compare_exchange_weak in steal loop for potential performance improvement (#3285) --- src/bthread/work_stealing_queue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bthread/work_stealing_queue.h b/src/bthread/work_stealing_queue.h index 06e9ed074c..138aaa6ba8 100644 --- a/src/bthread/work_stealing_queue.h +++ b/src/bthread/work_stealing_queue.h @@ -128,7 +128,7 @@ class WorkStealingQueue { return false; } *val = _buffer[t & (_capacity - 1)]; - } while (!_top.compare_exchange_strong(t, t + 1, + } while (!_top.compare_exchange_weak(t, t + 1, butil::memory_order_seq_cst, butil::memory_order_relaxed)); return true; From db5707c41a99672ba0c5e4656f160239cce54303 Mon Sep 17 00:00:00 2001 From: zchuango Date: Sat, 9 May 2026 07:02:50 +0000 Subject: [PATCH 83/84] add the ubring docs for ubring transport --- README.md | 1 + README_cn.md | 1 + docs/cn/ubring.md | 184 ++++++++++++++++++++++++++++++++++++++++++++++ docs/en/ubring.md | 182 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 368 insertions(+) create mode 100644 docs/cn/ubring.md create mode 100644 docs/en/ubring.md diff --git a/README.md b/README.md index 1c4f78528b..d65366fafb 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,7 @@ You can use it to: * [FlatMap](docs/en/flatmap.md) * [Coroutine](docs/en/coroutine.md) * [Circuit Breaker](docs/en/circuit_breaker.md) + * [UBRing](docs/en/ubring.md) * [RDMA](docs/en/rdma.md) * [Bazel Support](docs/en/bazel_support.md) * [Wireshark baidu_std dissector plugin](docs/en/wireshark_baidu_std.md) diff --git a/README_cn.md b/README_cn.md index 
6413f83fde..2cc686bd85 100644 --- a/README_cn.md +++ b/README_cn.md @@ -87,6 +87,7 @@ * [FlatMap](docs/cn/flatmap.md) * [协程](docs/cn/coroutine.md) * [熔断](docs/cn/circuit_breaker.md) + * [UBRing](docs/cn/ubring.md) * [RDMA](docs/cn/rdma.md) * [Bazel构建支持](docs/cn/bazel_support.md) * [Wireshark baidu_std协议解析插件](docs/cn/wireshark_baidu_std.md) diff --git a/docs/cn/ubring.md b/docs/cn/ubring.md new file mode 100644 index 0000000000..576930f539 --- /dev/null +++ b/docs/cn/ubring.md @@ -0,0 +1,184 @@ +# UBRing: 高性能共享内存 RPC + +UBRing 是 brpc 中的高性能 RPC 实现,它利用共享内存进行进程间通信(IPC)。它支持本地共享内存(POSIX IPC)和远端共享内存(ubs-mem)两种模式,提供微秒到纳秒级的进程间通信延迟。 + +## 技术背景 + +传统的 RPC 框架通常使用网络套接字进行通信,由于内核参与、上下文切换和数据拷贝等原因,会引入显著的开销。UBRing 通过使用共享内存作为通信介质来解决这个问题,允许进程之间直接内存访问,最小化内核干预。 + +UBRing 的主要优势: + +- **超低延迟**:微秒级 RPC 延迟 +- **高吞吐量**:每秒数百万次 RPC 调用 +- **减少数据拷贝**:进程间直接内存访问 +- **跨平台支持**:支持 Linux 和 macOS + +## 支持的共享内存后端 + +UBRing 支持两种共享内存后端,通过 `ub_shm_type` 参数控制: + +### 1. POSIX IPC 共享内存 (ub\_shm\_type = 1) + +这是默认模式,使用标准 POSIX 共享内存进行本地 IPC。同一机器上的进程可以通过共享内存区域直接通信。 + +### 2. UBS-Mem 远端共享内存 (ub\_shm\_type = 2) + +此模式使用 ubs-mem(Unified Block Storage Memory),这是来自 openEuler 的开源远端共享内存框架。它支持机架内节点之间的共享内存通信,类似于 RDMA 但部署要求更简单。 + +**UBS-Mem 开源地址**: https://atomgit.com/openeuler/ubs-mem + +### 未来扩展 + +该架构设计支持未来扩展基于 CXL(Compute Express Link)的远端共享内存,实现更灵活的分布式内存共享。 + +## 构建配置 + +### 使用 CMake 构建 + +要构建带有 UBRing 支持的 brpc,请使用以下命令: + +```bash +# 构建 brpc 并启用 UBRing 支持 +cd /path/to/brpc +cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_UBRING:BOOL=ON +cmake --build build -j 8 + +# 构建 ubring_performance 示例 +cd /path/to/brpc/example/ubring_performance +cmake -B build +cmake --build build -j 8 +``` + +### 使用 Bazel 构建 + +使用 Bazel 构建带有 UBRing 支持的 brpc: + +```bash +# 构建 brpc 并启用 UBRing 支持 +cd /path/to/brpc +bazel build //... --define=with_ubring=true + +# 构建 ubring_performance 示例 +bazel build //example/ubring_performance/...
+``` + +### 选择共享内存后端 + +共享内存后端通过 `--ub_shm_type` 参数控制: + +```bash +# 使用 POSIX IPC(默认) +./your_program --ub_shm_type=1 + +# 使用 UBS-Mem +./your_program --ub_shm_type=2 +``` + +## 性能测试 + +### 示例: ubring\_performance + +brpc 在 `example/ubring_performance/` 目录提供了性能测试示例。 + +#### 构建示例 + +```bash +cd example/ubring_performance +mkdir -p build && cd build +cmake .. +make +``` + +#### 运行服务端 + +```bash +# 使用 POSIX IPC +./ubring_performance_server --ub_shm_type=1 + +# 使用 UBS-Mem +./ubring_performance_server --ub_shm_type=2 +``` + +#### 运行客户端 + +```bash +# 使用 POSIX IPC +./ubring_performance_client --ub_shm_type=1 --server=127.0.0.1:8000 + +# 使用 UBS-Mem +./ubring_performance_client --ub_shm_type=2 --server=<server_ip>:8000 +``` + +#### 测试选项 + +| 选项 | 描述 | 默认值 | +| --------------- | ------------------------- | -------------- | +| `--ub_shm_type` | 共享内存类型 (1=IPC, 2=UBS-Mem) | 1 | +| `--server` | 服务端地址 | 127.0.0.1:8000 | +| `--thread_num` | 客户端线程数 | 1 | +| `--request_num` | 每线程请求总数 | 1000000 | +| `--timeout_ms` | 请求超时时间(毫秒) | 1000 | + +## 架构概述 + +```mermaid +graph TD + subgraph 客户端进程 + A[Client] + end + + subgraph 服务端进程 + B[Server] + end + + subgraph 共享内存层 + C[SHM Manager] + D[IPC Backend] + E[UBS-Mem Backend] + end + + A -->|直接内存访问| C + B -->|直接内存访问| C + C --> D + C --> E + + style A fill:#636,color:#fff,stroke:#333,stroke-width:2px + style B fill:#369,color:#fff,stroke:#333,stroke-width:2px + style C fill:#396,color:#fff,stroke:#333,stroke-width:2px +``` + +### 架构细节 + +UBRing 架构包含以下组件: + +1. **客户端/服务端进程**: 通过共享内存通信的应用进程 +2. **SHM Manager**: 共享内存操作的中央管理器 (`shm_mgr.cpp`) +3. **IPC Backend**: 用于本地通信的 POSIX 共享内存实现 +4.
**UBS-Mem Backend**: 用于跨节点通信的远端共享内存实现 + +## 实现细节 + +### 共享内存管理 + +共享内存管理器 (`shm_mgr.cpp`) 为不同的共享内存后端提供统一接口: + +- **初始化**: `ShmMgrInit()` - 初始化共享内存子系统 +- **本地分配**: `ShmLocalMalloc()` - 分配本地共享内存 +- **远端分配**: `ShmRemoteMalloc()` - 分配远程节点可访问的共享内存 +- **释放**: `ShmFree()` - 释放共享内存资源 + +### 定时器管理 + +UBRing 使用高精度定时器系统 (`timer_mgr.cpp`) 进行连接管理和超时处理,支持 epoll(Linux)和 kqueue(macOS)。 + +## 参考资料 + +- [UBRing 特性提案](https://github.com/apache/brpc/issues/3226) +- [UBRing 技术讨论](https://github.com/apache/brpc/discussions/3217) +- [UBS-Mem 开源项目](https://atomgit.com/openeuler/ubs-mem) + +## 相关文档 + +- [UB Client](ub_client.md) - 访问 UB 服务 +- [RDMA 支持](rdma.md) - 远程直接内存访问 + diff --git a/docs/en/ubring.md b/docs/en/ubring.md new file mode 100644 index 0000000000..93b9be2054 --- /dev/null +++ b/docs/en/ubring.md @@ -0,0 +1,182 @@ +# UBRing: High-Performance Shared Memory RPC + +UBRing is a high-performance RPC implementation in brpc that leverages shared memory for inter-process communication (IPC). It supports both local shared memory (POSIX IPC) and remote shared memory (ubs-mem), providing ultra-low latency communication between processes. + +## Technical Background + +Traditional RPC frameworks typically use network sockets for communication, which introduces significant overhead due to kernel involvement, context switches, and data copying. UBRing addresses this by using shared memory as the communication medium, allowing direct memory access between processes with minimal kernel intervention. + +Key advantages of UBRing: +- **Ultra-low latency**: Microsecond-level RPC latency +- **High throughput**: Millions of RPC calls per second +- **Reduced data copying**: Direct memory access between processes +- **Cross-platform support**: Works on Linux and macOS + +## Supported Shared Memory Backends + +UBRing supports two types of shared memory backends, controlled by the `ub_shm_type` flag: + +### 1. 
POSIX IPC Shared Memory (ub_shm_type = 1) + +This is the default mode, using standard POSIX shared memory for local IPC. Processes on the same machine can communicate directly through shared memory regions. + +### 2. UBS-Mem Remote Shared Memory (ub_shm_type = 2) + +This mode uses ubs-mem (Unified Block Storage Memory), an open-source remote shared memory framework from openEuler. It enables shared memory communication across nodes in a rack, similar to RDMA but with simpler deployment requirements. + +**UBS-Mem Open Source**: https://atomgit.com/openeuler/ubs-mem + +### Future Expansion + +The architecture is designed to support CXL (Compute Express Link) based remote shared memory in the future, enabling even more flexible distributed memory sharing. + +## Build Configuration + +### Build with CMake + +To build brpc with UBRing support, use the following commands: + +```bash +# Build brpc with UBRing support +cd /path/to/brpc +cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_UBRING:BOOL=ON +cmake --build build -j 8 + +# Build the ubring_performance example +cd /path/to/brpc/example/ubring_performance +cmake -B build +cmake --build build -j 8 +``` + +### Build with Bazel + +To build brpc with UBRing support using Bazel: + +```bash +# Build brpc with UBRing support +cd /path/to/brpc +bazel build //... --define=with_ubring=true + +# Build the ubring_performance example +bazel build //example/ubring_performance/... +``` + +### Select Shared Memory Backend + +The shared memory backend is controlled by the `--ub_shm_type` flag: + +```bash +# Use POSIX IPC (default) +./your_program --ub_shm_type=1 + +# Use UBS-Mem +./your_program --ub_shm_type=2 +``` + +## Performance Testing + +### Example: ubring_performance + +brpc provides a performance test example at `example/ubring_performance/`. + +#### Build the Example + +```bash +cd example/ubring_performance +mkdir -p build && cd build +cmake .. 
+make +``` + +#### Run Server + +```bash +# Run with POSIX IPC +./ubring_performance_server --ub_shm_type=1 + +# Run with UBS-Mem +./ubring_performance_server --ub_shm_type=2 +``` + +#### Run Client + +```bash +# Run with POSIX IPC +./ubring_performance_client --ub_shm_type=1 --server=127.0.0.1:8000 + +# Run with UBS-Mem +./ubring_performance_client --ub_shm_type=2 --server=<server_ip>:8000 +``` + +#### Test Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--ub_shm_type` | Shared memory type (1=IPC, 2=UBS-Mem) | 1 | +| `--server` | Server address | 127.0.0.1:8000 | +| `--thread_num` | Number of client threads | 1 | +| `--request_num` | Total requests per thread | 1000000 | +| `--timeout_ms` | Request timeout in milliseconds | 1000 | + +## Architecture Overview + +```mermaid +graph TD + subgraph Client Process + A[Client] + end + + subgraph Server Process + B[Server] + end + + subgraph Shared Memory + C[SHM Manager] + D[IPC Backend] + E[UBS-Mem Backend] + end + + A -->|Direct Memory Access| C + B -->|Direct Memory Access| C + C --> D + C --> E + + style A fill:#636,color:#fff,stroke:#333,stroke-width:2px + style B fill:#369,color:#fff,stroke:#333,stroke-width:2px + style C fill:#396,color:#fff,stroke:#333,stroke-width:2px +``` + +### Architecture Details + +The UBRing architecture consists of: + +1. **Client/Server Processes**: Application processes that communicate via shared memory +2. **SHM Manager**: Central manager for shared memory operations (`shm_mgr.cpp`) +3. **IPC Backend**: POSIX shared memory implementation for local communication +4.
**UBS-Mem Backend**: Remote shared memory implementation for cross-node communication + +## Implementation Details + +### Shared Memory Management + +The shared memory manager (`shm_mgr.cpp`) provides a unified interface for different shared memory backends: + +- **Initialization**: `ShmMgrInit()` - Initializes the shared memory subsystem +- **Local Allocation**: `ShmLocalMalloc()` - Allocates shared memory for local use +- **Remote Allocation**: `ShmRemoteMalloc()` - Allocates shared memory accessible by remote nodes +- **Free**: `ShmFree()` - Releases shared memory resources + +### Timer Management + +UBRing uses a high-precision timer system (`timer_mgr.cpp`) for connection management and timeout handling, supporting both epoll (Linux) and kqueue (macOS). + +## References + +- [UBRing Feature Proposal](https://github.com/apache/brpc/issues/3226) +- [UBRing Technical Discussion](https://github.com/apache/brpc/discussions/3217) +- [UBS-Mem Open Source](https://atomgit.com/openeuler/ubs-mem) + +## See Also + +- [UB Client](ub_client.md) - Accessing UB services +- [RDMA Support](rdma.md) - Remote direct memory access \ No newline at end of file From a6a852ed060dd5013ce3acb2d73c29fff47eb75a Mon Sep 17 00:00:00 2001 From: zchuango Date: Sat, 9 May 2026 08:05:41 +0000 Subject: [PATCH 84/84] modify some file name and directory structure --- src/brpc/transport_factory.cpp | 2 +- src/brpc/{ubring => ubshm}/common/common.h | 0 .../{ubring => ubshm}/common/thread_lock.h | 2 +- src/brpc/{ubring => ubshm}/shm/shm_def.h | 0 src/brpc/{ubring => ubshm}/shm/shm_ipc.cpp | 6 +++--- src/brpc/{ubring => ubshm}/shm/shm_ipc.h | 0 src/brpc/{ubring => ubshm}/shm/shm_mgr.cpp | 8 ++++---- src/brpc/{ubring => ubshm}/shm/shm_mgr.h | 4 ++-- src/brpc/{ubring => ubshm}/shm/shm_ubs.cpp | 18 +++++++++--------- src/brpc/{ubring => ubshm}/shm/shm_ubs.h | 0 src/brpc/{ubring => ubshm}/timer/timer_mgr.cpp | 2 +- src/brpc/{ubring => ubshm}/timer/timer_mgr.h | 2 +- src/brpc/{ubring => 
ubshm}/ub_endpoint.cpp | 12 ++++++------ src/brpc/{ubring => ubshm}/ub_endpoint.h | 6 +++--- src/brpc/{ubring => ubshm}/ub_helper.cpp | 6 +++--- src/brpc/{ubring => ubshm}/ub_helper.h | 0 src/brpc/{ubring => ubshm}/ub_ring.cpp | 6 +++--- src/brpc/{ubring => ubshm}/ub_ring.h | 6 +++--- src/brpc/{ubring => ubshm}/ub_ring_manager.cpp | 4 ++-- src/brpc/{ubring => ubshm}/ub_ring_manager.h | 6 +++--- src/brpc/{ubring => ubshm}/ubr_msg.h | 0 src/brpc/{ubring => ubshm}/ubr_trx.h | 8 ++++---- .../ubs_mem}/declare_shm_ubs.h | 0 .../rack_mem => ubshm/ubs_mem}/ubs_mem.h | 0 .../rack_mem => ubshm/ubs_mem}/ubs_mem_def.h | 0 .../ubs_mem}/ubshmem_stub.cpp | 0 .../{ub_transport.cpp => ubshm_transport.cpp} | 6 +++--- src/brpc/{ub_transport.h => ubshm_transport.h} | 0 28 files changed, 52 insertions(+), 52 deletions(-) rename src/brpc/{ubring => ubshm}/common/common.h (100%) rename src/brpc/{ubring => ubshm}/common/thread_lock.h (99%) rename src/brpc/{ubring => ubshm}/shm/shm_def.h (100%) rename src/brpc/{ubring => ubshm}/shm/shm_ipc.cpp (98%) rename src/brpc/{ubring => ubshm}/shm/shm_ipc.h (100%) rename src/brpc/{ubring => ubshm}/shm/shm_mgr.cpp (97%) rename src/brpc/{ubring => ubshm}/shm/shm_mgr.h (94%) rename src/brpc/{ubring => ubshm}/shm/shm_ubs.cpp (97%) rename src/brpc/{ubring => ubshm}/shm/shm_ubs.h (100%) rename src/brpc/{ubring => ubshm}/timer/timer_mgr.cpp (99%) rename src/brpc/{ubring => ubshm}/timer/timer_mgr.h (98%) rename src/brpc/{ubring => ubshm}/ub_endpoint.cpp (99%) rename src/brpc/{ubring => ubshm}/ub_endpoint.h (98%) rename src/brpc/{ubring => ubshm}/ub_helper.cpp (96%) rename src/brpc/{ubring => ubshm}/ub_helper.h (100%) rename src/brpc/{ubring => ubshm}/ub_ring.cpp (99%) rename src/brpc/{ubring => ubshm}/ub_ring.h (98%) rename src/brpc/{ubring => ubshm}/ub_ring_manager.cpp (99%) rename src/brpc/{ubring => ubshm}/ub_ring_manager.h (95%) rename src/brpc/{ubring => ubshm}/ubr_msg.h (100%) rename src/brpc/{ubring => ubshm}/ubr_trx.h (96%) rename 
src/brpc/{ubring/rack_mem => ubshm/ubs_mem}/declare_shm_ubs.h (100%) rename src/brpc/{ubring/rack_mem => ubshm/ubs_mem}/ubs_mem.h (100%) rename src/brpc/{ubring/rack_mem => ubshm/ubs_mem}/ubs_mem_def.h (100%) rename src/brpc/{ubring/rack_mem => ubshm/ubs_mem}/ubshmem_stub.cpp (100%) rename src/brpc/{ub_transport.cpp => ubshm_transport.cpp} (98%) rename src/brpc/{ub_transport.h => ubshm_transport.h} (100%) diff --git a/src/brpc/transport_factory.cpp b/src/brpc/transport_factory.cpp index 0dfb55e6b8..36fdaaed05 100644 --- a/src/brpc/transport_factory.cpp +++ b/src/brpc/transport_factory.cpp @@ -18,7 +18,7 @@ #include "brpc/transport_factory.h" #include "brpc/tcp_transport.h" #include "brpc/rdma_transport.h" -#include "brpc/ub_transport.h" +#include "brpc/ubshm_transport.h" namespace brpc { int TransportFactory::ContextInitOrDie(SocketMode mode, bool serverOrNot, const void* _options) { diff --git a/src/brpc/ubring/common/common.h b/src/brpc/ubshm/common/common.h similarity index 100% rename from src/brpc/ubring/common/common.h rename to src/brpc/ubshm/common/common.h diff --git a/src/brpc/ubring/common/thread_lock.h b/src/brpc/ubshm/common/thread_lock.h similarity index 99% rename from src/brpc/ubring/common/thread_lock.h rename to src/brpc/ubshm/common/thread_lock.h index 07368daa57..8c07ce360d 100644 --- a/src/brpc/ubring/common/thread_lock.h +++ b/src/brpc/ubshm/common/thread_lock.h @@ -22,7 +22,7 @@ #include #include #include -#include "brpc/ubring/common/common.h" +#include "brpc/ubshm/common/common.h" #ifdef __cplusplus extern "C" { diff --git a/src/brpc/ubring/shm/shm_def.h b/src/brpc/ubshm/shm/shm_def.h similarity index 100% rename from src/brpc/ubring/shm/shm_def.h rename to src/brpc/ubshm/shm/shm_def.h diff --git a/src/brpc/ubring/shm/shm_ipc.cpp b/src/brpc/ubshm/shm/shm_ipc.cpp similarity index 98% rename from src/brpc/ubring/shm/shm_ipc.cpp rename to src/brpc/ubshm/shm/shm_ipc.cpp index 878ca093ea..7e934c7568 100644 --- a/src/brpc/ubring/shm/shm_ipc.cpp 
+++ b/src/brpc/ubshm/shm/shm_ipc.cpp @@ -23,9 +23,9 @@ #include #include #include -#include "brpc/ubring/common/common.h" -#include "brpc/ubring/shm/shm_def.h" -#include "brpc/ubring/shm/shm_ipc.h" +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm/shm/shm_def.h" +#include "brpc/ubshm/shm/shm_ipc.h" namespace brpc { namespace ubring { diff --git a/src/brpc/ubring/shm/shm_ipc.h b/src/brpc/ubshm/shm/shm_ipc.h similarity index 100% rename from src/brpc/ubring/shm/shm_ipc.h rename to src/brpc/ubshm/shm/shm_ipc.h diff --git a/src/brpc/ubring/shm/shm_mgr.cpp b/src/brpc/ubshm/shm/shm_mgr.cpp similarity index 97% rename from src/brpc/ubring/shm/shm_mgr.cpp rename to src/brpc/ubshm/shm/shm_mgr.cpp index 74e722d344..cc588da8bd 100644 --- a/src/brpc/ubring/shm/shm_mgr.cpp +++ b/src/brpc/ubshm/shm/shm_mgr.cpp @@ -20,10 +20,10 @@ #include #include #include -#include "brpc/ubring/common/common.h" -#include "brpc/ubring/shm/shm_ipc.h" -#include "brpc/ubring/shm/shm_ubs.h" -#include "brpc/ubring/shm/shm_mgr.h" +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm/shm/shm_ipc.h" +#include "brpc/ubshm/shm/shm_ubs.h" +#include "brpc/ubshm/shm/shm_mgr.h" namespace brpc { namespace ubring { diff --git a/src/brpc/ubring/shm/shm_mgr.h b/src/brpc/ubshm/shm/shm_mgr.h similarity index 94% rename from src/brpc/ubring/shm/shm_mgr.h rename to src/brpc/ubshm/shm/shm_mgr.h index e9afa086d2..597f5e4ba5 100644 --- a/src/brpc/ubring/shm/shm_mgr.h +++ b/src/brpc/ubshm/shm/shm_mgr.h @@ -19,8 +19,8 @@ #define BRPC_SHM_MGR_H #include -#include "brpc/ubring/common/common.h" -#include "brpc/ubring/shm/shm_def.h" +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm/shm/shm_def.h" namespace brpc { namespace ubring { diff --git a/src/brpc/ubring/shm/shm_ubs.cpp b/src/brpc/ubshm/shm/shm_ubs.cpp similarity index 97% rename from src/brpc/ubring/shm/shm_ubs.cpp rename to src/brpc/ubshm/shm/shm_ubs.cpp index 057806e107..74c8cfc967 100644 --- a/src/brpc/ubring/shm/shm_ubs.cpp +++ 
b/src/brpc/ubshm/shm/shm_ubs.cpp @@ -22,13 +22,13 @@ #include #include #include -#include "brpc/ubring/timer/timer_mgr.h" -#include "brpc/ubring/common/thread_lock.h" -#include "brpc/ubring/common/common.h" -#include "brpc/ubring/shm/shm_def.h" -#include "brpc/ubring/ub_ring_manager.h" -#include "brpc/ubring/rack_mem/ubs_mem.h" -#include "brpc/ubring/rack_mem/ubs_mem_def.h" +#include "brpc/ubshm/timer/timer_mgr.h" +#include "brpc/ubshm/common/thread_lock.h" +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm/shm/shm_def.h" +#include "brpc/ubshm/ub_ring_manager.h" +#include "brpc/ubshm/ubs_mem/ubs_mem.h" +#include "brpc/ubshm/ubs_mem/ubs_mem_def.h" #ifdef UT #include "ubs_mem.h" #endif @@ -37,7 +37,7 @@ namespace brpc { namespace ubring { #define UBRING_MK_UBSM(ret, fn, args) ret (*fn) args = NULL -#include "brpc/ubring/rack_mem/declare_shm_ubs.h" +#include "brpc/ubshm/ubs_mem/declare_shm_ubs.h" #define SHM_RIGHT_MODE 0666 #define UBRING_REGION_NAME_PREFIX "UbrONE2ALLRegion" DEFINE_uint32(node_location, 1, "Location of the ub machine."); @@ -81,7 +81,7 @@ RETURN_CODE UbsShmInterfacesLoad(void) return UBRING_ERR; \ } \ } while (0) -#include "brpc/ubring/rack_mem/declare_shm_ubs.h" +#include "brpc/ubshm/ubs_mem/declare_shm_ubs.h" dlclose(dlhandler); dlhandler = NULL; diff --git a/src/brpc/ubring/shm/shm_ubs.h b/src/brpc/ubshm/shm/shm_ubs.h similarity index 100% rename from src/brpc/ubring/shm/shm_ubs.h rename to src/brpc/ubshm/shm/shm_ubs.h diff --git a/src/brpc/ubring/timer/timer_mgr.cpp b/src/brpc/ubshm/timer/timer_mgr.cpp similarity index 99% rename from src/brpc/ubring/timer/timer_mgr.cpp rename to src/brpc/ubshm/timer/timer_mgr.cpp index cba30118f1..e53833f95e 100644 --- a/src/brpc/ubring/timer/timer_mgr.cpp +++ b/src/brpc/ubshm/timer/timer_mgr.cpp @@ -24,7 +24,7 @@ #include #include #include -#include "brpc/ubring/timer/timer_mgr.h" +#include "brpc/ubshm/timer/timer_mgr.h" namespace brpc { namespace ubring { diff --git 
a/src/brpc/ubring/timer/timer_mgr.h b/src/brpc/ubshm/timer/timer_mgr.h similarity index 98% rename from src/brpc/ubring/timer/timer_mgr.h rename to src/brpc/ubshm/timer/timer_mgr.h index 74576a4885..9630430a2c 100644 --- a/src/brpc/ubring/timer/timer_mgr.h +++ b/src/brpc/ubshm/timer/timer_mgr.h @@ -19,7 +19,7 @@ #define BRPC_TIMER_MGR_H #include #include -#include "brpc/ubring/common/common.h" +#include "brpc/ubshm/common/common.h" #if defined(OS_LINUX) #include diff --git a/src/brpc/ubring/ub_endpoint.cpp b/src/brpc/ubshm/ub_endpoint.cpp similarity index 99% rename from src/brpc/ubring/ub_endpoint.cpp rename to src/brpc/ubshm/ub_endpoint.cpp index 438b0229a9..24b3ffdd5c 100644 --- a/src/brpc/ubring/ub_endpoint.cpp +++ b/src/brpc/ubshm/ub_endpoint.cpp @@ -27,12 +27,12 @@ #include "brpc/input_messenger.h" #include "brpc/socket.h" #include "brpc/reloadable_flags.h" -#include "brpc/ubring/ub_helper.h" -#include "brpc/ubring/ub_endpoint.h" -#include "brpc/ubring/shm/shm_def.h" -#include "brpc/ubring/common/common.h" -#include "brpc/ub_transport.h" -#include "brpc/ubring/ubr_trx.h" +#include "brpc/ubshm/ub_helper.h" +#include "brpc/ubshm/ub_endpoint.h" +#include "brpc/ubshm/shm/shm_def.h" +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm_transport.h" +#include "brpc/ubshm/ubr_trx.h" DECLARE_int32(task_group_ntags); diff --git a/src/brpc/ubring/ub_endpoint.h b/src/brpc/ubshm/ub_endpoint.h similarity index 98% rename from src/brpc/ubring/ub_endpoint.h rename to src/brpc/ubshm/ub_endpoint.h index e63513165c..d199f5881a 100644 --- a/src/brpc/ubring/ub_endpoint.h +++ b/src/brpc/ubshm/ub_endpoint.h @@ -30,9 +30,9 @@ #include "butil/macros.h" #include "butil/containers/mpsc_queue.h" #include "brpc/socket.h" -#include "brpc/ubring/ub_helper.h" -#include "brpc/ubring/ub_ring.h" -#include "brpc/ubring/shm/shm_def.h" +#include "brpc/ubshm/ub_helper.h" +#include "brpc/ubshm/ub_ring.h" +#include "brpc/ubshm/shm/shm_def.h" namespace brpc { diff --git 
a/src/brpc/ubring/ub_helper.cpp b/src/brpc/ubshm/ub_helper.cpp similarity index 96% rename from src/brpc/ubring/ub_helper.cpp rename to src/brpc/ubshm/ub_helper.cpp index 9b397cbb63..6c4c7a5fde 100644 --- a/src/brpc/ubring/ub_helper.cpp +++ b/src/brpc/ubshm/ub_helper.cpp @@ -24,9 +24,9 @@ #include #include "butil/logging.h" #include "brpc/socket.h" -#include "brpc/ubring/ub_endpoint.h" -#include "brpc/ubring/ub_helper.h" -#include "brpc/ubring/ub_ring_manager.h" +#include "brpc/ubshm/ub_endpoint.h" +#include "brpc/ubshm/ub_helper.h" +#include "brpc/ubshm/ub_ring_manager.h" namespace brpc { namespace ubring { diff --git a/src/brpc/ubring/ub_helper.h b/src/brpc/ubshm/ub_helper.h similarity index 100% rename from src/brpc/ubring/ub_helper.h rename to src/brpc/ubshm/ub_helper.h diff --git a/src/brpc/ubring/ub_ring.cpp b/src/brpc/ubshm/ub_ring.cpp similarity index 99% rename from src/brpc/ubring/ub_ring.cpp rename to src/brpc/ubshm/ub_ring.cpp index c14bdeddfc..0ea64f07c1 100644 --- a/src/brpc/ubring/ub_ring.cpp +++ b/src/brpc/ubshm/ub_ring.cpp @@ -21,9 +21,9 @@ #include #include "bthread/bthread.h" #include "butil/logging.h" -#include "brpc/ubring/ub_ring.h" -#include "brpc/ubring/ub_ring_manager.h" -#include "brpc/ubring/shm/shm_ipc.h" +#include "brpc/ubshm/ub_ring.h" +#include "brpc/ubshm/ub_ring_manager.h" +#include "brpc/ubshm/shm/shm_ipc.h" namespace brpc { namespace ubring { diff --git a/src/brpc/ubring/ub_ring.h b/src/brpc/ubshm/ub_ring.h similarity index 98% rename from src/brpc/ubring/ub_ring.h rename to src/brpc/ubshm/ub_ring.h index c0cbc2f7ca..09a97d1dcb 100644 --- a/src/brpc/ubring/ub_ring.h +++ b/src/brpc/ubshm/ub_ring.h @@ -22,9 +22,9 @@ #include #include "butil/macros.h" #include "butil/reader_writer.h" -#include "brpc/ubring/ubr_trx.h" -#include "brpc/ubring/shm/shm_mgr.h" -#include "brpc/ubring/timer/timer_mgr.h" +#include "brpc/ubshm/ubr_trx.h" +#include "brpc/ubshm/shm/shm_mgr.h" +#include "brpc/ubshm/timer/timer_mgr.h" namespace brpc { namespace 
ubring { diff --git a/src/brpc/ubring/ub_ring_manager.cpp b/src/brpc/ubshm/ub_ring_manager.cpp similarity index 99% rename from src/brpc/ubring/ub_ring_manager.cpp rename to src/brpc/ubshm/ub_ring_manager.cpp index 9d6094d77b..13df631f9e 100644 --- a/src/brpc/ubring/ub_ring_manager.cpp +++ b/src/brpc/ubshm/ub_ring_manager.cpp @@ -16,8 +16,8 @@ // under the License. #include -#include "brpc/ubring/ub_ring.h" -#include "brpc/ubring/ub_ring_manager.h" +#include "brpc/ubshm/ub_ring.h" +#include "brpc/ubshm/ub_ring_manager.h" #include "butil/logging.h" namespace brpc { diff --git a/src/brpc/ubring/ub_ring_manager.h b/src/brpc/ubshm/ub_ring_manager.h similarity index 95% rename from src/brpc/ubring/ub_ring_manager.h rename to src/brpc/ubshm/ub_ring_manager.h index a55fa66da2..c901791565 100644 --- a/src/brpc/ubring/ub_ring_manager.h +++ b/src/brpc/ubshm/ub_ring_manager.h @@ -18,9 +18,9 @@ #ifndef BRPC_UB_RING_MANAGER_H #define BRPC_UB_RING_MANAGER_H -#include "brpc/ubring/ubr_trx.h" -#include "brpc/ubring/shm/shm_def.h" -#include "brpc/ubring/common/common.h" +#include "brpc/ubshm/ubr_trx.h" +#include "brpc/ubshm/shm/shm_def.h" +#include "brpc/ubshm/common/common.h" namespace brpc { namespace ubring { diff --git a/src/brpc/ubring/ubr_msg.h b/src/brpc/ubshm/ubr_msg.h similarity index 100% rename from src/brpc/ubring/ubr_msg.h rename to src/brpc/ubshm/ubr_msg.h diff --git a/src/brpc/ubring/ubr_trx.h b/src/brpc/ubshm/ubr_trx.h similarity index 96% rename from src/brpc/ubring/ubr_trx.h rename to src/brpc/ubshm/ubr_trx.h index aba6964137..af9c52ade7 100644 --- a/src/brpc/ubring/ubr_trx.h +++ b/src/brpc/ubshm/ubr_trx.h @@ -20,10 +20,10 @@ #include #include #include -#include "brpc/ubring/shm/shm_def.h" -#include "brpc/ubring/common/common.h" -#include "brpc/ubring/common/thread_lock.h" -#include "brpc/ubring/ubr_msg.h" +#include "brpc/ubshm/shm/shm_def.h" +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm/common/thread_lock.h" +#include "brpc/ubshm/ubr_msg.h" /* 
+----------------------------------------------------------------------------+ │ UbrTrx shm │ diff --git a/src/brpc/ubring/rack_mem/declare_shm_ubs.h b/src/brpc/ubshm/ubs_mem/declare_shm_ubs.h similarity index 100% rename from src/brpc/ubring/rack_mem/declare_shm_ubs.h rename to src/brpc/ubshm/ubs_mem/declare_shm_ubs.h diff --git a/src/brpc/ubring/rack_mem/ubs_mem.h b/src/brpc/ubshm/ubs_mem/ubs_mem.h similarity index 100% rename from src/brpc/ubring/rack_mem/ubs_mem.h rename to src/brpc/ubshm/ubs_mem/ubs_mem.h diff --git a/src/brpc/ubring/rack_mem/ubs_mem_def.h b/src/brpc/ubshm/ubs_mem/ubs_mem_def.h similarity index 100% rename from src/brpc/ubring/rack_mem/ubs_mem_def.h rename to src/brpc/ubshm/ubs_mem/ubs_mem_def.h diff --git a/src/brpc/ubring/rack_mem/ubshmem_stub.cpp b/src/brpc/ubshm/ubs_mem/ubshmem_stub.cpp similarity index 100% rename from src/brpc/ubring/rack_mem/ubshmem_stub.cpp rename to src/brpc/ubshm/ubs_mem/ubshmem_stub.cpp diff --git a/src/brpc/ub_transport.cpp b/src/brpc/ubshm_transport.cpp similarity index 98% rename from src/brpc/ub_transport.cpp rename to src/brpc/ubshm_transport.cpp index 80f9c9eb9d..233850bf20 100644 --- a/src/brpc/ub_transport.cpp +++ b/src/brpc/ubshm_transport.cpp @@ -17,10 +17,10 @@ #if BRPC_WITH_UBRING -#include "brpc/ub_transport.h" +#include "brpc/ubshm_transport.h" #include "brpc/tcp_transport.h" -#include "brpc/ubring/ub_endpoint.h" -#include "brpc/ubring/ub_helper.h" +#include "brpc/ubshm/ub_endpoint.h" +#include "brpc/ubshm/ub_helper.h" namespace brpc { DECLARE_bool(usercode_in_coroutine); diff --git a/src/brpc/ub_transport.h b/src/brpc/ubshm_transport.h similarity index 100% rename from src/brpc/ub_transport.h rename to src/brpc/ubshm_transport.h