diff --git a/docs/global-options.md b/docs/global-options.md index 9fcdf5a..340ef4a 100644 --- a/docs/global-options.md +++ b/docs/global-options.md @@ -6,4 +6,5 @@ | greptimedb.signal.out_dir | Signal handler can output to the specified directory, default is the process start directory. | | greptimedb.available_cpus | Specify the number of available cpus, the default is to use the full number of cpus of the current environment. | | greptimedb.reporter.period_minutes | Metrics reporter timed output period, default 30 minutes. | -| greptimedb.read.write.rw_logging | Whether to print logs for each read/write operation, default off. | \ No newline at end of file +| greptimedb.write.logging | Whether to print logs for each write operation, default off. | +| greptimedb.bulk_write.logging | Whether to print logs for each bulk write operation, default on. | \ No newline at end of file diff --git a/docs/magic-tools.md b/docs/magic-tools.md index 4644fc0..934c97e 100644 --- a/docs/magic-tools.md +++ b/docs/magic-tools.md @@ -12,14 +12,15 @@ the log output, including: ### Just follow the help information ```text +Handling signal SIGUSR2. 
- -- GreptimeDB Signal Help -- - Signal output dir: /Users/xxx - -- How to open or close read/write log(The second execution means close): +- How to open or close write log(The second execution means close): - [1] `cd /Users/xxx` -- [2] `touch rw_logging.sig` +- [2] `touch write_logging.sig` - [3] `kill -s SIGUSR2 $pid` -- [4] `rm rw_logging.sig` +- [4] `rm write_logging.sig` - - - How to open or close rpc limiter(The second execution means close): @@ -28,6 +29,13 @@ the log output, including: - [3] `kill -s SIGUSR2 $pid` - [4] `rm rpc_limit.sig` - +- +- How to open or close bulk write log(The second execution means close): +- [1] `cd /Users/xxx` +- [2] `touch bulk_write_logging.sig` +- [3] `kill -s SIGUSR2 $pid` +- [4] `rm bulk_write_logging.sig` +- - How to get metrics and display info: - [1] `cd /Users/xxx` - [2] `rm *.sig` @@ -35,6 +43,9 @@ the log output, including: - - The file signals that is currently open: - -- Displaying GreptimeDB clients triggered by signal: USR2 to file: /Users/xxx/greptimedb_client_display.log.2024-01-09_16-28-38. -- Printing GreptimeDB client metrics triggered by signal: USR2 to file: /Users/xxx/greptimedb_client_metrics.log.2024-01-09_16-28-38. +- Displaying GreptimeDB clients triggered by signal: USR2 to file: /Users/xxx/greptimedb_client_display.log.2025-05-14_16-06-27. +- Printing GreptimeDB clients metrics triggered by signal: USR2 to file: /Users/xxx/greptimedb_client_metrics.log.2025-05-14_16-06-27. +- `BULK_WRITE_LOGGING`=true. +- `WRITE_LOGGING`=false. +- `LIMIT_SWITCH`=true. 
``` \ No newline at end of file diff --git a/docs/metrics-display.md b/docs/metrics-display.md index a142f68..121b64c 100644 --- a/docs/metrics-display.md +++ b/docs/metrics-display.md @@ -21,27 +21,35 @@ By default, 2 files are generated in the program's working directory #### List of Metrics (constantly updated) -| Type | Name | Description | -|:----------|:---------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Counter | connection_counter_${address} | Number of connections to the server. | -| Histogram | delete_rows_failure_num | Statistics on the number of data entries that failed to delete. | -| Histogram | delete_rows_success_num | Statistics on the number of successful deletions. | -| Histogram | insert_rows_failure_num | Statistics on the number of data entries that failed to write. | -| Histogram | insert_rows_success_num | Statistics on the number of successful writes. | -| Histogram | serializing_executor_drain_num\_${name} | Serializing executor. Statistics on the number of draining tasks. | -| Histogram | write_limiter_acquire_available_permits | Statistics on the number of available permits for write data(insert/delete). | -| Meter | connection_failure | Statistics on the number of failed connections. | -| Meter | write_by_retries_${n} | QPS for the nth retry write, n == 0 for the first write (non-retry), n > 3 will be counted as n == 3 | -| Meter | write_failure_num | Statistics on the number of failed writes. | -| Meter | write_qps | Write Request QPS | -| Timer | write_stream_limiter_acquire_wait_time | Statistics on the time spent acquiring write data (insert/delete) permits when using `StreamWriter`,
note that it does not include the time spent writing, only the time spent acquiring the permit. | -| Timer | async_write_pool.time | Asynchronous pool time statistics for asynchronous write tasks in SDK, this is important and it is recommended to focus on it. | -| Timer | direct_executor_timer_rpc_direct_pool | he appearance of this metric means that we are using the current thread to execute the asynchronous callback of the rpc client, which is the default configuration.
This is usually sufficient and very resource-saving, but it needs attention. When there are problems, replace it with a thread pool in time. | -| Timer | req_rt_${service_name}/${method_name} | The time consumption statistics of the request, the service name and method name are the names of the service and method of the grpc request. | -| Timer | scheduled_thread_pool.${schedule_thread_pool_name} | Schedule thread pool execution task time statistics. | -| Timer | serializing_executor_drain_timer_${name} | Serializing executor. Drains all tasks for time consumption statistics | -| Timer | serializing_executor_single_task_timer_${name} | Serializing executor. Single task execution time consumption statistics | -| Timer | write_limiter_acquire_wait_time | Statistics on the time spent acquiring write data (insert/delete) permits,
note that it does not include the time spent writing, only the time spent acquiring the permit. | +| Type | Name | Description | +|:-----|:-----|:------------| +| Counter | connection_counter | Total number of active connections across all endpoints | +| Counter | connection_counter_${address} | Number of active connections to the specified endpoint address | +| Counter | flight_allocation_bytes | Total bytes allocated for flight operations in bulk write API | +| Histogram | bulk_write_limiter_acquire_available_permits | Available permits for bulk write operations, indicating limiter capacity and utilization | +| Histogram | delete_rows_failure_num | Number of failed delete rows | +| Histogram | delete_rows_success_num | Number of successful delete rows | +| Histogram | insert_rows_failure_num | Number of failed insert rows | +| Histogram | insert_rows_success_num | Number of successful insert rows | +| Histogram | serializing_executor_drain_num\_${name} | Number of tasks drained by the serializing executor | +| Histogram | write_limiter_acquire_available_permits | Available permits for write operations, indicating limiter capacity and utilization | +| Meter | connection_failure | Rate of connection failures across all endpoints | +| Meter | write_by_retries_${n} | Write QPS by retry count (n=0 for first attempt, n>3 counted as n=3) | +| Meter | write_failure_num | Rate of failed write operations across all endpoints | +| Meter | write_qps | Write requests per second across all endpoints | +| Timer | async_bulk_write_pool | Execution duration of bulk write tasks in async thread pool | +| Timer | async_write_pool | Execution duration of write tasks in async thread pool | +| Timer | bulk_flight_client.wait_until_stream_ready | Time waiting for bulk write stream readiness | +| Timer | bulk_write_limiter_acquire_wait_time | Time spent waiting for bulk write permits | +| Timer | bulk_write_prepare_time | Time spent encoding data for bulk write operations | +| Timer | 
bulk_write_put_time | Total duration of bulk write operations from start to completion | +| Timer | direct_executor_timer_rpc_direct_pool | Execution time of RPC callbacks in current thread (default). Monitor performance and consider thread pool if needed | +| Timer | req_rt_${service_name}/${method_name} | Round-trip time of gRPC requests by service and method | +| Timer | scheduled_thread_pool.${schedule_thread_pool_name} | Task execution time in scheduled thread pool by pool name | +| Timer | serializing_executor_drain_timer_${name} | Total time to process and execute all queued tasks | +| Timer | serializing_executor_single_task_timer_${name} | Execution time per task, helping identify task-level bottlenecks | +| Timer | write_limiter_acquire_wait_time | Time waiting for write permits (excludes actual write operation time) | +| Timer | write_stream_limiter_acquire_wait_time | Time waiting for write permits when using StreamWriter (excludes actual write operation time) | #### Example @@ -50,115 +58,96 @@ By default, 2 files are generated in the program's working directory ``` --- GreptimeDB Client --- id=1 -version=0.5.1 +version=0.14.3 endpoints=[127.0.0.1:4001] database=public -rpcOptions=RpcOptions{useRpcSharedPool=false, defaultRpcTimeout=10000, maxInboundMessageSize=268435456, flowControlWindow=268435456, idleTimeoutSeconds=300, keepAliveTimeSeconds=9223372036854775807, keepAliveTimeoutSeconds=3, keepAliveWithoutCalls=false, limitKind=None, initialLimit=64, maxLimit=1024, longRttWindow=100, smoothing=0.2, blockOnLimit=false, logOnLimitChange=true, enableMetricInterceptor=false} +rpcOptions=RpcOptions{useRpcSharedPool=false, defaultRpcTimeout=10000, maxInboundMessageSize=268435456, flowControlWindow=268435456, idleTimeoutSeconds=300, keepAliveTimeSeconds=9223372036854775807, keepAliveTimeoutSeconds=3, keepAliveWithoutCalls=false, limitKind=None, initialLimit=64, maxLimit=1024, longRttWindow=100, smoothing=0.2, blockOnLimit=false, logOnLimitChange=true, 
enableMetricInterceptor=false, tlsOptions=null} --- RouterClient --- -opts=RouterOptions{endpoints=[127.0.0.1:4001], refreshPeriodSeconds=-1, router=null} +opts=RouterOptions{rpcClient=io.greptime.rpc.GrpcClient@55b699ef, endpoints=[127.0.0.1:4001], refreshPeriodSeconds=600, checkHealthTimeoutMs=1000, router=null} --- GrpcClient --- started=true -opts=RpcOptions{useRpcSharedPool=false, defaultRpcTimeout=10000, maxInboundMessageSize=268435456, flowControlWindow=268435456, idleTimeoutSeconds=300, keepAliveTimeSeconds=9223372036854775807, keepAliveTimeoutSeconds=3, keepAliveWithoutCalls=false, limitKind=None, initialLimit=64, maxLimit=1024, longRttWindow=100, smoothing=0.2, blockOnLimit=false, logOnLimitChange=true, enableMetricInterceptor=false} -connectionObservers=[io.greptime.GreptimeDB$RpcConnectionObserver@5253e7a0] -asyncPool=DirectExecutor{name='rpc-direct-pool'} -interceptors=[io.greptime.rpc.interceptors.ContextToHeadersInterceptor@1751638e] -managedChannelPool={127.0.0.1:4001=IdChannel{channelId=1, channel=ManagedChannelOrphanWrapper{delegate=ManagedChannelImpl{logId=1, target=127.0.0.1:4001}}}} +opts=RpcOptions{useRpcSharedPool=false, defaultRpcTimeout=10000, maxInboundMessageSize=268435456, flowControlWindow=268435456, idleTimeoutSeconds=300, keepAliveTimeSeconds=9223372036854775807, keepAliveTimeoutSeconds=3, keepAliveWithoutCalls=false, limitKind=None, initialLimit=64, maxLimit=1024, longRttWindow=100, smoothing=0.2, blockOnLimit=false, logOnLimitChange=true, enableMetricInterceptor=false, tlsOptions=null} +connectionObservers=[io.greptime.GreptimeDB$RpcConnectionObserver@625d44db] +asyncPool=DirectExecutor{name='rpc_direct_pool'} +interceptors=[io.greptime.rpc.interceptors.ContextToHeadersInterceptor@275fd6f4] +managedChannelPool={127.0.0.1:4001=IdChannel{channelId=1, channel=ManagedChannelOrphanWrapper{delegate=ManagedChannelImpl{logId=5, target=127.0.0.1:4001}}}} transientFailures={} --- WriteClient --- -maxRetries=1 
-asyncPool=MetricExecutor{pool=SerializingExecutor{name='async_pool'}, name='async_write_pool.time'} +maxRetries=0 +asyncPool=MetricExecutor{pool=SerializingExecutor{name='bench_async_pool'}, name='async_write_pool.time'} + +--- BulkWriteClient --- +asyncPool=MetricExecutor{pool=SerializingExecutor{name='bench_async_pool'}, name='async_bulk_write_pool.time'} ``` ##### greptimedb_client_metrics.log.xxx ``` --- GreptimeDB 1/9/24 4:28:38 PM ============================================================== +-- GreptimeDB 5/14/25 4:06:27 PM ============================================================= -- GreptimeDB -- Counters -------------------------------------------------------------------- connection_counter - count = 1 + count = 2 connection_counter_127.0.0.1:4001 - count = 1 + count = 2 +flight_allocation_bytes + count = 339503994 -- GreptimeDB -- Histograms ------------------------------------------------------------------ -delete_rows_failure_num - count = 1 - min = 0 - max = 0 - mean = 0.00 - stddev = 0.00 - median = 0.00 - 75% <= 0.00 - 95% <= 0.00 - 98% <= 0.00 - 99% <= 0.00 - 99.9% <= 0.00 -delete_rows_success_num - count = 1 - min = 10 - max = 10 - mean = 10.00 - stddev = 0.00 - median = 10.00 - 75% <= 10.00 - 95% <= 10.00 - 98% <= 10.00 - 99% <= 10.00 - 99.9% <= 10.00 -insert_rows_failure_num - count = 1 +bulk_write_limiter_acquire_available_permits + count = 153 min = 0 - max = 0 - mean = 0.00 - stddev = 0.00 - median = 0.00 - 75% <= 0.00 - 95% <= 0.00 - 98% <= 0.00 - 99% <= 0.00 - 99.9% <= 0.00 -insert_rows_success_num - count = 1 - min = 20 - max = 20 - mean = 20.00 - stddev = 0.00 - median = 20.00 - 75% <= 20.00 - 95% <= 20.00 - 98% <= 20.00 - 99% <= 20.00 - 99.9% <= 20.00 -serializing_executor_drain_num_async_pool - count = 4 - min = 1 - max = 3 - mean = 2.00 - stddev = 1.00 + max = 7 + mean = 2.77 + stddev = 2.06 median = 3.00 - 75% <= 3.00 - 95% <= 3.00 - 98% <= 3.00 - 99% <= 3.00 - 99.9% <= 3.00 + 75% <= 5.00 + 95% <= 6.00 + 98% <= 6.00 + 99% <= 6.00 
+ 99.9% <= 6.00 +bulk_write_put_bytes + count = 153 + min = 73032465 + max = 124278256 + mean = 119973603.01 + stddev = 14129538.75 + median = 124220600.00 + 75% <= 124233684.00 + 95% <= 124278256.00 + 98% <= 124278256.00 + 99% <= 124278256.00 + 99.9% <= 124278256.00 +bulk_write_put_rows + count = 153 + min = 38528 + max = 65536 + mean = 63292.26 + stddev = 7454.17 + median = 65536.00 + 75% <= 65536.00 + 95% <= 65536.00 + 98% <= 65536.00 + 99% <= 65536.00 + 99.9% <= 65536.00 +serializing_executor_drain_num_bench_async_pool + count = 170 + min = 1 + max = 1 + mean = 1.00 + stddev = 0.00 + median = 1.00 + 75% <= 1.00 + 95% <= 1.00 + 98% <= 1.00 + 99% <= 1.00 + 99.9% <= 1.00 write_limiter_acquire_available_permits - count = 2 - min = 65516 - max = 65526 - mean = 65521.00 - stddev = 5.00 - median = 65526.00 - 75% <= 65526.00 - 95% <= 65526.00 - 98% <= 65526.00 - 99% <= 65526.00 - 99.9% <= 65526.00 -write_stream_limiter_acquire_wait_time count = 0 min = 0 max = 0 @@ -172,102 +161,186 @@ write_stream_limiter_acquire_wait_time 99.9% <= 0.00 -- GreptimeDB -- Meters ---------------------------------------------------------------------- +bulk_write_in_flight_requests + count = 860 + mean rate = 0.84 events/second + 1-minute rate = 0.72 events/second + 5-minute rate = 0.99 events/second + 15-minute rate = 2.85 events/second connection_failure count = 0 mean rate = 0.00 events/second 1-minute rate = 0.00 events/second 5-minute rate = 0.00 events/second 15-minute rate = 0.00 events/second -write_by_retries_0 - count = 2 - mean rate = 0.16 events/second - 1-minute rate = 0.37 events/second - 5-minute rate = 0.39 events/second - 15-minute rate = 0.40 events/second -write_failure_num - count = 0 - mean rate = 0.00 events/second - 1-minute rate = 0.00 events/second - 5-minute rate = 0.00 events/second - 15-minute rate = 0.00 events/second -write_qps - count = 2 - mean rate = 0.16 events/second - 1-minute rate = 0.37 events/second - 5-minute rate = 0.39 events/second - 15-minute 
rate = 0.40 events/second -- GreptimeDB -- Timers ---------------------------------------------------------------------- -async_write_pool.time - count = 8 - mean rate = 0.64 calls/second - 1-minute rate = 1.47 calls/second - 5-minute rate = 1.57 calls/second - 15-minute rate = 1.59 calls/second - min = 0.03 milliseconds - max = 295.46 milliseconds - mean = 37.87 milliseconds - stddev = 97.37 milliseconds - median = 0.36 milliseconds - 75% <= 5.27 milliseconds - 95% <= 295.46 milliseconds - 98% <= 295.46 milliseconds - 99% <= 295.46 milliseconds - 99.9% <= 295.46 milliseconds -direct_executor_timer_rpc-direct-pool - count = 11 - mean rate = 0.88 calls/second - 1-minute rate = 2.02 calls/second - 5-minute rate = 2.16 calls/second - 15-minute rate = 2.19 calls/second - min = 0.01 milliseconds - max = 10.35 milliseconds - mean = 2.54 milliseconds - stddev = 3.23 milliseconds - median = 1.00 milliseconds - 75% <= 5.47 milliseconds - 95% <= 10.35 milliseconds - 98% <= 10.35 milliseconds - 99% <= 10.35 milliseconds - 99.9% <= 10.35 milliseconds -req_rt_greptime.v1.GreptimeDatabase/Handle - count = 2 +async_bulk_write_pool + count = 170 mean rate = 0.17 calls/second - 1-minute rate = 0.37 calls/second - 5-minute rate = 0.39 calls/second - 15-minute rate = 0.40 calls/second - min = 10.00 milliseconds - max = 591.00 milliseconds - mean = 300.50 milliseconds - stddev = 290.50 milliseconds - median = 591.00 milliseconds - 75% <= 591.00 milliseconds - 95% <= 591.00 milliseconds - 98% <= 591.00 milliseconds - 99% <= 591.00 milliseconds - 99.9% <= 591.00 milliseconds -req_rt_greptime.v1.GreptimeDatabase/Handle_127.0.0.1:4001 + 1-minute rate = 0.16 calls/second + 5-minute rate = 0.19 calls/second + 15-minute rate = 0.43 calls/second + min = 0.02 milliseconds + max = 9.22 milliseconds + mean = 0.63 milliseconds + stddev = 0.43 milliseconds + median = 0.49 milliseconds + 75% <= 0.79 milliseconds + 95% <= 1.93 milliseconds + 98% <= 1.93 milliseconds + 99% <= 2.17 milliseconds + 
99.9% <= 2.36 milliseconds +async_write_pool + count = 0 + mean rate = 0.00 calls/second + 1-minute rate = 0.00 calls/second + 5-minute rate = 0.00 calls/second + 15-minute rate = 0.00 calls/second + min = 0.00 milliseconds + max = 0.00 milliseconds + mean = 0.00 milliseconds + stddev = 0.00 milliseconds + median = 0.00 milliseconds + 75% <= 0.00 milliseconds + 95% <= 0.00 milliseconds + 98% <= 0.00 milliseconds + 99% <= 0.00 milliseconds + 99.9% <= 0.00 milliseconds +bulk_flight_client.wait_until_stream_ready + count = 153 + mean rate = 0.15 calls/second + 1-minute rate = 0.14 calls/second + 5-minute rate = 0.19 calls/second + 15-minute rate = 0.61 calls/second + min = 0.00 milliseconds + max = 42050.52 milliseconds + mean = 5356.27 milliseconds + stddev = 13869.85 milliseconds + median = 0.00 milliseconds + 75% <= 0.00 milliseconds + 95% <= 41310.81 milliseconds + 98% <= 41310.81 milliseconds + 99% <= 41310.81 milliseconds + 99.9% <= 41734.41 milliseconds +bulk_write_limiter_acquire_wait_time + count = 153 + mean rate = 0.15 calls/second + 1-minute rate = 0.13 calls/second + 5-minute rate = 0.18 calls/second + 15-minute rate = 0.55 calls/second + min = 0.00 milliseconds + max = 6742.00 milliseconds + mean = 671.88 milliseconds + stddev = 1774.12 milliseconds + median = 0.00 milliseconds + 75% <= 0.00 milliseconds + 95% <= 6295.00 milliseconds + 98% <= 6410.00 milliseconds + 99% <= 6410.00 milliseconds + 99.9% <= 6410.00 milliseconds +bulk_write_prepare_time + count = 153 + mean rate = 0.15 calls/second + 1-minute rate = 0.14 calls/second + 5-minute rate = 0.19 calls/second + 15-minute rate = 0.61 calls/second + min = 116.00 milliseconds + max = 42254.00 milliseconds + mean = 5548.32 milliseconds + stddev = 13857.03 milliseconds + median = 192.00 milliseconds + 75% <= 278.00 milliseconds + 95% <= 41514.00 milliseconds + 98% <= 41514.00 milliseconds + 99% <= 41514.00 milliseconds + 99.9% <= 41943.00 milliseconds +bulk_write_put_time + count = 149 + mean rate = 0.14 
calls/second + 1-minute rate = 0.14 calls/second + 5-minute rate = 0.14 calls/second + 15-minute rate = 0.10 calls/second + min = 7266.00 milliseconds + max = 55630.00 milliseconds + mean = 33684.43 milliseconds + stddev = 14395.13 milliseconds + median = 32508.00 milliseconds + 75% <= 45185.00 milliseconds + 95% <= 55221.00 milliseconds + 98% <= 55221.00 milliseconds + 99% <= 55221.00 milliseconds + 99.9% <= 55310.00 milliseconds +direct_executor_timer_rpc_direct_pool + count = 15 + mean rate = 0.01 calls/second + 1-minute rate = 0.01 calls/second + 5-minute rate = 0.02 calls/second + 15-minute rate = 0.01 calls/second + min = 0.00 milliseconds + max = 0.93 milliseconds + mean = 0.15 milliseconds + stddev = 0.12 milliseconds + median = 0.17 milliseconds + 75% <= 0.23 milliseconds + 95% <= 0.33 milliseconds + 98% <= 0.33 milliseconds + 99% <= 0.33 milliseconds + 99.9% <= 0.33 milliseconds +req_rt_greptime.v1.HealthCheck/HealthCheck count = 2 - mean rate = 0.17 calls/second - 1-minute rate = 0.37 calls/second - 5-minute rate = 0.39 calls/second - 15-minute rate = 0.40 calls/second - min = 10.00 milliseconds - max = 591.00 milliseconds - mean = 300.50 milliseconds - stddev = 290.50 milliseconds - median = 591.00 milliseconds - 75% <= 591.00 milliseconds - 95% <= 591.00 milliseconds - 98% <= 591.00 milliseconds - 99% <= 591.00 milliseconds - 99.9% <= 591.00 milliseconds + mean rate = 0.00 calls/second + 1-minute rate = 0.00 calls/second + 5-minute rate = 0.02 calls/second + 15-minute rate = 0.09 calls/second + min = 12.00 milliseconds + max = 170.00 milliseconds + mean = 12.02 milliseconds + stddev = 1.76 milliseconds + median = 12.00 milliseconds + 75% <= 12.00 milliseconds + 95% <= 12.00 milliseconds + 98% <= 12.00 milliseconds + 99% <= 12.00 milliseconds + 99.9% <= 12.00 milliseconds +req_rt_greptime.v1.HealthCheck/HealthCheck_127.0.0.1:4001 + count = 2 + mean rate = 0.00 calls/second + 1-minute rate = 0.00 calls/second + 5-minute rate = 0.02 calls/second + 
15-minute rate = 0.09 calls/second + min = 12.00 milliseconds + max = 170.00 milliseconds + mean = 12.02 milliseconds + stddev = 1.76 milliseconds + median = 12.00 milliseconds + 75% <= 12.00 milliseconds + 95% <= 12.00 milliseconds + 98% <= 12.00 milliseconds + 99% <= 12.00 milliseconds + 99.9% <= 12.00 milliseconds scheduled_thread_pool.display_self + count = 2 + mean rate = 0.00 calls/second + 1-minute rate = 0.00 calls/second + 5-minute rate = 0.01 calls/second + 15-minute rate = 0.07 calls/second + min = 1.00 milliseconds + max = 2.00 milliseconds + mean = 1.00 milliseconds + stddev = 0.01 milliseconds + median = 1.00 milliseconds + 75% <= 1.00 milliseconds + 95% <= 1.00 milliseconds + 98% <= 1.00 milliseconds + 99% <= 1.00 milliseconds + 99.9% <= 1.00 milliseconds +scheduled_thread_pool.metrics.reporter count = 1 - mean rate = 0.08 calls/second - 1-minute rate = 0.18 calls/second - 5-minute rate = 0.20 calls/second - 15-minute rate = 0.20 calls/second + mean rate = 0.00 calls/second + 1-minute rate = 0.00 calls/second + 5-minute rate = 0.05 calls/second + 15-minute rate = 0.13 calls/second min = 10.00 milliseconds max = 10.00 milliseconds mean = 10.00 milliseconds @@ -278,44 +351,60 @@ scheduled_thread_pool.display_self 98% <= 10.00 milliseconds 99% <= 10.00 milliseconds 99.9% <= 10.00 milliseconds -serializing_executor_drain_timer_async_pool - count = 8 - mean rate = 0.63 calls/second - 1-minute rate = 1.47 calls/second - 5-minute rate = 1.57 calls/second - 15-minute rate = 1.59 calls/second - min = 0.00 milliseconds - max = 295.59 milliseconds - mean = 37.91 milliseconds - stddev = 97.41 milliseconds - median = 0.53 milliseconds - 75% <= 5.34 milliseconds - 95% <= 295.59 milliseconds - 98% <= 295.59 milliseconds - 99% <= 295.59 milliseconds - 99.9% <= 295.59 milliseconds -serializing_executor_single_task_timer_async_pool - count = 8 - mean rate = 0.63 calls/second - 1-minute rate = 1.47 calls/second - 5-minute rate = 1.57 calls/second - 15-minute rate = 
1.59 calls/second +scheduled_thread_pool.route_cache_refresher + count = 2 + mean rate = 0.00 calls/second + 1-minute rate = 0.00 calls/second + 5-minute rate = 0.02 calls/second + 15-minute rate = 0.09 calls/second + min = 2.00 milliseconds + max = 60.00 milliseconds + mean = 2.01 milliseconds + stddev = 0.64 milliseconds + median = 2.00 milliseconds + 75% <= 2.00 milliseconds + 95% <= 2.00 milliseconds + 98% <= 2.00 milliseconds + 99% <= 2.00 milliseconds + 99.9% <= 2.00 milliseconds +serializing_executor_drain_timer_bench_async_pool + count = 170 + mean rate = 0.17 calls/second + 1-minute rate = 0.16 calls/second + 5-minute rate = 0.19 calls/second + 15-minute rate = 0.43 calls/second + min = 0.02 milliseconds + max = 9.24 milliseconds + mean = 0.65 milliseconds + stddev = 0.44 milliseconds + median = 0.51 milliseconds + 75% <= 0.82 milliseconds + 95% <= 1.99 milliseconds + 98% <= 1.99 milliseconds + 99% <= 2.24 milliseconds + 99.9% <= 2.42 milliseconds +serializing_executor_single_task_timer_bench_async_pool + count = 170 + mean rate = 0.17 calls/second + 1-minute rate = 0.16 calls/second + 5-minute rate = 0.19 calls/second + 15-minute rate = 0.43 calls/second min = 0.00 milliseconds - max = 295.00 milliseconds - mean = 37.88 milliseconds - stddev = 97.20 milliseconds + max = 9.00 milliseconds + mean = 0.59 milliseconds + stddev = 0.63 milliseconds median = 1.00 milliseconds - 75% <= 5.00 milliseconds - 95% <= 295.00 milliseconds - 98% <= 295.00 milliseconds - 99% <= 295.00 milliseconds - 99.9% <= 295.00 milliseconds + 75% <= 1.00 milliseconds + 95% <= 2.00 milliseconds + 98% <= 2.00 milliseconds + 99% <= 3.00 milliseconds + 99.9% <= 3.00 milliseconds write_limiter_acquire_wait_time - count = 2 - mean rate = 0.16 calls/second - 1-minute rate = 0.37 calls/second - 5-minute rate = 0.39 calls/second - 15-minute rate = 0.40 calls/second + count = 0 + mean rate = 0.00 calls/second + 1-minute rate = 0.00 calls/second + 5-minute rate = 0.00 calls/second + 15-minute 
rate = 0.00 calls/second min = 0.00 milliseconds max = 0.00 milliseconds mean = 0.00 milliseconds diff --git a/grafana/greptimedb-ingester-dashboard.json b/grafana/greptimedb-ingester-dashboard.json new file mode 100644 index 0000000..0338b98 --- /dev/null +++ b/grafana/greptimedb-ingester-dashboard.json @@ -0,0 +1,1630 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [], + "title": "Write Performance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_put_time{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_put_time{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Bulk Write Put Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_prepare_time{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_prepare_time{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Bulk Write 
Prepare Time", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 4, + "panels": [], + "title": "Async Execution", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 5, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "async_bulk_write_pool{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "async_bulk_write_pool{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Async Bulk Write Pool Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 6, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "async_write_pool{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "async_write_pool{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Async Write Pool Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": 
"A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 7, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "direct_executor_timer_rpc_direct_pool{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "direct_executor_timer_rpc_direct_pool{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Direct Executor RPC Pool Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 8, + "options": { + "legend": { + "calcs": ["mean", "max"], + 
"displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_flight_client_wait_until_stream_ready{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_flight_client_wait_until_stream_ready{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Bulk Flight Client Stream Ready Wait Time", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 9, + "panels": [], + "title": "Limiter Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 10, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_limiter_acquire_wait_time{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_limiter_acquire_wait_time{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Bulk Write Limiter Wait Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 11, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_limiter_acquire_available_permits{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_limiter_acquire_available_permits{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Bulk Write 
Limiter Available Permits", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 12, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "write_limiter_acquire_wait_time{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "write_limiter_acquire_wait_time{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Write Limiter Wait Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 13, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "write_limiter_acquire_available_permits{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "write_limiter_acquire_available_permits{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Write Limiter Available Permits", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 14, + "panels": [], + "title": "System Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 15, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "flight_allocation_bytes", + "legendFormat": "Flight Allocation Bytes", + "refId": "A" + } + ], + "title": "Flight Allocation Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 44 + }, + "id": 16, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "connection_counter", + "legendFormat": "Active Connections", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(connection_failure_total[5m])", + "legendFormat": "Connection Failures (/s)", + "refId": "B" + } + ], + "title": "Connection Metrics", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 17, + "panels": [], + "title": "Write Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 61 + }, + "id": 20, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_put_rows{quantile=\"0.5\"}", + "legendFormat": "Median Rows per Write", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": 
"bulk_write_put_rows{quantile=\"0.95\"}", + "legendFormat": "95th Percentile Rows per Write", + "refId": "B" + } + ], + "title": "Bulk Write Rows per Operation", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 61 + }, + "id": 21, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_put_bytes{quantile=\"0.5\"}", + "legendFormat": "Median Bytes per Write", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "bulk_write_put_bytes{quantile=\"0.95\"}", + "legendFormat": "95th Percentile Bytes per Write", + "refId": "B" + } + ], + "title": "Bulk Write Bytes per Operation", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 69 + }, + "id": 22, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "insert_rows_success_num{quantile=\"0.5\"}", + "legendFormat": "Median Success Rows", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "insert_rows_failure_num{quantile=\"0.5\"}", + "legendFormat": "Median Failed Rows", + "refId": "B" + } + ], + "title": "Insert Rows Success vs Failure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", 
+ "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 77 + }, + "id": 25, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "req_rt_greptime_v1_GreptimeDatabase_Handle{quantile=\"0.95\"}", + "legendFormat": "95th Percentile", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "req_rt_greptime_v1_GreptimeDatabase_Handle{quantile=\"0.5\"}", + "legendFormat": "Median", + "refId": "B" + } + ], + "title": "Database Handle Request Time", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": ["greptimedb", "ingester"], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "GreptimeDB Ingester Dashboard", + "uid": "greptimedb-ingester", + "version": 1, + "weekStart": "" +} diff --git a/ingester-common/src/main/java/io/greptime/common/Keys.java b/ingester-common/src/main/java/io/greptime/common/Keys.java index e4f9b66..26563f4 100644 --- a/ingester-common/src/main/java/io/greptime/common/Keys.java +++ b/ingester-common/src/main/java/io/greptime/common/Keys.java @@ -32,7 +32,8 @@ public final class Keys { public static final String GRPC_CONN_RESET_THRESHOLD = "greptimedb.grpc.conn.failures.reset_threshold"; public static final String GRPC_POOL_CORE_WORKERS = "greptimedb.grpc.pool.core_workers"; public static final String GRPC_POOL_MAXIMUM_WORKERS = 
"greptimedb.grpc.pool.maximum_works"; - public static final String RW_LOGGING = "greptimedb.read.write.rw_logging"; + public static final String WRITE_LOGGING = "greptimedb.write.logging"; + public static final String BULK_WRITE_LOGGING = "greptimedb.bulk_write.logging"; public static final String FLIGHT_ALLOCATION_LIMIT = "greptimedb.flight.allocation.limit"; // Greptime headers diff --git a/ingester-common/src/main/java/io/greptime/common/signal/FileSignal.java b/ingester-common/src/main/java/io/greptime/common/signal/FileSignal.java index 5581b36..cc3ff80 100644 --- a/ingester-common/src/main/java/io/greptime/common/signal/FileSignal.java +++ b/ingester-common/src/main/java/io/greptime/common/signal/FileSignal.java @@ -26,8 +26,9 @@ * purpose of controlling the process to output specified content through this. */ public enum FileSignal { - RwLogging("rw_logging.sig", "How to open or close read/write log(The second execution means close)"), + WriteLogging("write_logging.sig", "How to open or close write log(The second execution means close)"), RpcLimit("rpc_limit.sig", "How to open or close rpc limiter(The second execution means close)"), + BulkWriteLogging("bulk_write_logging.sig", "How to open or close bulk write log(The second execution means close)"), ; private final String filename; diff --git a/ingester-example/src/main/java/io/greptime/bench/BatchingWriteBenchmark.java b/ingester-example/src/main/java/io/greptime/bench/BatchingWriteBenchmark.java new file mode 100644 index 0000000..36fd042 --- /dev/null +++ b/ingester-example/src/main/java/io/greptime/bench/BatchingWriteBenchmark.java @@ -0,0 +1,112 @@ +/* + * Copyright 2023 Greptime Team + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.greptime.bench; + +import io.greptime.GreptimeDB; +import io.greptime.WriteOp; +import io.greptime.common.util.MetricsUtil; +import io.greptime.common.util.ServiceLoader; +import io.greptime.common.util.SystemPropertyUtil; +import io.greptime.metrics.MetricsExporter; +import io.greptime.models.Err; +import io.greptime.models.Result; +import io.greptime.models.Table; +import io.greptime.models.TableSchema; +import io.greptime.models.WriteOk; +import io.greptime.rpc.Compression; +import io.greptime.rpc.Context; +import java.util.Arrays; +import java.util.Iterator; +import java.util.concurrent.CompletableFuture; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * BatchingWriteBenchmark is a benchmark for the batching write API of GreptimeDB. 
+ * + * Env: + * - db_endpoint: the endpoint of the GreptimeDB server + * - db_name: the name of the database + * - batch_size_per_request: the batch size per request + * - zstd_compression: whether to use zstd compression + * - max_points_per_second: the max number of points that can be written per second, exceeding which may cause blockage + */ +public class BatchingWriteBenchmark { + + private static final Logger LOG = LoggerFactory.getLogger(BatchingWriteBenchmark.class); + + public static void main(String[] args) throws Exception { + String endpoint = SystemPropertyUtil.get("db_endpoint", "127.0.0.1:4001"); + String dbName = SystemPropertyUtil.get("db_name", "public"); + boolean zstdCompression = SystemPropertyUtil.getBool("zstd_compression", true); + int batchSize = SystemPropertyUtil.getInt("batch_size_per_request", 64 * 1024); + int maxPointsPerSecond = SystemPropertyUtil.getInt("max_points_per_second", Integer.MAX_VALUE); + LOG.info("Connect to db: {}, endpoint: {}", dbName, endpoint); + LOG.info("Using zstd compression: {}", zstdCompression); + LOG.info("Batch size: {}", batchSize); + LOG.info("Max points per second: {}", maxPointsPerSecond); + + // Start a metrics exporter + MetricsExporter metricsExporter = new MetricsExporter(8080, MetricsUtil.metricRegistry()); + metricsExporter.init(null); + + GreptimeDB greptimeDB = DBConnector.connectTo(new String[] {endpoint}, dbName); + + Compression compression = zstdCompression ? 
Compression.Zstd : Compression.None; + Context ctx = Context.newDefault().withCompression(compression); + + TableDataProvider tableDataProvider = + ServiceLoader.load(TableDataProvider.class).first(); + LOG.info("Table data provider: {}", tableDataProvider.getClass().getName()); + tableDataProvider.init(); + TableSchema tableSchema = tableDataProvider.tableSchema(); + Iterator rows = tableDataProvider.rows(); + + LOG.info("Start writing data"); + long start = System.nanoTime(); + for (; ; ) { + Table table = Table.from(tableSchema); + for (int i = 0; i < batchSize; i++) { + if (!rows.hasNext()) { + break; + } + table.addRow(rows.next()); + } + LOG.info("Table bytes used: {}", table.bytesUsed()); + // Complete the table; adding rows is no longer permitted. + table.complete(); + long fStart = System.nanoTime(); + // Write the table data to the server + CompletableFuture> future = + greptimeDB.write(Arrays.asList(table), WriteOp.Insert, ctx); + // Wait for the write to complete + int numRows = future.get().mapOr(0, writeOk -> writeOk.getSuccess()); + long costMs = (System.nanoTime() - fStart) / 1000000; + LOG.info("Write rows: {}, time cost: {}ms", numRows, costMs); + + if (!rows.hasNext()) { + break; + } + } + + LOG.info("Completed writing data, time cost: {}s", (System.nanoTime() - start) / 1000000000); + + greptimeDB.shutdownGracefully(); + tableDataProvider.close(); + metricsExporter.shutdownGracefully(); + } +} diff --git a/ingester-example/src/main/java/io/greptime/bench/BulkWriteBenchmark.java b/ingester-example/src/main/java/io/greptime/bench/BulkWriteBenchmark.java index cb6facc..204f74f 100644 --- a/ingester-example/src/main/java/io/greptime/bench/BulkWriteBenchmark.java +++ b/ingester-example/src/main/java/io/greptime/bench/BulkWriteBenchmark.java @@ -64,7 +64,7 @@ public static void main(String[] args) throws Exception { .allocatorInitReservation(0) .allocatorMaxAllocation(4 * 1024 * 1024 * 1024L) .timeoutMsPerMessage(60000) - .maxRequestsInFlight(8) + 
.maxRequestsInFlight(4) .build(); Compression compression = zstdCompression ? Compression.Zstd : Compression.None; Context ctx = Context.newDefault().withCompression(compression); diff --git a/ingester-example/src/main/java/io/greptime/bench/DBConnector.java b/ingester-example/src/main/java/io/greptime/bench/DBConnector.java index 97c5310..505dc26 100644 --- a/ingester-example/src/main/java/io/greptime/bench/DBConnector.java +++ b/ingester-example/src/main/java/io/greptime/bench/DBConnector.java @@ -17,7 +17,6 @@ package io.greptime.bench; import io.greptime.GreptimeDB; -import io.greptime.common.util.SerializingExecutor; import io.greptime.options.GreptimeOptions; /** @@ -27,7 +26,6 @@ public class DBConnector { public static GreptimeDB connectTo(String[] endpoints, String dbname) { GreptimeOptions opts = GreptimeOptions.newBuilder(endpoints, dbname) - .asyncPool(new SerializingExecutor("bench_async_pool")) .writeMaxRetries(0) .defaultStreamMaxWritePointsPerSecond(Integer.MAX_VALUE) .useZeroCopyWriteInBulkWrite(true) diff --git a/ingester-grpc/src/main/java/io/greptime/rpc/GrpcClient.java b/ingester-grpc/src/main/java/io/greptime/rpc/GrpcClient.java index 83a5a20..e300c2b 100644 --- a/ingester-grpc/src/main/java/io/greptime/rpc/GrpcClient.java +++ b/ingester-grpc/src/main/java/io/greptime/rpc/GrpcClient.java @@ -222,11 +222,9 @@ private long onReceived(boolean onError) { long duration = Clock.defaultClock().duration(startCall); MetricsUtil.timer(REQ_RT, methodName).update(duration, TimeUnit.MILLISECONDS); - MetricsUtil.timer(REQ_RT, methodName, address).update(duration, TimeUnit.MILLISECONDS); if (onError) { MetricsUtil.meter(REQ_FAILED, methodName).mark(); - MetricsUtil.meter(REQ_FAILED, methodName, address).mark(); } return duration; diff --git a/ingester-prometheus-metrics/src/main/java/io/greptime/metrics/MetricsExporter.java b/ingester-prometheus-metrics/src/main/java/io/greptime/metrics/MetricsExporter.java index 0c161de..03a83a2 100644 --- 
a/ingester-prometheus-metrics/src/main/java/io/greptime/metrics/MetricsExporter.java +++ b/ingester-prometheus-metrics/src/main/java/io/greptime/metrics/MetricsExporter.java @@ -48,7 +48,7 @@ public MetricsExporter(int port, MetricRegistry dropwizardMetricRegistry) { public boolean init(Void opts) { if (this.started.compareAndSet(false, true)) { try { - this.server = new HTTPServer(new InetSocketAddress(this.port), this.prometheusMetricRegistry); + this.server = new HTTPServer(new InetSocketAddress(this.port), this.prometheusMetricRegistry, true); LOG.info("Metrics exporter started at `http://localhost:{}/metrics`", this.port); return true; } catch (IOException e) { diff --git a/ingester-protocol/src/main/java/io/greptime/BulkWriteClient.java b/ingester-protocol/src/main/java/io/greptime/BulkWriteClient.java index a9496d7..7386354 100644 --- a/ingester-protocol/src/main/java/io/greptime/BulkWriteClient.java +++ b/ingester-protocol/src/main/java/io/greptime/BulkWriteClient.java @@ -16,6 +16,7 @@ package io.greptime; +import com.codahale.metrics.Histogram; import com.codahale.metrics.Timer; import io.greptime.common.Display; import io.greptime.common.Endpoint; @@ -63,8 +64,8 @@ public boolean init(BulkWriteOptions opts) { this.opts = Ensures.ensureNonNull(opts, "null `BulkWriteClient.opts`"); this.routerClient = this.opts.getRouterClient(); Executor pool = this.opts.getAsyncPool(); - this.asyncPool = pool != null ? pool : new SerializingExecutor("buld_write_client"); - this.asyncPool = new MetricExecutor(this.asyncPool, "async_bulk_write_pool.time"); + this.asyncPool = pool != null ? 
pool : new SerializingExecutor("bulk_write_client"); + this.asyncPool = new MetricExecutor(this.asyncPool, "async_bulk_write_pool"); return true; } @@ -167,6 +168,8 @@ public String toString() { static final class InnerMetricHelper { static final Timer BULK_WRITE_PREPARE_TIME = MetricsUtil.timer("bulk_write_prepare_time"); static final Timer BULK_WRITE_PUT_TIME = MetricsUtil.timer("bulk_write_put_time"); + static final Histogram BULK_WRITE_PUT_ROWS = MetricsUtil.histogram("bulk_write_put_rows"); + static final Histogram BULK_WRITE_PUT_BYTES = MetricsUtil.histogram("bulk_write_put_bytes"); static Timer prepareTime() { return BULK_WRITE_PREPARE_TIME; @@ -175,6 +178,14 @@ static Timer prepareTime() { static Timer putTime() { return BULK_WRITE_PUT_TIME; } + + static Histogram putRows() { + return BULK_WRITE_PUT_ROWS; + } + + static Histogram putBytes() { + return BULK_WRITE_PUT_BYTES; + } } /** @@ -205,10 +216,19 @@ public Table.TableBufferRoot tableBufferRoot(int columnBufferSize) { @Override public CompletableFuture writeNext() throws Exception { Table.TableBufferRoot table = this.current.getAndSet(null); - if (table != null) { - // make sure the table is completed - table.complete(); + if (table == null) { + return Util.errorCf( + new IllegalStateException("No table buffer available - call `tableBufferRoot()` first")); } + // make sure the table is completed + table.complete(); + + String tableName = table.tableName(); + int rows = table.rowCount(); + long bytes = table.bytesUsed(); + + InnerMetricHelper.putRows().update(rows); + InnerMetricHelper.putBytes().update(bytes); // Check if the stream is ready if (!isStreamReady()) { @@ -224,13 +244,22 @@ public CompletableFuture writeNext() throws Exception { InnerMetricHelper.prepareTime().update(clock.duration(startPut), TimeUnit.MILLISECONDS); long startCall = clock.getTick(); + int inFlight = stage.numInFlight(); CompletableFuture future = stage.future(); future.whenComplete((r, t) -> { - 
InnerMetricHelper.putTime().update(clock.duration(startCall), TimeUnit.MILLISECONDS); + long duration = clock.duration(startCall); + InnerMetricHelper.putTime().update(duration, TimeUnit.MILLISECONDS); + if (Util.isBulkWriteLogging()) { + LOG.info( + "Bulk write completed - table={}, rows={}, bytes={}, duration={}ms, in-flight={} requests", + tableName, + rows, + bytes, + duration, + inFlight); + } }); - LOG.info("Write request sent successfully, in-flight requests: {}", stage.numInFlight()); - return future; }); } diff --git a/ingester-protocol/src/main/java/io/greptime/Util.java b/ingester-protocol/src/main/java/io/greptime/Util.java index c12734f..1a92832 100644 --- a/ingester-protocol/src/main/java/io/greptime/Util.java +++ b/ingester-protocol/src/main/java/io/greptime/Util.java @@ -39,12 +39,14 @@ * Util for GreptimeDB Ingester. */ public final class Util { - private static final AtomicBoolean RW_LOGGING; + private static final AtomicBoolean WRITE_LOGGING; + private static final AtomicBoolean BULK_WRITE_LOGGING; private static final int REPORT_PERIOD_MIN; private static final ScheduledExecutorService DISPLAY; static { - RW_LOGGING = new AtomicBoolean(SystemPropertyUtil.getBool(Keys.RW_LOGGING, false)); + WRITE_LOGGING = new AtomicBoolean(SystemPropertyUtil.getBool(Keys.WRITE_LOGGING, false)); + BULK_WRITE_LOGGING = new AtomicBoolean(SystemPropertyUtil.getBool(Keys.BULK_WRITE_LOGGING, true)); REPORT_PERIOD_MIN = SystemPropertyUtil.getInt(Keys.REPORT_PERIOD, 10); DISPLAY = ThreadPoolUtil.newScheduledBuilder() .poolName("display_self") @@ -59,23 +61,43 @@ public final class Util { } /** - * Whether to output concise read/write logs. + * Whether to output concise write logs. 
* * @return true or false */ - public static boolean isRwLogging() { - return RW_LOGGING.get(); + public static boolean isWriteLogging() { + return WRITE_LOGGING.get(); } /** - * See {@link #isRwLogging()} + * See {@link #isWriteLogging()} * - * Reset `rwLogging`, set to the opposite of the old value. + * Reset `writeLogging`, set to the opposite of the old value. * * @return old value */ - public static boolean resetRwLogging() { - return RW_LOGGING.getAndSet(!RW_LOGGING.get()); + public static boolean resetWriteLogging() { + return WRITE_LOGGING.getAndSet(!WRITE_LOGGING.get()); + } + + /** + * Whether to output concise bulk write logs. + * + * @return true or false + */ + public static boolean isBulkWriteLogging() { + return BULK_WRITE_LOGGING.get(); + } + + /** + * See {@link #isBulkWriteLogging()} + * + * Reset `bulkWriteLogging`, set to the opposite of the old value. + * + * @return old value + */ + public static boolean resetBulkWriteLogging() { + return BULK_WRITE_LOGGING.getAndSet(!BULK_WRITE_LOGGING.get()); } /** diff --git a/ingester-protocol/src/main/java/io/greptime/WriteClient.java b/ingester-protocol/src/main/java/io/greptime/WriteClient.java index fb8b6a1..12e8c28 100644 --- a/ingester-protocol/src/main/java/io/greptime/WriteClient.java +++ b/ingester-protocol/src/main/java/io/greptime/WriteClient.java @@ -72,7 +72,7 @@ public boolean init(WriteOptions opts) { this.routerClient = this.opts.getRouterClient(); Executor pool = this.opts.getAsyncPool(); this.asyncPool = pool != null ? 
pool : new SerializingExecutor("write_client"); - this.asyncPool = new MetricExecutor(this.asyncPool, "async_write_pool.time"); + this.asyncPool = new MetricExecutor(this.asyncPool, "async_write_pool"); this.writeLimiter = new DefaultWriteLimiter(this.opts.getMaxInFlightWritePoints(), this.opts.getLimitedPolicy()); return true; @@ -100,7 +100,7 @@ public CompletableFuture> write(Collection tables, W (r, e) -> { InnerMetricHelper.writeQps().mark(); if (r != null) { - if (Util.isRwLogging()) { + if (Util.isWriteLogging()) { LOG.info( "Write to {} with operation {}, duration={} ms, result={}.", Keys.DB_NAME, diff --git a/ingester-protocol/src/main/java/io/greptime/signal/BulkWriteLoggingSignalHandler.java b/ingester-protocol/src/main/java/io/greptime/signal/BulkWriteLoggingSignalHandler.java new file mode 100644 index 0000000..77637f0 --- /dev/null +++ b/ingester-protocol/src/main/java/io/greptime/signal/BulkWriteLoggingSignalHandler.java @@ -0,0 +1,42 @@ +/* + * Copyright 2023 Greptime Team + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.greptime.signal; + +import io.greptime.Util; +import io.greptime.common.SPI; +import io.greptime.common.signal.FileSignal; +import io.greptime.common.signal.FileSignalHelper; +import io.greptime.common.signal.SignalHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@SPI(priority = 96) +public class BulkWriteLoggingSignalHandler implements SignalHandler { + + private static final Logger LOG = LoggerFactory.getLogger(BulkWriteLoggingSignalHandler.class); + + @Override + public void handle(String signalName) { + if (FileSignalHelper.ignoreSignal(FileSignal.BulkWriteLogging)) { + LOG.info("`BULK_WRITE_LOGGING`={}.", Util.isBulkWriteLogging()); + return; + } + + boolean oldValue = Util.resetBulkWriteLogging(); + LOG.info("Reset `BULK_WRITE_LOGGING` to {} triggered by signal: {}.", !oldValue, signalName); + } +} diff --git a/ingester-protocol/src/main/java/io/greptime/signal/RwLoggingSignalHandler.java b/ingester-protocol/src/main/java/io/greptime/signal/WriteLoggingSignalHandler.java similarity index 65% rename from ingester-protocol/src/main/java/io/greptime/signal/RwLoggingSignalHandler.java rename to ingester-protocol/src/main/java/io/greptime/signal/WriteLoggingSignalHandler.java index 95489e4..5725e60 100644 --- a/ingester-protocol/src/main/java/io/greptime/signal/RwLoggingSignalHandler.java +++ b/ingester-protocol/src/main/java/io/greptime/signal/WriteLoggingSignalHandler.java @@ -25,21 +25,21 @@ import org.slf4j.LoggerFactory; /** - * A signal handler that can reset RW_LOGGING by {@link Util#resetRwLogging()}. + * A signal handler that can reset WRITE_LOGGING by {@link Util#resetWriteLogging()}. 
*/ @SPI(priority = 95) -public class RwLoggingSignalHandler implements SignalHandler { +public class WriteLoggingSignalHandler implements SignalHandler { - private static final Logger LOG = LoggerFactory.getLogger(RwLoggingSignalHandler.class); + private static final Logger LOG = LoggerFactory.getLogger(WriteLoggingSignalHandler.class); @Override public void handle(String signalName) { - if (FileSignalHelper.ignoreSignal(FileSignal.RwLogging)) { - LOG.info("`RW_LOGGING`={}.", Util.isRwLogging()); + if (FileSignalHelper.ignoreSignal(FileSignal.WriteLogging)) { + LOG.info("`WRITE_LOGGING`={}.", Util.isWriteLogging()); return; } - boolean oldValue = Util.resetRwLogging(); - LOG.info("Reset `RW_LOGGING` to {} triggered by signal: {}.", !oldValue, signalName); + boolean oldValue = Util.resetWriteLogging(); + LOG.info("Reset `WRITE_LOGGING` to {} triggered by signal: {}.", !oldValue, signalName); } } diff --git a/ingester-protocol/src/main/resources/META-INF/services/io.greptime.common.signal.SignalHandler b/ingester-protocol/src/main/resources/META-INF/services/io.greptime.common.signal.SignalHandler index 88e2b02..0c47938 100644 --- a/ingester-protocol/src/main/resources/META-INF/services/io.greptime.common.signal.SignalHandler +++ b/ingester-protocol/src/main/resources/META-INF/services/io.greptime.common.signal.SignalHandler @@ -1,4 +1,5 @@ io.greptime.signal.DisplaySignalHandler io.greptime.signal.MetricsSignalHandler -io.greptime.signal.RwLoggingSignalHandler +io.greptime.signal.WriteLoggingSignalHandler +io.greptime.signal.BulkWriteLoggingSignalHandler io.greptime.signal.ShowHelpSignalHandler