diff --git a/mysql/changelog.d/23505.added b/mysql/changelog.d/23505.added new file mode 100644 index 0000000000000..ab7aea1a62bcd --- /dev/null +++ b/mysql/changelog.d/23505.added @@ -0,0 +1 @@ +Add metadata lock blocking detection to activity query for MySQL 8.0+. \ No newline at end of file diff --git a/mysql/datadog_checks/mysql/activity.py b/mysql/datadog_checks/mysql/activity.py index eabf5dd1c00ac..7b13013d04836 100644 --- a/mysql/datadog_checks/mysql/activity.py +++ b/mysql/datadog_checks/mysql/activity.py @@ -89,22 +89,53 @@ """ BLOCKING_COLUMNS_MYSQL8 = """\ - ,blocking_thread.thread_id AS blocking_thread_id, - blocking_thread.processlist_id AS blocking_processlist_id + ,COALESCE(blocking_thread.thread_id, mdl_blocking_thread.thread_id) AS blocking_thread_id, + COALESCE(blocking_thread.processlist_id, mdl_blocking_thread.processlist_id) AS blocking_processlist_id, + mdl_waiting.OBJECT_TYPE AS mdl_object_type, + mdl_waiting.OBJECT_SCHEMA AS mdl_object_schema, + mdl_waiting.OBJECT_NAME AS mdl_object_name, + mdl_waiting.LOCK_TYPE AS mdl_waiting_lock_type, + mdl_blocking.LOCK_TYPE AS mdl_blocking_lock_type """ BLOCKING_JOINS_MYSQL8 = """\ - LEFT JOIN performance_schema.data_lock_waits AS lock_waits ON thread_a.thread_id = lock_waits.requesting_thread_id - LEFT JOIN performance_schema.threads AS blocking_thread ON lock_waits.blocking_thread_id = blocking_thread.thread_id + LEFT JOIN performance_schema.data_lock_waits AS lock_waits + ON thread_a.thread_id = lock_waits.requesting_thread_id + LEFT JOIN performance_schema.threads AS blocking_thread + ON lock_waits.blocking_thread_id = blocking_thread.thread_id + LEFT JOIN performance_schema.metadata_locks AS mdl_waiting + ON thread_a.thread_id = mdl_waiting.OWNER_THREAD_ID + AND mdl_waiting.LOCK_STATUS = 'PENDING' + LEFT JOIN performance_schema.metadata_locks AS mdl_blocking + ON mdl_waiting.OBJECT_TYPE = mdl_blocking.OBJECT_TYPE + AND mdl_waiting.OBJECT_SCHEMA <=> mdl_blocking.OBJECT_SCHEMA + AND mdl_waiting.OBJECT_NAME <=> mdl_blocking.OBJECT_NAME + AND mdl_blocking.LOCK_STATUS = 'GRANTED' + AND mdl_waiting.OWNER_THREAD_ID != mdl_blocking.OWNER_THREAD_ID + LEFT JOIN performance_schema.threads AS mdl_blocking_thread + ON mdl_blocking.OWNER_THREAD_ID = mdl_blocking_thread.thread_id """ IDLE_BLOCKERS_SUBQUERY_MYSQL8 = """\ OR - -- Include idle sessions that are blocking others + -- Include idle sessions that are blocking others via InnoDB data locks thread_a.thread_id IN ( SELECT blocking_thread_id FROM performance_schema.data_lock_waits ) + OR + -- Include idle sessions that are blocking others via metadata locks + thread_a.thread_id IN ( + SELECT mdl_granted.OWNER_THREAD_ID + FROM performance_schema.metadata_locks AS mdl_pending + JOIN performance_schema.metadata_locks AS mdl_granted + ON mdl_pending.OBJECT_TYPE = mdl_granted.OBJECT_TYPE + AND mdl_pending.OBJECT_SCHEMA <=> mdl_granted.OBJECT_SCHEMA + AND mdl_pending.OBJECT_NAME <=> mdl_granted.OBJECT_NAME + AND mdl_granted.LOCK_STATUS = 'GRANTED' + AND mdl_pending.LOCK_STATUS = 'PENDING' + AND mdl_pending.OWNER_THREAD_ID != mdl_granted.OWNER_THREAD_ID + ) """ BLOCKING_COLUMNS_MYSQL7 = """\ diff --git a/mysql/tests/test_query_activity.py b/mysql/tests/test_query_activity.py index 39dde133aa97b..b0cb19330e23d 100644 --- a/mysql/tests/test_query_activity.py +++ b/mysql/tests/test_query_activity.py @@ -667,6 +667,97 @@ def run_second_deadlock_query(conn, event1, event2): ) +@pytest.mark.skipif( + MYSQL_FLAVOR == 'mariadb' or MYSQL_VERSION_PARSED < parse_version('8.0'), + reason='MDL blocking detection requires MySQL 8.0+ with metadata_locks instrument enabled by default', +) +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_mdl_blocking_activity(aggregator, dbm_instance, dd_run_check, root_conn): + """Verify that metadata lock contention appears in the activity payload with blocking relationships.""" + config = deepcopy(dbm_instance) + config['query_activity']['collect_blocking_queries'] = True + # Disable all metrics that could query information_schema or touch user table + # metadata — those queries acquire SHARED_READ metadata locks which would get + # queued behind the ALTER TABLE's pending EXCLUSIVE lock, deadlocking the check. + config['options'] = { + 'extra_performance_metrics': False, + 'replication': False, + 'extra_status_metrics': False, + 'extra_innodb_metrics': False, + 'schema_size_metrics': False, + 'table_size_metrics': False, + 'system_table_size_metrics': False, + 'table_rows_stats_metrics': False, + } + config['index_metrics'] = {'enabled': False} + config['only_custom_queries'] = True + check = MySql(CHECK_NAME, {}, instances=[config]) + + MDL_TABLE = 'testdb.mdl_test_table' + + with closing(root_conn.cursor()) as cur: + # Ensure the consumer is enabled — other tests in this session may disable it. + cur.execute("UPDATE performance_schema.setup_consumers SET enabled='YES' WHERE name = 'events_waits_current'") + cur.execute('DROP TABLE IF EXISTS {}'.format(MDL_TABLE)) + cur.execute('CREATE TABLE {} (id INT PRIMARY KEY, val VARCHAR(50))'.format(MDL_TABLE)) + cur.execute('INSERT INTO {} VALUES (1, %s)'.format(MDL_TABLE), ('hello',)) + # PyMySQL defaults to autocommit=False, so the DDL/DML above opened an + # implicit transaction that holds a SHARED_WRITE metadata lock on the table. + # Commit to release it before we set up the intentional MDL contention. + root_conn.commit() + + root_password = 'mypass' if MYSQL_FLAVOR == 'percona' or MYSQL_REPLICATION in ('group', 'hybrid') else None + holder_conn = pymysql.connect(host=HOST, port=PORT, user='root', password=root_password) + ddl_conn = pymysql.connect(host=HOST, port=PORT, user='root', password=root_password) + ddl_ready = Event() + + def _hold_shared_mdl(conn): + conn.begin() + conn.cursor().execute('SELECT * FROM {}'.format(MDL_TABLE)) + + def _run_ddl(conn, ready_event): + ready_event.set() + try: + conn.cursor().execute('ALTER TABLE {} ADD COLUMN new_col INT'.format(MDL_TABLE)) + except Exception: + pass + + executor = ThreadPoolExecutor(2) + try: + executor.submit(_hold_shared_mdl, holder_conn) + time.sleep(0.2) + executor.submit(_run_ddl, ddl_conn, ddl_ready) + ddl_ready.wait(timeout=5) + time.sleep(0.5) + + dd_run_check(check) + + dbm_activity = aggregator.get_event_platform_events("dbm-activity") + assert dbm_activity, "should have collected at least one activity payload" + + all_rows = [row for event in dbm_activity for row in event['mysql_activity']] + mdl_blocked_rows = [r for r in all_rows if r.get('mdl_object_name') == 'mdl_test_table'] + assert mdl_blocked_rows, "should have at least one activity row with MDL blocking context; all rows: {}".format( + [(r.get('sql_text', '?')[:60], r.get('mdl_object_name')) for r in all_rows] + ) + + mdl_row = mdl_blocked_rows[0] + assert mdl_row['mdl_object_type'] == 'TABLE' + assert mdl_row['mdl_object_schema'] == 'testdb' + assert mdl_row.get('blocking_thread_id'), "MDL blocked row should have a blocking_thread_id" + finally: + holder_conn.commit() + holder_conn.close() + # Wait for the ALTER TABLE to complete now that the MDL holder released. + # Must shutdown before closing ddl_conn — closing a socket that another + # thread is recv()ing on deadlocks on macOS. + executor.shutdown(wait=True) + ddl_conn.close() + with closing(root_conn.cursor()) as cur: + cur.execute('DROP TABLE IF EXISTS {}'.format(MDL_TABLE)) + + def _get_conn_for_user(user, _autocommit=False): return pymysql.connect(host=HOST, port=PORT, user=user, password=user, autocommit=_autocommit) diff --git a/nifi/README.md b/nifi/README.md index 6dcb6f762499d..db5403c134d7e 100644 --- a/nifi/README.md +++ b/nifi/README.md @@ -31,56 +31,6 @@ No additional installation is needed on your server. 3. [Restart the Agent][5]. -#### Log collection - -1. Collecting logs is disabled by default in the Datadog Agent. Enable it in your `datadog.yaml` file: - - ```yaml - logs_enabled: true - ``` - -2. Uncomment and edit the logs configuration block in your `nifi.d/conf.yaml` file. NiFi produces several log files; configure the ones relevant to your environment: - - ```yaml - logs: - - type: file - path: /opt/nifi/logs/nifi-app.log - source: nifi - service: nifi - - type: file - path: /opt/nifi/logs/nifi-user.log - source: nifi - service: nifi - - type: file - path: /opt/nifi/logs/nifi-bootstrap.log - source: nifi - service: nifi - - type: file - path: /opt/nifi/logs/nifi-request.log - source: nifi - service: nifi - tags: - - "log_type:request" - ``` - - The `log_type:request` tag on the request log entry routes HTTP access logs through a dedicated parsing pipeline that extracts standard HTTP attributes (method, status code, URL path, client IP). - - NiFi is a Java application that produces multiline stack traces. To aggregate them into single log events, add a `log_processing_rules` entry to the application log: - - ```yaml - logs: - - type: file - path: /opt/nifi/logs/nifi-app.log - source: nifi - service: nifi - log_processing_rules: - - type: multi_line - name: java_stack_trace - pattern: \d{4}-\d{2}-\d{2} - ``` - -3. [Restart the Agent][5]. - ### Validation [Run the Agent's status subcommand][6] and look for `nifi` under the Checks section. diff --git a/nifi/assets/configuration/spec.yaml b/nifi/assets/configuration/spec.yaml index d55c389aef416..a9e7ab250ffc9 100644 --- a/nifi/assets/configuration/spec.yaml +++ b/nifi/assets/configuration/spec.yaml @@ -87,17 +87,3 @@ files: path: /opt/nifi/logs/nifi-app.log source: nifi service: nifi - - type: file - path: /opt/nifi/logs/nifi-user.log - source: nifi - service: nifi - - type: file - path: /opt/nifi/logs/nifi-bootstrap.log - source: nifi - service: nifi - - type: file - path: /opt/nifi/logs/nifi-request.log - source: nifi - service: nifi - tags: - - "log_type:request" diff --git a/nifi/assets/dataflows.yaml b/nifi/assets/dataflows.yaml deleted file mode 100644 index 9e1235ec477b6..0000000000000 --- a/nifi/assets/dataflows.yaml +++ /dev/null @@ -1,13 +0,0 @@ -provides: - - id: nifi-metrics - always_on: true - data_type: metrics - direction: inbound - - id: nifi-events - always_on: true - data_type: events - direction: inbound - - id: nifi-logs - always_on: false - data_type: logs - direction: inbound diff --git a/nifi/assets/logs/nifi.yaml b/nifi/assets/logs/nifi.yaml deleted file mode 100644 index 0145b69ddfdf8..0000000000000 --- a/nifi/assets/logs/nifi.yaml +++ /dev/null @@ -1,178 +0,0 @@ -id: nifi -metric_id: nifi -backend_only: false -facets: - - groups: - - Web Access - name: Method - path: http.method - source: log - - groups: - - Web Access - name: Status Code - path: http.status_code - source: log - - groups: - - Web Access - name: URL Host - path: http.url_details.host - source: log - - groups: - - Web Access - name: URL Path - path: http.url_details.path - source: log - - groups: - - Web Access - name: URL Port - path: http.url_details.port - source: log - - groups: - - Web Access - name: URL scheme - path: http.url_details.scheme - source: log - - groups: - - Web Access - name: Browser - path: http.useragent_details.browser.family - source: log - - groups: - - Web Access - name: Device - path: http.useragent_details.device.family - source: log - - groups: - - Web Access - name: OS - path: http.useragent_details.os.family - source: log - - groups: - - Source Code - name: Logger Name - path: logger.name - source: log - - groups: - - Source Code - name: Thread Name - path: logger.thread_name - source: log - - groups: - - Web Access - name: Client IP - path: network.client.ip - source: log -pipeline: - type: pipeline - name: NiFi - enabled: true - filter: - query: source:nifi - processors: - - type: pipeline - name: NiFi application logs - enabled: true - filter: - query: -log_type:request - processors: - - type: grok-parser - name: Parsing NiFi application logs - enabled: true - source: message - samples: - - "2026-03-20 18:08:33,065 INFO [main] o.a.nifi.NiFi NiFi PID is 1" - - "2026-03-20 18:08:33,065 ERROR [Timer-Driven Process Thread-1] o.a.n.p.standard.PutFile PutFile[id=abc] Failed to write" - - "2026-03-20 18:08:34,100 WARN [Flow Service Tasks Thread-1] o.a.n.c.s.impl.WriteAheadFlowFileRepository Encountered non-critical error" - grok: - supportRules: | - _date %{date("yyyy-MM-dd HH:mm:ss,SSS"):timestamp} - _level %{word:level} - _thread %{regex("[^]]*"):logger.thread_name} - _logger %{notSpace:logger.name} - matchRules: | - nifi_app %{_date} %{_level} \[%{_thread}\] %{_logger} %{data:message} - - type: date-remapper - name: Define `timestamp` as the official date of the log - enabled: true - sources: - - timestamp - - type: status-remapper - name: Define `level` as the official status of the log - enabled: true - sources: - - level - - type: message-remapper - name: Define `message` as the official message of the log - enabled: true - sources: - - message - - type: pipeline - name: NiFi HTTP request logs - enabled: true - filter: - query: log_type:request - processors: - - type: grok-parser - name: Parsing NiFi HTTP request logs - enabled: true - source: message - samples: - - '192.168.1.100 - admin [20/Mar/2026:18:08:33 +0000] "GET /nifi-api/flow/status HTTP/1.1" 200 1234 "-" "python-requests/2.31.0"' - - '10.0.0.50 - - [20/Mar/2026:18:10:45 +0000] "POST /nifi-api/process-groups/root/process-groups HTTP/1.1" 404 892 "https://localhost:8443/nifi/" "Mozilla/5.0"' - grok: - supportRules: | - _auth %{notSpace:http.auth:nullIf("-")} - _bytes_written %{integer:network.bytes_written} - _client_ip %{ipOrHost:network.client.ip} - _version HTTP\/%{regex("\\d+\\.\\d+"):http.version} - _url %{notSpace:http.url} - _ident %{notSpace:http.ident:nullIf("-")} - _user_agent %{regex("[^\\\"]*"):http.useragent} - _referer %{notSpace:http.referer} - _status_code %{integer:http.status_code} - _method %{word:http.method} - _date_access %{date("dd/MMM/yyyy:HH:mm:ss Z"):date_access} - matchRules: | - access.common %{_client_ip} %{_ident} %{_auth} \[%{_date_access}\] "(?>%{_method} |)%{_url}(?> %{_version}|)" %{_status_code} (?>%{_bytes_written}|-) - - access.combined %{access.common} "%{_referer}" "%{_user_agent}" - - type: url-parser - name: Extract URL details - enabled: true - sources: - - http.url - target: http.url_details - - type: user-agent-parser - name: Extract user agent details - enabled: true - sources: - - http.useragent - target: http.useragent_details - encoded: false - - type: date-remapper - name: Define `date_access` as the official date of the log - enabled: true - sources: - - date_access - - type: category-processor - name: Categorize status code - enabled: true - categories: - - filter: - query: "@http.status_code:[200 TO 299]" - name: OK - - filter: - query: "@http.status_code:[300 TO 399]" - name: notice - - filter: - query: "@http.status_code:[400 TO 499]" - name: warning - - filter: - query: "@http.status_code:[500 TO 599]" - name: error - target: http.status_category - - type: status-remapper - name: Define `http.status_category` as the official status of the log - enabled: true - sources: - - http.status_category diff --git a/nifi/assets/logs/nifi_tests.yaml b/nifi/assets/logs/nifi_tests.yaml deleted file mode 100644 index dacd133ff8f3c..0000000000000 --- a/nifi/assets/logs/nifi_tests.yaml +++ /dev/null @@ -1,117 +0,0 @@ -id: "nifi" -tests: - - - sample: "2026-03-20 18:08:33,065 INFO [main] o.a.nifi.NiFi NiFi PID is 1" - result: - custom: - level: "INFO" - logger: - name: "o.a.nifi.NiFi" - thread_name: "main" - timestamp: 1774030113065 - message: "NiFi PID is 1" - status: "info" - tags: - - "source:LOGS_SOURCE" - timestamp: 1774030113065 - - - sample: "2026-03-20 18:08:33,065 ERROR [Timer-Driven Process Thread-1] o.a.n.p.standard.PutFile PutFile[id=abc] Failed to write" - result: - custom: - level: "ERROR" - logger: - name: "o.a.n.p.standard.PutFile" - thread_name: "Timer-Driven Process Thread-1" - timestamp: 1774030113065 - message: "PutFile[id=abc] Failed to write" - status: "error" - tags: - - "source:LOGS_SOURCE" - timestamp: 1774030113065 - - - sample: "2026-03-20 18:08:34,100 WARN [Flow Service Tasks Thread-1] o.a.n.c.s.impl.WriteAheadFlowFileRepository Encountered non-critical error" - result: - custom: - level: "WARN" - logger: - name: "o.a.n.c.s.impl.WriteAheadFlowFileRepository" - thread_name: "Flow Service Tasks Thread-1" - timestamp: 1774030114100 - message: "Encountered non-critical error" - status: "warn" - tags: - - "source:LOGS_SOURCE" - timestamp: 1774030114100 - - - sample: '192.168.1.100 - admin [20/Mar/2026:18:08:33 +0000] "GET /nifi-api/flow/status HTTP/1.1" 200 1234 "-" "python-requests/2.31.0"' - tags: - - "log_type:request" - result: - custom: - date_access: 1774030113000 - http: - auth: "admin" - method: "GET" - referer: "-" - status_category: "OK" - status_code: 200 - url: "/nifi-api/flow/status" - url_details: - path: "/nifi-api/flow/status" - useragent: "python-requests/2.31.0" - useragent_details: - browser: - family: "Python Requests" - major: "2" - minor: "31" - device: - category: "Other" - family: "Other" - os: - family: "Other" - version: "1.1" - network: - bytes_written: 1234 - client: - ip: "192.168.1.100" - message: '192.168.1.100 - admin [20/Mar/2026:18:08:33 +0000] "GET /nifi-api/flow/status HTTP/1.1" 200 1234 "-" "python-requests/2.31.0"' - status: "ok" - tags: - - "source:LOGS_SOURCE" - - "log_type:request" - timestamp: 1774030113000 - - - sample: '10.0.0.50 - - [20/Mar/2026:18:10:45 +0000] "POST /nifi-api/process-groups/root/process-groups HTTP/1.1" 404 892 "https://localhost:8443/nifi/" "Mozilla/5.0"' - tags: - - "log_type:request" - result: - custom: - date_access: 1774030245000 - http: - method: "POST" - referer: "https://localhost:8443/nifi/" - status_category: "warning" - status_code: 404 - url: "/nifi-api/process-groups/root/process-groups" - url_details: - path: "/nifi-api/process-groups/root/process-groups" - useragent: "Mozilla/5.0" - useragent_details: - browser: - family: "Other" - device: - category: "Other" - family: "Other" - os: - family: "Other" - version: "1.1" - network: - bytes_written: 892 - client: - ip: "10.0.0.50" - message: '10.0.0.50 - - [20/Mar/2026:18:10:45 +0000] "POST /nifi-api/process-groups/root/process-groups HTTP/1.1" 404 892 "https://localhost:8443/nifi/" "Mozilla/5.0"' - status: "warn" - tags: - - "source:LOGS_SOURCE" - - "log_type:request" - timestamp: 1774030245000 diff --git a/nifi/assets/saved_views/nifi_access.json b/nifi/assets/saved_views/nifi_access.json deleted file mode 100644 index 1aac748133d47..0000000000000 --- a/nifi/assets/saved_views/nifi_access.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "name": "NiFi Access Logs", - "type": "logs", - "page": "stream", - "query": "source:nifi log_type:request", - "timerange": { - "interval_ms": 3600000 - }, - "visible_facets": [ - "source", - "host", - "service", - "status", - "@http.status_code", - "@http.method", - "@http.url_details.path", - "@network.client.ip" - ], - "options": { - "columns": [ - "status", - "@http.method", - "@http.url_details.path", - "@http.status_code" - ], - "show_date_column": true, - "show_message_column": true, - "message_display": "inline", - "show_timeline": true - } -} diff --git a/nifi/assets/saved_views/nifi_errors.json b/nifi/assets/saved_views/nifi_errors.json deleted file mode 100644 index e7bac1e1b986e..0000000000000 --- a/nifi/assets/saved_views/nifi_errors.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "name": "NiFi Error Logs", - "type": "logs", - "page": "stream", - "query": "source:nifi status:(error OR warn OR warning)", - "timerange": { - "interval_ms": 3600000 - }, - "visible_facets": [ - "source", - "host", - "service", - "status", - "@logger.name", - "@logger.thread_name" - ], - "options": { - "columns": [ - "status", - "host", - "@logger.name" - ], - "show_date_column": true, - "show_message_column": true, - "message_display": "inline", - "show_timeline": true - } -} diff --git a/nifi/changelog.d/23302.added b/nifi/changelog.d/23302.added deleted file mode 100644 index 4bae1b2d051ac..0000000000000 --- a/nifi/changelog.d/23302.added +++ /dev/null @@ -1 +0,0 @@ -Add log pipeline, saved views, and dataflows for NiFi log collection \ No newline at end of file diff --git a/nifi/datadog_checks/nifi/data/conf.yaml.example b/nifi/datadog_checks/nifi/data/conf.yaml.example index eb01cd6da9516..7742e60c5a67b 100644 --- a/nifi/datadog_checks/nifi/data/conf.yaml.example +++ b/nifi/datadog_checks/nifi/data/conf.yaml.example @@ -480,17 +480,3 @@ instances: # path: /opt/nifi/logs/nifi-app.log # source: nifi # service: nifi -# - type: file -# path: /opt/nifi/logs/nifi-user.log -# source: nifi -# service: nifi -# - type: file -# path: /opt/nifi/logs/nifi-bootstrap.log -# source: nifi -# service: nifi -# - type: file -# path: /opt/nifi/logs/nifi-request.log -# source: nifi -# service: nifi -# tags: -# - log_type:request