Switch to path filtering instead of conformance markers to run conformance tests (#45)

lmolkova · web-flow · commit 258a3cee864f · 2026-05-19T12:32:15.000-07:00
* Switch to path filtering instead of conformance markers to run conformance tests
diff --git a/AGENTS.md b/AGENTS.md
@@ -119,10 +119,10 @@ via Weaver live-check. Each scenario module defines a subclass of
 `expected_spans`, `expected_metrics`, and implements
 `run(*, tracer_provider, meter_provider, logger_provider, vcr)`.
 
-`tests/test_conformance.py` must set `pytestmark = pytest.mark.conformance` at
-module level.
-
-Run via `tox -e py312-test-instrumentation-<pkg>-conformance`.
+Run via `tox -e py312-test-instrumentation-<pkg>-conformance`. The
+`*-conformance` tox envs target `tests/test_conformance.py` directly; the
+regular `*-{oldest,latest}` envs `--ignore` it so they don't need the
+OTLP/gRPC exporter or `weaver_live_check`.
 
 The parallel PR-review rules live in
 [`.github/instructions/instrumentation.instructions.md`](.github/instructions/instrumentation.instructions.md)
diff --git a/instrumentation/opentelemetry-instrumentation-anthropic/tests/test_conformance.py b/instrumentation/opentelemetry-instrumentation-anthropic/tests/test_conformance.py
@@ -21,8 +21,6 @@
 from .conformance.inference import InferenceScenario
 from .conformance.tool_calling import ToolCallingScenario
 
-pytestmark = pytest.mark.conformance
-
 _LEGACY_SYSTEM_SKIP = pytest.mark.skip(
     reason="anthropic emits legacy gen_ai.system in experimental mode"
 )
diff --git a/instrumentation/opentelemetry-instrumentation-langchain/tests/test_conformance.py b/instrumentation/opentelemetry-instrumentation-langchain/tests/test_conformance.py
@@ -20,8 +20,6 @@
 
 from .conformance.inference import InferenceScenario
 
-pytestmark = pytest.mark.conformance
-
 
 @pytest.mark.parametrize(
     "scenario",
diff --git a/instrumentation/opentelemetry-instrumentation-openai-v2/tests/test_conformance.py b/instrumentation/opentelemetry-instrumentation-openai-v2/tests/test_conformance.py
@@ -27,8 +27,6 @@
 from .conformance.inference import InferenceScenario
 from .conformance.tool_calling import ToolCallingScenario
 
-pytestmark = pytest.mark.conformance
-
 
 @pytest.mark.parametrize(
     "scenario",
diff --git a/pytest.ini b/pytest.ini
@@ -2,5 +2,3 @@
 addopts = -rs -v
 log_cli = true
 log_cli_level = warning
-markers =
-    conformance: GenAI semconv conformance scenario (run via the *-conformance tox envs)
diff --git a/tox.ini b/tox.ini
@@ -146,23 +146,23 @@ commands_pre =
   coverage: python {toxinidir}/scripts/eachdist.py install --editable
 
 commands =
-  test-instrumentation-openai-v2-{oldest,latest}: pytest -m "not conformance" {toxinidir}/instrumentation/opentelemetry-instrumentation-openai-v2/tests {posargs}
-  test-instrumentation-openai-v2-conformance: pytest -m conformance {toxinidir}/instrumentation/opentelemetry-instrumentation-openai-v2/tests --vcr-record=none {posargs}
+  test-instrumentation-openai-v2-{oldest,latest}: pytest --ignore={toxinidir}/instrumentation/opentelemetry-instrumentation-openai-v2/tests/test_conformance.py {toxinidir}/instrumentation/opentelemetry-instrumentation-openai-v2/tests {posargs}
+  test-instrumentation-openai-v2-conformance: pytest {toxinidir}/instrumentation/opentelemetry-instrumentation-openai-v2/tests/test_conformance.py --vcr-record=none {posargs}
   lint-instrumentation-openai-v2: sh -c "cd instrumentation && ruff check opentelemetry-instrumentation-openai-v2"
   test-instrumentation-openai_agents-v2: pytest {toxinidir}/instrumentation/opentelemetry-instrumentation-openai-agents-v2/tests {posargs}
   lint-instrumentation-openai_agents-v2: sh -c "cd instrumentation && ruff check opentelemetry-instrumentation-openai-agents-v2"
 
   test-instrumentation-google-genai: pytest {toxinidir}/instrumentation/opentelemetry-instrumentation-google-genai/tests --vcr-record=none {posargs}
   lint-instrumentation-google-genai: sh -c "cd instrumentation && ruff check opentelemetry-instrumentation-google-genai"
 
-  test-instrumentation-anthropic-{oldest,latest}: pytest -m "not conformance" {toxinidir}/instrumentation/opentelemetry-instrumentation-anthropic/tests --vcr-record=none {posargs}
-  test-instrumentation-anthropic-conformance: pytest -m conformance {toxinidir}/instrumentation/opentelemetry-instrumentation-anthropic/tests --vcr-record=none {posargs}
+  test-instrumentation-anthropic-{oldest,latest}: pytest --ignore={toxinidir}/instrumentation/opentelemetry-instrumentation-anthropic/tests/test_conformance.py {toxinidir}/instrumentation/opentelemetry-instrumentation-anthropic/tests --vcr-record=none {posargs}
+  test-instrumentation-anthropic-conformance: pytest {toxinidir}/instrumentation/opentelemetry-instrumentation-anthropic/tests/test_conformance.py --vcr-record=none {posargs}
   lint-instrumentation-anthropic: sh -c "cd instrumentation && ruff check opentelemetry-instrumentation-anthropic"
 
   test-instrumentation-claude-agent-sdk: pytest {toxinidir}/instrumentation/opentelemetry-instrumentation-claude-agent-sdk/tests --vcr-record=none {posargs}
   lint-instrumentation-claude-agent-sdk: sh -c "cd instrumentation && ruff check opentelemetry-instrumentation-claude-agent-sdk"
 
-  test-instrumentation-langchain-conformance: pytest -m conformance {toxinidir}/instrumentation/opentelemetry-instrumentation-langchain/tests --vcr-record=none {posargs}
+  test-instrumentation-langchain-conformance: pytest {toxinidir}/instrumentation/opentelemetry-instrumentation-langchain/tests/test_conformance.py --vcr-record=none {posargs}
   lint-instrumentation-langchain: sh -c "cd instrumentation && ruff check opentelemetry-instrumentation-langchain"
 
   lint-instrumentation-weaviate: sh -c "cd instrumentation && ruff check opentelemetry-instrumentation-weaviate"
diff --git a/util/opentelemetry-test-util-genai/src/opentelemetry/test_util_genai/conformance.py b/util/opentelemetry-test-util-genai/src/opentelemetry/test_util_genai/conformance.py
@@ -5,18 +5,17 @@
 
 Intended call shape from a per-package ``tests/test_conformance.py``::
 
-    pytestmark = pytest.mark.conformance
-
     @pytest.mark.parametrize(
         "scenario", [InferenceScenario(), ToolCallingScenario()]
     )
     def test_conformance(scenario, vcr, weaver_live_check):
         report = run_conformance(scenario, vcr=vcr, weaver=weaver_live_check)
         # Optionally layer lib-specific assertions on `report` here.
 
-The module-level ``pytestmark = pytest.mark.conformance`` is required: the
-``*-conformance`` tox envs select these tests via ``-m conformance``, and the
-regular ``*-{oldest,latest}`` envs deselect them via ``-m "not conformance"``.
+The ``*-conformance`` tox envs point pytest directly at
+``tests/test_conformance.py``; the regular ``*-{oldest,latest}`` envs
+``--ignore`` it. The OTLP/gRPC exporter and ``weaver_live_check`` only need
+to be installed in the conformance envs.
 
 Each ``tests/conformance/<op>.py`` defines a :class:`Scenario` subclass with:
 
@@ -40,15 +39,6 @@ def test_conformance(scenario, vcr, weaver_live_check):
 from pathlib import Path
 from typing import Any, ClassVar
 
-from opentelemetry.exporter.otlp.proto.grpc._log_exporter import (
-    OTLPLogExporter,
-)
-from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
-    OTLPMetricExporter,
-)
-from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
-    OTLPSpanExporter,
-)
 from opentelemetry.sdk._logs import LoggerProvider
 from opentelemetry.sdk._logs.export import SimpleLogRecordProcessor
 from opentelemetry.sdk.metrics import MeterProvider
@@ -105,6 +95,19 @@ def validate(self, report: LiveCheckReport) -> None:
 def _build_providers(
     endpoint: str,
 ) -> tuple[TracerProvider, MeterProvider, LoggerProvider]:
+    # OTLP/gRPC exporters are only installed in the *-conformance tox envs
+    # (see dev-requirements-conformance.txt). Import lazily so this module
+    # stays importable in regular test envs that exclude conformance tests.
+    from opentelemetry.exporter.otlp.proto.grpc._log_exporter import (  # noqa: PLC0415
+        OTLPLogExporter,
+    )
+    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (  # noqa: PLC0415
+        OTLPMetricExporter,
+    )
+    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (  # noqa: PLC0415
+        OTLPSpanExporter,
+    )
+
     tracer_provider = TracerProvider()
     tracer_provider.add_span_processor(
         SimpleSpanProcessor(OTLPSpanExporter(endpoint=endpoint, insecure=True))
diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/completion_hook.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/completion_hook.py
@@ -14,7 +14,7 @@
 
 import logging
 from os import environ
-from typing import Any, Protocol, cast, runtime_checkable
+from typing import Any, Protocol, runtime_checkable
 
 from opentelemetry._logs import LogRecord
 from opentelemetry.trace import Span
@@ -120,15 +120,15 @@ def load_completion_hook() -> CompletionHook:
     if not hook_name:
         return _NoOpCompletionHook()
 
-    for entry_point in entry_points(  # pyright: ignore[reportUnknownVariableType]
+    for entry_point in entry_points(
         group="opentelemetry_genai_completion_hook"
     ):
-        name = cast(str, entry_point.name)  # pyright: ignore[reportUnknownMemberType]
+        name = entry_point.name
         try:
             if hook_name != name:
                 continue
 
-            hook = entry_point.load()()  # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
+            hook = entry_point.load()()
             if not isinstance(hook, CompletionHook):
                 _logger.debug(
                     "%s is not a valid CompletionHook. Using noop", name

Original file line number	Diff line number	Diff line change
`@@ -21,8 +21,6 @@`
`21`	`21`	`from .conformance.inference import InferenceScenario`
`22`	`22`	`from .conformance.tool_calling import ToolCallingScenario`
`23`	`23`
`24`		`-pytestmark = pytest.mark.conformance`
`25`		`-`
`26`	`24`	`_LEGACY_SYSTEM_SKIP = pytest.mark.skip(`
`27`	`25`	`reason="anthropic emits legacy gen_ai.system in experimental mode"`
`28`	`26`	`)`