From 130a6c95e4a97909d0af3b9714e4bc9b23ff6e64 Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Tue, 12 May 2026 00:04:33 -0400 Subject: [PATCH] feat: expose event source inventory capabilities --- .../EventImport/Handlers/DiceFm/DiceFm.php | 10 ++- .../Handlers/EventFlyer/EventFlyer.php | 15 +++- .../Handlers/EventImportHandler.php | 51 +++++++++++++ .../SingleRecurring/SingleRecurring.php | 11 ++- .../Handlers/Ticketmaster/Ticketmaster.php | 17 ++++- .../WebScraper/UniversalWebScraper.php | 26 +++++-- .../Unit/SourceInventoryCapabilitiesTest.php | 74 +++++++++++++++++++ 7 files changed, 188 insertions(+), 16 deletions(-) create mode 100644 tests/Unit/SourceInventoryCapabilitiesTest.php diff --git a/inc/Steps/EventImport/Handlers/DiceFm/DiceFm.php b/inc/Steps/EventImport/Handlers/DiceFm/DiceFm.php index a7e3037..d766404 100644 --- a/inc/Steps/EventImport/Handlers/DiceFm/DiceFm.php +++ b/inc/Steps/EventImport/Handlers/DiceFm/DiceFm.php @@ -45,6 +45,14 @@ public function __construct() { ); } + protected function getSourceInventoryCapabilities(): array { + return array( + 'stable_ids' => true, + 'supports_query_shards' => true, + 'bounded_by' => array( 'city', 'country' ), + ); + } + /** * Execute Dice FM event import with flat parameter structure */ @@ -159,7 +167,7 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr 'flow_id' => $context->getFlowId(), 'original_title' => $standardized_event['title'], 'event_identifier' => $event_identifier, - 'item_identifier' => $event_identifier, + 'item_identifier' => $event_identifier, 'import_timestamp' => time(), '_engine_data' => $engine_data, ), diff --git a/inc/Steps/EventImport/Handlers/EventFlyer/EventFlyer.php b/inc/Steps/EventImport/Handlers/EventFlyer/EventFlyer.php index 3446946..6592a88 100644 --- a/inc/Steps/EventImport/Handlers/EventFlyer/EventFlyer.php +++ b/inc/Steps/EventImport/Handlers/EventFlyer/EventFlyer.php @@ -43,6 +43,15 @@ public function __construct() { ); } + protected function getSourceInventoryCapabilities(): array { + return array( + 'can_enumerate' => true, + 'stable_ids' => true, + 'has_total_count' => true, + 'inventory_source' => 'uploaded_files', + ); + } + protected function executeFetch( array $config, ExecutionContext $context ): array { $context->log( 'info', 'EventFlyer: Starting import' ); @@ -98,8 +107,8 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr // Add image context to engine data for vision processing. $engine_data['image_file_path'] = $image_file['persistent_path']; - $upload_dir = wp_upload_dir(); - $engine_data['image_url'] = str_replace( $upload_dir['basedir'], $upload_dir['baseurl'], $image_file['persistent_path'] ); + $upload_dir = wp_upload_dir(); + $engine_data['image_url'] = str_replace( $upload_dir['basedir'], $upload_dir['baseurl'], $image_file['persistent_path'] ); $this->stripVenueMetadataFromEvent( $event_data ); @@ -121,7 +130,7 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr 'flow_id' => $context->getFlowId(), 'original_title' => $event_data['title'] ? $event_data['title'] : $image_file['original_name'], 'event_identifier' => $event_identifier, - 'item_identifier' => $file_identifier, + 'item_identifier' => $file_identifier, 'import_timestamp' => time(), 'image_file_path' => $image_file['persistent_path'], '_engine_data' => $engine_data, diff --git a/inc/Steps/EventImport/Handlers/EventImportHandler.php b/inc/Steps/EventImport/Handlers/EventImportHandler.php index 1da29d8..0064141 100644 --- a/inc/Steps/EventImport/Handlers/EventImportHandler.php +++ b/inc/Steps/EventImport/Handlers/EventImportHandler.php @@ -48,6 +48,57 @@ public function shouldSkipEventTitle( string $title ): bool { public function __construct( string $handler_type ) { parent::__construct( $handler_type ); + add_filter( 'datamachine_source_inventory_capabilities', array( $this, 'filterSourceInventoryCapabilities' ), 10, 2 ); + } + + /** + * Add handler-owned source inventory facts to matching source descriptors. + * + * @param array $capabilities Existing capabilities. + * @param array $source Source descriptor. + * @return array + */ + public function filterSourceInventoryCapabilities( array $capabilities, array $source ): array { + if ( ! $this->sourceMatchesHandler( $source ) ) { + return $capabilities; + } + + return array_merge( $this->getSourceInventoryCapabilities(), $capabilities ); + } + + /** + * Handler-owned source inventory facts. + * + * Concrete handlers override this when their source has known inventory, + * count, cursor, or bounded-discovery behavior. + * + * @return array + */ + protected function getSourceInventoryCapabilities(): array { + return array(); + } + + /** + * Whether a generic source descriptor refers to this handler. + * + * @param array $source Source descriptor. + */ + private function sourceMatchesHandler( array $source ): bool { + foreach ( array( 'handler', 'handler_type', 'provider', 'source_type', 'kind' ) as $key ) { + if ( $this->handler_type === $this->normalizeSourceKey( (string) ( $source[ $key ] ?? '' ) ) ) { + return true; + } + } + + return false; + } + + private function normalizeSourceKey( string $value ): string { + $value = strtolower( trim( $value ) ); + $value = preg_replace( '/[^a-z0-9_\-]+/', '_', $value ); + $value = str_replace( '-', '_', (string) $value ); + + return trim( $value, '_' ); } /** diff --git a/inc/Steps/EventImport/Handlers/SingleRecurring/SingleRecurring.php b/inc/Steps/EventImport/Handlers/SingleRecurring/SingleRecurring.php index b86058f..e62c173 100644 --- a/inc/Steps/EventImport/Handlers/SingleRecurring/SingleRecurring.php +++ b/inc/Steps/EventImport/Handlers/SingleRecurring/SingleRecurring.php @@ -50,6 +50,15 @@ public function __construct() { ); } + protected function getSourceInventoryCapabilities(): array { + return array( + 'can_enumerate' => true, + 'stable_ids' => true, + 'has_total_count' => true, + 'inventory_source' => 'handler_config', + ); + } + protected function executeFetch( array $config, ExecutionContext $context ): array { $context->log( 'info', 'SingleRecurring: Starting event handler' ); @@ -129,7 +138,7 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr 'flow_id' => $context->getFlowId(), 'original_title' => $event_title, 'event_identifier' => $event_identifier, - 'item_identifier' => $event_identifier, + 'item_identifier' => $event_identifier, 'import_timestamp' => time(), '_engine_data' => $engine_data, ), diff --git a/inc/Steps/EventImport/Handlers/Ticketmaster/Ticketmaster.php b/inc/Steps/EventImport/Handlers/Ticketmaster/Ticketmaster.php index fd7e7db..3dce449 100644 --- a/inc/Steps/EventImport/Handlers/Ticketmaster/Ticketmaster.php +++ b/inc/Steps/EventImport/Handlers/Ticketmaster/Ticketmaster.php @@ -51,6 +51,17 @@ public function __construct() { ); } + protected function getSourceInventoryCapabilities(): array { + return array( + 'stable_ids' => true, + 'has_total_count' => true, + 'supports_time_windows' => true, + 'supports_query_shards' => true, + 'pagination' => 'page', + 'max_pages' => self::MAX_PAGE + 1, + ); + } + /** * Execute fetch logic */ @@ -166,7 +177,7 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr 'flow_id' => $context->getFlowId(), 'original_title' => $standardized_event['title'], 'event_identifier' => $event_identifier, - 'item_identifier' => $event_identifier, + 'item_identifier' => $event_identifier, 'import_timestamp' => time(), '_engine_data' => $engine_data, ), @@ -217,7 +228,7 @@ private function build_search_params( array $handler_config, string $api_key, Ex $classification_slug = strtolower( $handler_config['classification_type'] ); if ( ! isset( $classifications[ $classification_slug ] ) ) { - throw new \Exception( 'Invalid Ticketmaster classification_type: ' . $classification_slug ); + throw new \Exception( 'Invalid Ticketmaster classification_type: ' . esc_html( $classification_slug ) ); } $params['segmentName'] = $classifications[ $classification_slug ]; @@ -282,7 +293,7 @@ public static function get_classifications( $api_key = '' ) { return self::get_fallback_classifications(); } - $api_url = 'https://app.ticketmaster.com/discovery/v2/classifications.json?apikey=' . urlencode( $api_key ); + $api_url = 'https://app.ticketmaster.com/discovery/v2/classifications.json?apikey=' . rawurlencode( $api_key ); $result = \DataMachine\Core\HttpClient::get( $api_url, array( diff --git a/inc/Steps/EventImport/Handlers/WebScraper/UniversalWebScraper.php b/inc/Steps/EventImport/Handlers/WebScraper/UniversalWebScraper.php index 90b4553..bf89760 100644 --- a/inc/Steps/EventImport/Handlers/WebScraper/UniversalWebScraper.php +++ b/inc/Steps/EventImport/Handlers/WebScraper/UniversalWebScraper.php @@ -133,6 +133,16 @@ public function __construct() { ); } + protected function getSourceInventoryCapabilities(): array { + return array( + 'stable_ids' => true, + 'supports_query_shards' => true, + 'supports_pagination' => true, + 'pagination' => 'url', + 'max_pages' => self::MAX_PAGES, + ); + } + /** * Get registered extractors in priority order. * @@ -308,17 +318,17 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr // pagination instead of returning immediately. This allows // multi-page APIs (e.g. Tribe Events with 9 pages) to be // fully scraped in a single fetch cycle. - $page_items = isset( $structured_result['items'] ) ? $structured_result['items'] : array( $structured_result ); + $page_items = isset( $structured_result['items'] ) ? $structured_result['items'] : array( $structured_result ); $accumulated_items = array_merge( $accumulated_items, $page_items ); $context->log( 'info', 'Universal Web Scraper: Accumulated structured items from page', array( - 'page' => $current_page, - 'page_items' => count( $page_items ), - 'total_items' => count( $accumulated_items ), - 'source_url' => $current_url, + 'page' => $current_page, + 'page_items' => count( $page_items ), + 'total_items' => count( $accumulated_items ), + 'source_url' => $current_url, ) ); @@ -556,9 +566,9 @@ private function tryHtmlSectionExtraction( 'source_type' => 'universal_web_scraper', 'pipeline_id' => $context->getPipelineId(), 'flow_id' => $context->getFlowId(), - 'original_title' => 'HTML Section from ' . parse_url( $current_url, PHP_URL_HOST ), + 'original_title' => 'HTML Section from ' . wp_parse_url( $current_url, PHP_URL_HOST ), 'event_identifier' => $event_section['identifier'], - 'item_identifier' => $event_section['identifier'], + 'item_identifier' => $event_section['identifier'], 'import_timestamp' => time(), ), ); @@ -771,7 +781,7 @@ private function extract_event_sections( string $html_content, string $url, Exec * Attempt to discover WordPress API endpoint if initial fetch fails. */ private function attemptWordPressApiDiscovery( string $url, ExecutionContext $context ): ?string { - $parsed = parse_url( $url ); + $parsed = wp_parse_url( $url ); if ( empty( $parsed['host'] ) ) { return null; } diff --git a/tests/Unit/SourceInventoryCapabilitiesTest.php b/tests/Unit/SourceInventoryCapabilitiesTest.php new file mode 100644 index 0000000..99afb15 --- /dev/null +++ b/tests/Unit/SourceInventoryCapabilitiesTest.php @@ -0,0 +1,74 @@ + 'event_import', + 'provider' => 'ticketmaster', + ) + ); + + $this->assertTrue( $capabilities['stable_ids'] ); + $this->assertTrue( $capabilities['has_total_count'] ); + $this->assertTrue( $capabilities['supports_time_windows'] ); + $this->assertSame( 20, $capabilities['max_pages'] ); + } + + public function test_event_flyer_source_reports_inventory_capabilities(): void { + $capabilities = apply_filters( + 'datamachine_source_inventory_capabilities', + array(), + array( + 'handler_type' => 'event_flyer', + ) + ); + + $this->assertTrue( $capabilities['can_enumerate'] ); + $this->assertTrue( $capabilities['stable_ids'] ); + $this->assertSame( 'uploaded_files', $capabilities['inventory_source'] ); + } + + public function test_existing_source_capability_overrides_default(): void { + $capabilities = apply_filters( + 'datamachine_source_inventory_capabilities', + array( 'max_pages' => 5 ), + array( 'provider' => 'universal-web-scraper' ) + ); + + $this->assertTrue( $capabilities['supports_pagination'] ); + $this->assertSame( 5, $capabilities['max_pages'] ); + } + + public function test_unknown_source_is_unchanged(): void { + $capabilities = apply_filters( + 'datamachine_source_inventory_capabilities', + array( 'stable_ids' => false ), + array( 'provider' => 'unknown' ) + ); + + $this->assertSame( array( 'stable_ids' => false ), $capabilities ); + } +}