Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion inc/Steps/EventImport/Handlers/DiceFm/DiceFm.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ public function __construct() {
);
}

/**
 * Source inventory facts declared by this handler.
 *
 * Declares the 'stable_ids' and 'supports_query_shards' flags and lists
 * 'city' and 'country' under 'bounded_by'.
 *
 * @return array<string,mixed>
 */
protected function getSourceInventoryCapabilities(): array {
	$capabilities = array();

	$capabilities['stable_ids']            = true;
	$capabilities['supports_query_shards'] = true;
	$capabilities['bounded_by']            = array( 'city', 'country' );

	return $capabilities;
}

/**
* Execute Dice FM event import with flat parameter structure
*/
Expand Down Expand Up @@ -159,7 +167,7 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr
'flow_id' => $context->getFlowId(),
'original_title' => $standardized_event['title'],
'event_identifier' => $event_identifier,
'item_identifier' => $event_identifier,
'item_identifier' => $event_identifier,
'import_timestamp' => time(),
'_engine_data' => $engine_data,
),
Expand Down
15 changes: 12 additions & 3 deletions inc/Steps/EventImport/Handlers/EventFlyer/EventFlyer.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ public function __construct() {
);
}

/**
 * Source inventory facts declared by this handler.
 *
 * Declares the 'can_enumerate', 'stable_ids' and 'has_total_count' flags
 * and names 'uploaded_files' as the inventory source.
 *
 * @return array<string,mixed>
 */
protected function getSourceInventoryCapabilities(): array {
	$capabilities = array();

	$capabilities['can_enumerate']    = true;
	$capabilities['stable_ids']       = true;
	$capabilities['has_total_count']  = true;
	$capabilities['inventory_source'] = 'uploaded_files';

	return $capabilities;
}

protected function executeFetch( array $config, ExecutionContext $context ): array {
$context->log( 'info', 'EventFlyer: Starting import' );

Expand Down Expand Up @@ -98,8 +107,8 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr

// Add image context to engine data for vision processing.
$engine_data['image_file_path'] = $image_file['persistent_path'];
$upload_dir = wp_upload_dir();
$engine_data['image_url'] = str_replace( $upload_dir['basedir'], $upload_dir['baseurl'], $image_file['persistent_path'] );
$upload_dir = wp_upload_dir();
$engine_data['image_url'] = str_replace( $upload_dir['basedir'], $upload_dir['baseurl'], $image_file['persistent_path'] );

$this->stripVenueMetadataFromEvent( $event_data );

Expand All @@ -121,7 +130,7 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr
'flow_id' => $context->getFlowId(),
'original_title' => $event_data['title'] ? $event_data['title'] : $image_file['original_name'],
'event_identifier' => $event_identifier,
'item_identifier' => $file_identifier,
'item_identifier' => $file_identifier,
'import_timestamp' => time(),
'image_file_path' => $image_file['persistent_path'],
'_engine_data' => $engine_data,
Expand Down
51 changes: 51 additions & 0 deletions inc/Steps/EventImport/Handlers/EventImportHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,57 @@ public function shouldSkipEventTitle( string $title ): bool {

/**
 * Set up the handler and hook it into source inventory capability lookups.
 *
 * @param string $handler_type Handler type passed through to the parent constructor.
 */
public function __construct( string $handler_type ) {
	parent::__construct( $handler_type );

	// Priority 10, two accepted arguments: the capabilities and the source descriptor.
	$capabilities_callback = array( $this, 'filterSourceInventoryCapabilities' );
	add_filter( 'datamachine_source_inventory_capabilities', $capabilities_callback, 10, 2 );
}

/**
 * Layer this handler's inventory facts underneath a matching source's capabilities.
 *
 * Sources that do not refer to this handler are returned untouched. For a
 * matching source, handler facts act as defaults: any key already present
 * in $capabilities keeps its existing value.
 *
 * @param array<string,mixed> $capabilities Existing capabilities.
 * @param array<string,mixed> $source       Source descriptor.
 * @return array<string,mixed>
 */
public function filterSourceInventoryCapabilities( array $capabilities, array $source ): array {
	if ( $this->sourceMatchesHandler( $source ) ) {
		$handler_defaults = $this->getSourceInventoryCapabilities();

		// Later arguments win in array_merge(), so existing values override the defaults.
		$capabilities = array_merge( $handler_defaults, $capabilities );
	}

	return $capabilities;
}

/**
 * Source inventory facts owned by the handler.
 *
 * The base implementation declares nothing. Concrete handlers override it
 * when their source has known inventory, count, cursor, or
 * bounded-discovery behavior.
 *
 * @return array<string,mixed>
 */
protected function getSourceInventoryCapabilities(): array {
	$no_declared_facts = array();

	return $no_declared_facts;
}

/**
 * Whether a generic source descriptor refers to this handler.
 *
 * Checks the 'handler', 'handler_type', 'provider', 'source_type' and 'kind'
 * descriptor entries in that order and reports a match as soon as one of
 * them, once normalized, equals this handler's type.
 *
 * @param array<string,mixed> $source Source descriptor.
 * @return bool True when any identifying entry names this handler.
 */
private function sourceMatchesHandler( array $source ): bool {
	$identifying_keys = array( 'handler', 'handler_type', 'provider', 'source_type', 'kind' );

	foreach ( $identifying_keys as $key ) {
		$raw = $source[ $key ] ?? '';

		// Descriptor values are mixed: casting an array to string raises a
		// warning (and yields 'Array'), and casting an object without
		// __toString() throws, so only string-like values are considered.
		$is_stringable_object = is_object( $raw ) && method_exists( $raw, '__toString' );
		if ( ! is_scalar( $raw ) && ! $is_stringable_object ) {
			continue;
		}

		$candidate = $this->normalizeSourceKey( (string) $raw );

		// A missing or blank entry normalizes to '' and must never count as a match.
		if ( '' !== $candidate && $this->handler_type === $candidate ) {
			return true;
		}
	}

	return false;
}

/**
 * Normalize a source identifier for comparison against a handler type.
 *
 * Lowercases and trims the value, replaces each run of characters other
 * than a-z, 0-9, underscore and hyphen with a single underscore, turns
 * hyphens into underscores, and strips leading/trailing underscores.
 *
 * @param string $value Raw identifier taken from a source descriptor.
 * @return string Normalized identifier; may be an empty string.
 */
private function normalizeSourceKey( string $value ): string {
	$lowered = strtolower( trim( $value ) );

	// preg_replace() returns null on failure; the cast turns that into ''.
	$underscored = (string) preg_replace( '/[^a-z0-9_\-]+/', '_', $lowered );

	return trim( str_replace( '-', '_', $underscored ), '_' );
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ public function __construct() {
);
}

/**
 * Source inventory facts declared by this handler.
 *
 * Declares the 'can_enumerate', 'stable_ids' and 'has_total_count' flags
 * and names 'handler_config' as the inventory source.
 *
 * @return array<string,mixed>
 */
protected function getSourceInventoryCapabilities(): array {
	$capabilities = array();

	$capabilities['can_enumerate']    = true;
	$capabilities['stable_ids']       = true;
	$capabilities['has_total_count']  = true;
	$capabilities['inventory_source'] = 'handler_config';

	return $capabilities;
}

protected function executeFetch( array $config, ExecutionContext $context ): array {
$context->log( 'info', 'SingleRecurring: Starting event handler' );

Expand Down Expand Up @@ -129,7 +138,7 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr
'flow_id' => $context->getFlowId(),
'original_title' => $event_title,
'event_identifier' => $event_identifier,
'item_identifier' => $event_identifier,
'item_identifier' => $event_identifier,
'import_timestamp' => time(),
'_engine_data' => $engine_data,
),
Expand Down
17 changes: 14 additions & 3 deletions inc/Steps/EventImport/Handlers/Ticketmaster/Ticketmaster.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,17 @@ public function __construct() {
);
}

/**
 * Source inventory facts declared by this handler.
 *
 * Declares stable IDs, a total count, time-window and query-shard support,
 * and 'page' pagination capped at MAX_PAGE + 1 pages.
 *
 * @return array<string,mixed>
 */
protected function getSourceInventoryCapabilities(): array {
	// NOTE(review): the + 1 presumably converts a zero-based last page index into a page count — confirm against MAX_PAGE.
	$page_limit = self::MAX_PAGE + 1;

	$capabilities = array(
		'stable_ids'            => true,
		'has_total_count'       => true,
		'supports_time_windows' => true,
		'supports_query_shards' => true,
	);

	$capabilities['pagination'] = 'page';
	$capabilities['max_pages']  = $page_limit;

	return $capabilities;
}

/**
* Execute fetch logic
*/
Expand Down Expand Up @@ -166,7 +177,7 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr
'flow_id' => $context->getFlowId(),
'original_title' => $standardized_event['title'],
'event_identifier' => $event_identifier,
'item_identifier' => $event_identifier,
'item_identifier' => $event_identifier,
'import_timestamp' => time(),
'_engine_data' => $engine_data,
),
Expand Down Expand Up @@ -217,7 +228,7 @@ private function build_search_params( array $handler_config, string $api_key, Ex
$classification_slug = strtolower( $handler_config['classification_type'] );

if ( ! isset( $classifications[ $classification_slug ] ) ) {
throw new \Exception( 'Invalid Ticketmaster classification_type: ' . $classification_slug );
throw new \Exception( 'Invalid Ticketmaster classification_type: ' . esc_html( $classification_slug ) );
}

$params['segmentName'] = $classifications[ $classification_slug ];
Expand Down Expand Up @@ -282,7 +293,7 @@ public static function get_classifications( $api_key = '' ) {
return self::get_fallback_classifications();
}

$api_url = 'https://app.ticketmaster.com/discovery/v2/classifications.json?apikey=' . urlencode( $api_key );
$api_url = 'https://app.ticketmaster.com/discovery/v2/classifications.json?apikey=' . rawurlencode( $api_key );
$result = \DataMachine\Core\HttpClient::get(
$api_url,
array(
Expand Down
26 changes: 18 additions & 8 deletions inc/Steps/EventImport/Handlers/WebScraper/UniversalWebScraper.php
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,16 @@ public function __construct() {
);
}

/**
 * Source inventory facts declared by this handler.
 *
 * Declares stable IDs, query-shard support, and 'url' pagination capped
 * at MAX_PAGES pages.
 *
 * @return array<string,mixed>
 */
protected function getSourceInventoryCapabilities(): array {
	$capabilities = array(
		'stable_ids'            => true,
		'supports_query_shards' => true,
	);

	$capabilities['supports_pagination'] = true;
	$capabilities['pagination']          = 'url';
	$capabilities['max_pages']           = self::MAX_PAGES;

	return $capabilities;
}

/**
* Get registered extractors in priority order.
*
Expand Down Expand Up @@ -308,17 +318,17 @@ protected function executeFetch( array $config, ExecutionContext $context ): arr
// pagination instead of returning immediately. This allows
// multi-page APIs (e.g. Tribe Events with 9 pages) to be
// fully scraped in a single fetch cycle.
$page_items = isset( $structured_result['items'] ) ? $structured_result['items'] : array( $structured_result );
$page_items = isset( $structured_result['items'] ) ? $structured_result['items'] : array( $structured_result );
$accumulated_items = array_merge( $accumulated_items, $page_items );

$context->log(
'info',
'Universal Web Scraper: Accumulated structured items from page',
array(
'page' => $current_page,
'page_items' => count( $page_items ),
'total_items' => count( $accumulated_items ),
'source_url' => $current_url,
'page' => $current_page,
'page_items' => count( $page_items ),
'total_items' => count( $accumulated_items ),
'source_url' => $current_url,
)
);

Expand Down Expand Up @@ -556,9 +566,9 @@ private function tryHtmlSectionExtraction(
'source_type' => 'universal_web_scraper',
'pipeline_id' => $context->getPipelineId(),
'flow_id' => $context->getFlowId(),
'original_title' => 'HTML Section from ' . parse_url( $current_url, PHP_URL_HOST ),
'original_title' => 'HTML Section from ' . wp_parse_url( $current_url, PHP_URL_HOST ),
'event_identifier' => $event_section['identifier'],
'item_identifier' => $event_section['identifier'],
'item_identifier' => $event_section['identifier'],
'import_timestamp' => time(),
),
);
Expand Down Expand Up @@ -771,7 +781,7 @@ private function extract_event_sections( string $html_content, string $url, Exec
* Attempt to discover WordPress API endpoint if initial fetch fails.
*/
private function attemptWordPressApiDiscovery( string $url, ExecutionContext $context ): ?string {
$parsed = parse_url( $url );
$parsed = wp_parse_url( $url );
if ( empty( $parsed['host'] ) ) {
return null;
}
Expand Down
74 changes: 74 additions & 0 deletions tests/Unit/SourceInventoryCapabilitiesTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php
/**
* Source inventory capability tests.
*
* @package DataMachineEvents\Tests\Unit
*/

namespace DataMachineEvents\Tests\Unit;

use DataMachineEvents\Steps\EventImport\Handlers\EventFlyer\EventFlyer;
use DataMachineEvents\Steps\EventImport\Handlers\Ticketmaster\Ticketmaster;
use DataMachineEvents\Steps\EventImport\Handlers\WebScraper\UniversalWebScraper;
use WP_UnitTestCase;

/**
 * Exercises the 'datamachine_source_inventory_capabilities' filter with
 * the Ticketmaster, EventFlyer and UniversalWebScraper handlers loaded.
 */
class SourceInventoryCapabilitiesTest extends WP_UnitTestCase {

	/**
	 * Instantiate the handlers under test before each test.
	 *
	 * The instances are discarded; they are created only for their
	 * constructor side effects (presumably registering the capabilities
	 * filter via the base handler constructor — confirm against the handlers).
	 */
	public function setUp(): void {
		parent::setUp();

		$handler_classes = array(
			Ticketmaster::class,
			EventFlyer::class,
			UniversalWebScraper::class,
		);

		foreach ( $handler_classes as $handler_class ) {
			new $handler_class();
		}
	}

	/**
	 * Run the capabilities filter for a source descriptor.
	 *
	 * @param array<string,mixed> $source   Source descriptor handed to the filter.
	 * @param array<string,mixed> $existing Capabilities the source already declares.
	 * @return array<string,mixed> Filtered capabilities.
	 */
	private function capabilitiesFor( array $source, array $existing = array() ) {
		return apply_filters( 'datamachine_source_inventory_capabilities', $existing, $source );
	}

	/**
	 * A source whose provider is 'ticketmaster' picks up the Ticketmaster facts.
	 */
	public function test_ticketmaster_source_reports_counted_search_capabilities(): void {
		$source = array(
			'kind'     => 'event_import',
			'provider' => 'ticketmaster',
		);

		$capabilities = $this->capabilitiesFor( $source );

		$this->assertTrue( $capabilities['stable_ids'] );
		$this->assertTrue( $capabilities['has_total_count'] );
		$this->assertTrue( $capabilities['supports_time_windows'] );
		$this->assertSame( 20, $capabilities['max_pages'] );
	}

	/**
	 * A source whose handler_type is 'event_flyer' picks up the EventFlyer facts.
	 */
	public function test_event_flyer_source_reports_inventory_capabilities(): void {
		$source = array(
			'handler_type' => 'event_flyer',
		);

		$capabilities = $this->capabilitiesFor( $source );

		$this->assertTrue( $capabilities['can_enumerate'] );
		$this->assertTrue( $capabilities['stable_ids'] );
		$this->assertSame( 'uploaded_files', $capabilities['inventory_source'] );
	}

	/**
	 * Capabilities already set on the source win over handler defaults, and
	 * a hyphenated provider name still matches the handler.
	 */
	public function test_existing_source_capability_overrides_default(): void {
		$source   = array( 'provider' => 'universal-web-scraper' );
		$existing = array( 'max_pages' => 5 );

		$capabilities = $this->capabilitiesFor( $source, $existing );

		$this->assertTrue( $capabilities['supports_pagination'] );
		$this->assertSame( 5, $capabilities['max_pages'] );
	}

	/**
	 * A source no loaded handler recognizes passes through unmodified.
	 */
	public function test_unknown_source_is_unchanged(): void {
		$source   = array( 'provider' => 'unknown' );
		$existing = array( 'stable_ids' => false );

		$capabilities = $this->capabilitiesFor( $source, $existing );

		$this->assertSame( array( 'stable_ids' => false ), $capabilities );
	}
}
Loading