|
23 | 23 |
|
24 | 24 | #include "cdb/cdbtm.h" |
25 | 25 | #include "cdb/cdbvars.h" |
| 26 | +#include "miscadmin.h" |
| 27 | +#include "storage/lock.h" |
26 | 28 | #include "utils/builtins.h" |
27 | 29 |
|
28 | 30 | /* helper function declarations */ |
@@ -392,3 +394,66 @@ PxfBridgeImportStartFragment(PxfFdwScanState *pxfsstate, int fragmentIndex) |
392 | 394 | /* Update current fragment tracking */ |
393 | 395 | pxfsstate->current_fragment = fragmentIndex; |
394 | 396 | } |
| 397 | + |
| 398 | +/* |
| 399 | + * ============================================================================ |
| 400 | + * Cloudberry Gang-Parallel Support (Virtual Segment ID) |
| 401 | + * |
| 402 | + * In Cloudberry/Greenplum, parallel execution uses "gang expansion" where |
| 403 | + * multiple processes share the same physical segment ID. PostgreSQL's DSM |
| 404 | + * callbacks (InitializeDSMForeignScan, InitializeWorkerForeignScan) are |
| 405 | + * NOT invoked in this model. |
| 406 | + * |
| 407 | + * Instead of fragment-by-fragment coordination, we use "virtual segment IDs": |
| 408 | + * each gang worker sends a unique virtual segment ID to PXF, so PXF's |
| 409 | + * existing round-robin fragment distribution splits the data among workers |
| 410 | + * automatically — no PXF server changes needed. |
| 411 | + * |
| 412 | + * Example: 3 physical segments × 4 workers = 12 virtual segments. |
| 413 | + * Worker i on physical segment S sends virtual_seg_id = S + i * seg_count, |
| 414 | + * with virtual_seg_count = seg_count * workers. |
| 415 | + * ============================================================================ |
| 416 | + */ |
| 417 | + |
| 418 | +/* |
| 419 | + * PxfBridgeImportStartVirtual |
| 420 | + * Start import with virtual segment ID for Cloudberry gang-parallel mode. |
| 421 | + * |
| 422 | + * Same as PxfBridgeImportStart, but after building the standard HTTP headers, |
| 423 | + * overrides X-GP-SEGMENT-ID and X-GP-SEGMENT-COUNT with the virtual values. |
| 424 | + * This makes PXF's round-robin assign a unique subset of fragments to each |
| 425 | + * gang worker, eliminating data duplication. |
| 426 | + */ |
| 427 | +void |
| 428 | +PxfBridgeImportStartVirtual(PxfFdwScanState *pxfsstate, |
| 429 | + int virtualSegId, int virtualSegCount) |
| 430 | +{ |
| 431 | + char seg_id_str[16]; |
| 432 | + char seg_count_str[16]; |
| 433 | + |
| 434 | + pxfsstate->churl_headers = churl_headers_init(); |
| 435 | + |
| 436 | + BuildUriForRead(pxfsstate); |
| 437 | + BuildHttpHeaders(pxfsstate->churl_headers, |
| 438 | + pxfsstate->options, |
| 439 | + pxfsstate->relation, |
| 440 | + pxfsstate->filter_str, |
| 441 | + pxfsstate->retrieved_attrs, |
| 442 | + pxfsstate->projectionInfo); |
| 443 | + |
| 444 | + /* Override physical segment ID/count with virtual values */ |
| 445 | + pg_ltoa(virtualSegId, seg_id_str); |
| 446 | + pg_ltoa(virtualSegCount, seg_count_str); |
| 447 | + churl_headers_override(pxfsstate->churl_headers, "X-GP-SEGMENT-ID", seg_id_str); |
| 448 | + churl_headers_override(pxfsstate->churl_headers, "X-GP-SEGMENT-COUNT", seg_count_str); |
| 449 | + |
| 450 | + elog(DEBUG3, "pxf_fdw: PxfBridgeImportStartVirtual physical_seg=%d " |
| 451 | + "virtual_seg_id=%d virtual_seg_count=%d", |
| 452 | + PXF_SEGMENT_ID, virtualSegId, virtualSegCount); |
| 453 | + |
| 454 | + pxfsstate->churl_handle = churl_init_download(pxfsstate->uri.data, |
| 455 | + pxfsstate->churl_headers); |
| 456 | + |
| 457 | + /* read some bytes to make sure the connection is established */ |
| 458 | + churl_read_check_connectivity(pxfsstate->churl_handle); |
| 459 | +} |
0 commit comments