@@ -177653,27 +177653,37 @@ async function readHeaderFooter(tuple) {
177653177653 });
177654177654}
177655177655
/** @summary Concatenate elements [start, end) of a column stored as a list of pages
 * @desc Positions are counted over all pages of the column in order, so the
 * requested range may begin in one page and finish in a later one.
 * @param {Array} columnPages - deserialized pages of one column
 * @param {number} start - first element position (inclusive)
 * @param {number} end - last element position (exclusive)
 * @return {string} selected elements joined without separator
 * @private */
function joinPagedRange(columnPages, start, end) {
   let base = 0, text = '';
   for (const page of columnPages) {
      const next = base + page.length;
      // only pages overlapping [start, end) contribute
      if ((next > start) && (base < end))
         text += page.slice(Math.max(start - base, 0), Math.min(end - base, page.length)).join('');
      if (next >= end)
         break;
      base = next;
   }
   return text;
}

/** @summary Read value of one field for a single entry of the currently loaded cluster
 * @desc Data is taken from rntuple._clusterData, which holds for every column index
 * the list of deserialized pages. The page containing the entry is located using
 * numElements of the page locations of the first column of the field.
 * For std::string fields the first column provides offsets and the second the characters.
 * NOTE(review): string decoding assumes that deserialized offsets are end positions
 * relative to the cluster start (as described by the RNTuple format) - confirm
 * against deserializePage.
 * @param {object} rntuple - object with builder, fieldToColumns and _clusterData
 * @param {string} fieldName - name of the field to read
 * @param {number} clusterIndex - index of the cluster used to look up page locations
 * @param {number} entryIndex - entry index inside the cluster
 * @return value of the entry, string for std::string fields
 * @throws {Error} when descriptor, columns, pages or data of the field are missing
 * @private */
function readEntry(rntuple, fieldName, clusterIndex, entryIndex) {
   const builder = rntuple.builder,
         field = builder.fieldDescriptors.find(f => f.fieldName === fieldName),
         columns = rntuple.fieldToColumns[fieldName];

   if (!field)
      throw new Error(`No descriptor for field ${fieldName}`);
   if (!columns || (columns.length === 0))
      throw new Error(`No columns field ${fieldName}`);

   const pages = builder.pageLocations[clusterIndex]?.[columns[0].index]?.pages;
   if (!pages)
      throw new Error(`No pages found ${fieldName}`);

   // walk over pages until the one holding the entry; keep the caller argument untouched
   let pageid = 0, localIndex = entryIndex;
   while ((pageid < pages.length - 1) && (localIndex >= Number(pages[pageid].numElements))) {
      localIndex -= Number(pages[pageid].numElements);
      pageid++;
   }

   const firstColumn = rntuple._clusterData[columns[0].index];
   if (!firstColumn)
      throw new Error(`No data for field ${fieldName}`);

   if (field.typeName !== 'std::string')
      return firstColumn[pageid][localIndex];

   // string extracted from two columns
   if (columns.length < 2)
      throw new Error(`String field '${fieldName}' must have 2 columns`);
   const payloadPages = rntuple._clusterData[columns[1].index];
   if (!payloadPages)
      throw new Error(`No data for field ${fieldName}`);

   const offsets = firstColumn[pageid];
   let start = 0;
   if (localIndex > 0)
      start = Number(offsets[localIndex - 1]);
   else if (pageid > 0) {
      // first entry of a later page starts where the previous page ended
      const previous = firstColumn[pageid - 1];
      start = Number(previous[previous.length - 1]);
   }

   // payload column is paged independently from the offsets column
   return joinPagedRange(payloadPages, start, Number(offsets[localIndex]));
}
177678177688
177679177689/** @summary Return field name for specified branch index
@@ -177684,11 +177694,11 @@ function getSelectorFieldName(selector, i) {
177684177694}
177685177695
177686177696// Read and process the next data cluster from the RNTuple
177687- function readNextCluster(rntuple, selector) {
177697+ async function readNextCluster(rntuple, selector) {
177688177698 const builder = rntuple.builder;
177689177699
177690177700 // Add validation
177691- if (!builder.clusterSummaries || builder.clusterSummaries.length === 0 )
177701+ if (!builder.clusterSummaries)
177692177702 throw new Error('No cluster summaries available - possibly incomplete file reading');
177693177703
177694177704 const clusterIndex = selector.currentCluster,
@@ -177698,6 +177708,11 @@ function readNextCluster(rntuple, selector) {
177698177708 // Collect only selected field names from selector
177699177709 selectedFields = [];
177700177710
177711+ if (!clusterSummary) {
177712+ selector.Terminate(clusterIndex > 0);
177713+ return false;
177714+ }
177715+
177701177716 for (let i = 0; i < selector.numBranches(); ++i)
177702177717 selectedFields.push(getSelectorFieldName(selector, i));
177703177718
@@ -177724,7 +177739,7 @@ function readNextCluster(rntuple, selector) {
177724177739 // Early exit if no pages to read (i.e., no selected fields matched)
177725177740 if (pages.length === 0) {
177726177741 selector.Terminate(false);
177727- return Promise.resolve() ;
177742+ return false ;
177728177743 }
177729177744
177730177745 // Build flat array of [offset, size, offset, size, ...] to read pages
@@ -177771,68 +177786,35 @@ function readNextCluster(rntuple, selector) {
177771177786 });
177772177787
177773177788 return Promise.all(unzipPromises).then(unzipBlobs => {
177774- rntuple._clusterData = {}; // store deserialized data per field
177789+ rntuple._clusterData = {}; // store deserialized data per column index
177775177790
177776177791 for (let i = 0; i < unzipBlobs.length; ++i) {
177777177792 const blob = unzipBlobs[i];
177778177793 // Ensure blob is a DataView
177779177794 if (!(blob instanceof DataView))
177780177795 throw new Error(`Invalid blob type for page ${i}: ${Object.prototype.toString.call(blob)}`);
177781- const {
177782- page,
177783- colDesc
177784- } = pages[i],
177785- field = builder.fieldDescriptors[colDesc.fieldId],
177786- values = builder.deserializePage(blob, colDesc, page);
177796+ const colDesc = pages[i].colDesc,
177797+ values = builder.deserializePage(blob, colDesc, pages[i].page);
177787177798
177788177799 // Support multiple representations (e.g., string fields with offsets + payload)
177789- if (!rntuple._clusterData[field.fieldName])
177790- rntuple._clusterData[field.fieldName] = [];
177791-
177792- // splitting string fields into offset and payload components
177793- if (field.typeName === 'std::string') {
177794- if (
177795- colDesc.coltype === ENTupleColumnType.kIndex64 ||
177796- colDesc.coltype === ENTupleColumnType.kIndex32 ||
177797- colDesc.coltype === ENTupleColumnType.kSplitIndex64 ||
177798- colDesc.coltype === ENTupleColumnType.kSplitIndex32
177799- ) // Index64/Index32
177800- rntuple._clusterData[field.fieldName][0] = values; // Offsets
177801- else if (colDesc.coltype === ENTupleColumnType.kChar)
177802- rntuple._clusterData[field.fieldName][1] = values; // Payload
177803- else
177804- throw new Error(`Unsupported column type for string field: ${colDesc.coltype}`);
177805- } else
177806- rntuple._clusterData[field.fieldName][0] = values;
177807- }
177800+ if (!rntuple._clusterData[colDesc.index])
177801+ rntuple._clusterData[colDesc.index] = [];
177808177802
177809- // Ensure string fields have ending offset for proper reconstruction of the last entry
177810- for (const fieldName of selectedFields) {
177811- const field = builder.fieldDescriptors.find(f => f.fieldName === fieldName),
177812- colData = rntuple._clusterData[fieldName];
177813- if (field.typeName === 'std::string') {
177814- if (!Array.isArray(colData) || colData.length !== 2)
177815- throw new Error(`String field '${fieldName}' must have 2 columns`);
177816- if (colData[0].length !== builder.clusterSummaries[clusterIndex].numEntries)
177817- throw new Error(`Malformed string field '${fieldName}': missing final offset`);
177818- }
177803+ rntuple._clusterData[colDesc.index].push(values);
177819177804 }
177820177805
177821177806 const numEntries = clusterSummary.numEntries;
177822177807 for (let i = 0; i < numEntries; ++i) {
177823177808 for (let b = 0; b < selector.numBranches(); ++b) {
177824177809 const fieldName = getSelectorFieldName(selector, b),
177825- tgtName = selector.nameOfBranch(b),
177826- values = rntuple._clusterData[fieldName];
177810+ tgtName = selector.nameOfBranch(b);
177827177811
177828- if (!values)
177829- throw new Error(`Missing values for selected field: ${fieldName}`);
177830- selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, i);
177812+ selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, clusterIndex, i);
177831177813 }
177832- selector.Process();
177814+ selector.Process(selector.currentEntry++ );
177833177815 }
177834177816
177835- selector.Terminate(true );
177817+ return readNextCluster(rntuple, selector );
177836177818 });
177837177819 });
177838177820}
@@ -177843,6 +177825,7 @@ function rntupleProcess(rntuple, selector, args) {
177843177825 return readHeaderFooter(rntuple).then(() => {
177844177826 selector.Begin();
177845177827 selector.currentCluster = 0;
177828+ selector.currentEntry = 0;
177846177829 return readNextCluster(rntuple, selector);
177847177830 }).then(() => selector);
177848177831}
0 commit comments