@@ -94,6 +94,123 @@ impl CatalogEntry {
9494 out. extend_from_slice ( name_bytes) ;
9595 out
9696 }
97+
98+ /// Scan the raw data area of an ESE catalog leaf page for all TABLE entries.
99+ ///
100+ /// Unlike [`parse_real_catalog_record`], which scans a single tag's bytes
101+ /// and returns the first match, this function scans the entire page data
102+ /// area (from the end of the 40-byte header to the start of the tag array)
103+ /// and returns every distinct entry found.
104+ ///
105+ /// Real ESE catalog leaf pages use a cumulative key-prefix-compression
106+ /// format where the first logical records can reside in the page data area
107+ /// before the offset of the first tag. Scanning individual tags therefore
108+ /// misses those early records. This function avoids that problem by
109+ /// scanning the full data span directly.
110+ ///
111+ /// Entries are deduplicated by `object_name` — if the same name appears
112+ /// more than once (because the cumulative format causes successive tags to
113+ /// re-include earlier data), only the first occurrence is kept.
114+ pub fn scan_catalog_page_data ( data_area : & [ u8 ] ) -> Vec < Self > {
115+ const MIN_I : usize = 20 ; // need ≥20 bytes before \xff for obj_id + pgnoFDP
116+ const MAX_NAME : usize = 64 ;
117+ let len = data_area. len ( ) ;
118+ let mut entries: Vec < Self > = Vec :: new ( ) ;
119+ let mut seen: std:: collections:: HashSet < & str > = std:: collections:: HashSet :: new ( ) ;
120+ let mut i = MIN_I ;
121+ while i + 4 <= len {
122+ if data_area[ i] != 0xff || data_area[ i + 1 ] != 0x00 {
123+ i += 1 ;
124+ continue ;
125+ }
126+ let name_len = u16:: from_le_bytes ( [ data_area[ i + 2 ] , data_area[ i + 3 ] ] ) as usize ;
127+ if name_len == 0 || name_len > MAX_NAME || i + 4 + name_len > len {
128+ i += 1 ;
129+ continue ;
130+ }
131+ let name_bytes = & data_area[ i + 4 ..i + 4 + name_len] ;
132+ if !name_bytes. is_ascii ( ) {
133+ i += 1 ;
134+ continue ;
135+ }
136+ let Ok ( name) = std:: str:: from_utf8 ( name_bytes) else {
137+ i += 1 ;
138+ continue ;
139+ } ;
140+ if name. is_empty ( ) || seen. contains ( name) {
141+ i += 1 ;
142+ continue ;
143+ }
144+ // Safety: i >= 20, so i-16 and i-20 are both in-bounds.
145+ let pgnofdf_raw =
146+ u32:: from_le_bytes ( data_area[ i - 16 ..i - 12 ] . try_into ( ) . unwrap ( ) ) ;
147+ let object_id =
148+ u32:: from_le_bytes ( data_area[ i - 20 ..i - 16 ] . try_into ( ) . unwrap ( ) ) ;
149+ seen. insert ( name) ;
150+ entries. push ( Self {
151+ object_type : 1 ,
152+ object_id,
153+ parent_object_id : 1 ,
154+ table_page : pgnofdf_raw + 1 ,
155+ object_name : name. to_owned ( ) ,
156+ } ) ;
157+ i += 4 + name_len;
158+ }
159+ entries
160+ }
161+
162+ /// Try to parse a real ESE catalog TABLE entry from a leaf-page tag byte slice.
163+ ///
164+ /// Real ESE MSysObjects records use a tagged-column encoding where the `Name`
165+ /// column (column 128) is preceded by a two-byte marker `[0xFF, 0x00]` followed
166+ /// by a two-byte LE length and the ASCII name bytes. The `pgnoFDP` (root B-tree
167+ /// page of the table) lives 16 bytes before the `0xFF` marker, and the object ID
168+ /// lives 20 bytes before it — both as u32 LE.
169+ ///
170+ /// `pgnoFDP` is stored as an ESE 0-based data-page number; this function adds 1
171+ /// to convert it to the physical page number expected by [`EseDatabase::read_page`].
172+ ///
173+ /// Returns `None` if the slice contains no recognisable TABLE entry.
174+ pub fn parse_real_catalog_record ( data : & [ u8 ] ) -> Option < Self > {
175+ const MIN_BEFORE : usize = 20 ; // need ≥20 bytes before 0xFF for object_id + pgnoFDP + gap
176+ let len = data. len ( ) ;
177+ let mut i = MIN_BEFORE ;
178+ while i + 4 <= len {
179+ if data[ i] != 0xff || data[ i + 1 ] != 0x00 {
180+ i += 1 ;
181+ continue ;
182+ }
183+ let name_len = u16:: from_le_bytes ( [ data[ i + 2 ] , data[ i + 3 ] ] ) as usize ;
184+ if name_len == 0 || i + 4 + name_len > len {
185+ i += 1 ;
186+ continue ;
187+ }
188+ let name_bytes = & data[ i + 4 ..i + 4 + name_len] ;
189+ if !name_bytes. is_ascii ( ) {
190+ i += 1 ;
191+ continue ;
192+ }
193+ let Ok ( name) = std:: str:: from_utf8 ( name_bytes) else {
194+ i += 1 ;
195+ continue ;
196+ } ;
197+ if name. is_empty ( ) {
198+ i += 1 ;
199+ continue ;
200+ }
201+ let pgnofdf_raw = u32:: from_le_bytes ( data[ i - 16 ..i - 12 ] . try_into ( ) . ok ( ) ?) ;
202+ let object_id = u32:: from_le_bytes ( data[ i - 20 ..i - 16 ] . try_into ( ) . ok ( ) ?) ;
203+ let table_page = pgnofdf_raw + 1 ; // ESE 0-based → physical page
204+ return Some ( Self {
205+ object_type : 1 ,
206+ object_id,
207+ parent_object_id : 1 ,
208+ table_page,
209+ object_name : name. to_owned ( ) ,
210+ } ) ;
211+ }
212+ None
213+ }
97214}
98215
99216#[ cfg( test) ]
@@ -121,4 +238,51 @@ mod tests {
121238 let result = CatalogEntry :: from_bytes ( & [ 0u8 ; 5 ] ) ;
122239 assert ! ( result. is_err( ) ) ;
123240 }
241+
#[test]
fn parse_real_catalog_record_extracts_name_and_page() {
    // Assemble a minimal real-format record by appending its parts in order:
    //   [object_id u32 LE][pgnoFDP u32 LE][12 zero bytes]   <- 20 bytes before the marker
    //   [0xFF][0x00][name_len u16 LE][name bytes]
    const MARKER_AT: usize = 20;
    let table_name: &[u8] = b"SruDbIdMapTable";
    let expected_object_id: u32 = 42;
    let raw_pgno_fdp: u32 = 31; // ESE page 31 → physical page 32

    let mut record: Vec<u8> = Vec::with_capacity(MARKER_AT + 4 + table_name.len());
    // object_id ends up at marker-20, pgnoFDP at marker-16.
    record.extend_from_slice(&expected_object_id.to_le_bytes());
    record.extend_from_slice(&raw_pgno_fdp.to_le_bytes());
    // Zero-fill the gap up to the marker position.
    record.resize(MARKER_AT, 0u8);
    record.extend_from_slice(&[0xff, 0x00]);
    record.extend_from_slice(&(table_name.len() as u16).to_le_bytes());
    record.extend_from_slice(table_name);

    let parsed =
        CatalogEntry::parse_real_catalog_record(&record).expect("must find TABLE entry");
    assert_eq!(parsed.object_name, "SruDbIdMapTable");
    assert_eq!(parsed.table_page, 32); // pgnoFDP + 1
    assert_eq!(parsed.object_id, 42);
    assert_eq!(parsed.object_type, 1);
}
270+
#[test]
fn parse_real_catalog_record_returns_none_for_synthetic_format() {
    // Bytes produced by the synthetic encoder (`to_bytes`) are expected not to
    // contain the `[0xFF, 0x00]` name marker at a position the real-format
    // scanner accepts, so the scanner must report no entry.
    let synthetic_bytes = CatalogEntry {
        object_type: 1,
        object_id: 2,
        parent_object_id: 1,
        table_page: 100,
        object_name: "OrphanedTable".to_owned(),
    }
    .to_bytes();

    let parsed = CatalogEntry::parse_real_catalog_record(&synthetic_bytes);
    assert!(
        parsed.is_none(),
        "synthetic format must not match real catalog scanner"
    );
}
124288}
0 commit comments