210210define ('EBML_ID_CLUSTERREFERENCEVIRTUAL ' , 0x7D ); // [FD] -- Relative position of the data that should be in position of the virtual block.
211211
212212
/**
 * Matroska constants
 *
 * MATROSKA_DEFAULT_TIMECODESCALE is the fallback used when an Info element
 * carries no TimecodeScale of its own. TimecodeScale is the number of
 * nanoseconds per Duration/timecode unit, so the default is 1 ms per unit.
 */
define('MATROSKA_DEFAULT_TIMECODESCALE', 1000000);

/**
 * Matroska scan modes are internal state flags for how much of the file we are scanning
 *
 * MATROSKA_SCAN_HEADER        - stop as soon as the header-level data has been collected
 * MATROSKA_SCAN_WHOLE_FILE    - selected when parse_whole_file is enabled
 * MATROSKA_SCAN_FIRST_CLUSTER - stop parsing right after the first Cluster is found
 * MATROSKA_SCAN_LAST_CLUSTER  - used while scanning the window before EOF for the last Cluster
 */
define('MATROSKA_SCAN_HEADER',        0);
define('MATROSKA_SCAN_WHOLE_FILE',    1);
define('MATROSKA_SCAN_FIRST_CLUSTER', 2);
define('MATROSKA_SCAN_LAST_CLUSTER',  3);
225+
213226/**
214227* @tutorial http://www.matroska.org/technical/specs/index.html
215228*
@@ -241,13 +254,15 @@ class getid3_matroska extends getid3_handler
241254 private $ EBMLbuffer_length = 0 ;
242255 private $ current_offset = 0 ;
243256 private $ unuseful_elements = array (EBML_ID_CRC32 , EBML_ID_VOID );
257+ private $ scan_mode = MATROSKA_SCAN_HEADER ;
244258
245259 /**
246260 * @return bool
247261 */
248262 public function Analyze ()
249263 {
250264 $ info = &$ this ->getid3 ->info ;
265+ $ this ->scan_mode = $ this ->parse_whole_file ? MATROSKA_SCAN_WHOLE_FILE : MATROSKA_SCAN_HEADER ;
251266
252267 // parse container
253268 try {
@@ -256,14 +271,25 @@ public function Analyze()
256271 $ this ->error ('EBML parser: ' .$ e ->getMessage ());
257272 }
258273
259- // calculate playtime
260- if (isset ($ info ['matroska ' ]['info ' ]) && is_array ($ info ['matroska ' ]['info ' ])) {
261- foreach ($ info ['matroska ' ]['info ' ] as $ key => $ infoarray ) {
262- if (isset ($ infoarray ['Duration ' ])) {
263- // TimecodeScale is how many nanoseconds each Duration unit is
264- $ info ['playtime_seconds ' ] = $ infoarray ['Duration ' ] * ((isset ($ infoarray ['TimecodeScale ' ]) ? $ infoarray ['TimecodeScale ' ] : 1000000 ) / 1000000000 );
265- break ;
266- }
274+ $ this ->playtimeFromMetadata ($ info );
275+
276+ // If there was no duration metadata, this might be an incomplete file or a streaming file
277+ // We need Cluster information so we can use their timecodes to estimate playtime.
278+ if (!isset ($ info ['playtime_seconds ' ]) && !$ this ->parse_whole_file ) {
279+ // Scan the start and end of file for Clusters to estimate duration
280+ $ this ->scanStartEndForClusters ($ info );
281+ }
282+
283+ if (isset ($ info ['matroska ' ]['cluster ' ]) && is_array ($ info ['matroska ' ]['cluster ' ])) {
284+ if (!isset ($ info ['playtime_seconds ' ]) && !empty ($ info ['matroska ' ]['cluster ' ])) {
285+ // estimate playtime using clusters if not yet known
286+ $ this ->calculatePlaytimeFromClusters ($ info );
287+ }
288+
289+ // Remove cluster information from output if hide_clusters is true
290+ // These could have been set during scanStartEndForClusters()
291+ if ($ this ->hide_clusters ) {
292+ unset($ info ['matroska ' ]['cluster ' ]);
267293 }
268294 }
269295
@@ -332,7 +358,11 @@ public function Analyze()
332358 break;*/
333359 }
334360
335- $ info ['video ' ]['streams ' ][$ trackarray ['TrackUID ' ]] = $ track_info ;
361+ if (isset ($ trackarray ['TrackUID ' ])) {
362+ $ info ['video ' ]['streams ' ][$ trackarray ['TrackUID ' ]] = $ track_info ;
363+ } else {
364+ $ this ->warning ('Missing mandatory TrackUID for video track ' );
365+ }
336366 break ;
337367
338368 case 2 : // Audio
@@ -480,8 +510,11 @@ public function Analyze()
480510 $ this ->warning ('Unhandled audio type " ' .(isset ($ trackarray ['CodecID ' ]) ? $ trackarray ['CodecID ' ] : '' ).'" ' );
481511 break ;
482512 }
483-
484- $ info ['audio ' ]['streams ' ][$ trackarray ['TrackUID ' ]] = $ track_info ;
513+ if (isset ($ trackarray ['TrackUID ' ])) {
514+ $ info ['audio ' ]['streams ' ][$ trackarray ['TrackUID ' ]] = $ track_info ;
515+ } else {
516+ $ this ->warning ('Missing mandatory TrackUID for audio track ' );
517+ }
485518 break ;
486519 }
487520 }
@@ -1246,12 +1279,17 @@ private function parseEBML(&$info) {
12461279 }
12471280 $ this ->current_offset = $ subelement ['end ' ];
12481281 }
1249- if (!$ this ->hide_clusters ) {
1282+
1283+ if (!$ this ->hide_clusters || $ this ->playtimeFromMetadata ($ info ) === false ) {
12501284 $ info ['matroska ' ]['cluster ' ][] = $ cluster_entry ;
12511285 }
1286+ if ($ this ->scan_mode === MATROSKA_SCAN_FIRST_CLUSTER ) {
1287+ // Stop parsing after finding first cluster
1288+ return ;
1289+ }
12521290
12531291 // check to see if all the data we need exists already, if so, break out of the loop
1254- if (! $ this ->parse_whole_file ) {
1292+ if ($ this ->scan_mode === MATROSKA_SCAN_HEADER ) {
12551293 if (isset ($ info ['matroska ' ]['info ' ]) && is_array ($ info ['matroska ' ]['info ' ])) {
12561294 if (isset ($ info ['matroska ' ]['tracks ' ]['tracks ' ]) && is_array ($ info ['matroska ' ]['tracks ' ]['tracks ' ])) {
12571295 if (count ($ info ['matroska ' ]['track_data_offsets ' ]) == count ($ info ['matroska ' ]['tracks ' ]['tracks ' ])) {
@@ -1919,4 +1957,116 @@ private static function getDefaultStreamInfo($streams)
19191957 return $ info ;
19201958 }
19211959
/**
 * Derive the playtime from the Duration stored in the Matroska Info element(s).
 *
 * The first entry of $info['matroska']['info'] that carries a Duration wins.
 * Side effect: when a Duration is found, $info['playtime_seconds'] is
 * (re)written with the computed value before it is returned.
 *
 * @param array $info
 *
 * @return float|bool Duration in seconds when present in metadata, or false
 */
private function playtimeFromMetadata(&$info) {
    if (!isset($info['matroska']['info']) || !is_array($info['matroska']['info'])) {
        return false;
    }

    foreach ($info['matroska']['info'] as $infoarray) {
        if (!isset($infoarray['Duration'])) {
            continue;
        }
        // TimecodeScale is how many nanoseconds each Duration unit is
        $timecodeScale = isset($infoarray['TimecodeScale']) ? $infoarray['TimecodeScale'] : MATROSKA_DEFAULT_TIMECODESCALE;
        $info['playtime_seconds'] = $infoarray['Duration'] * ($timecodeScale / 1000000000);
        return $info['playtime_seconds'];
    }

    return false;
}
1977+
/**
 * Drop any buffered EBML data and move the parser position to $offset.
 *
 * Clears the buffer string and zeroes its offset/length bookkeeping so no
 * stale bytes from the previous position remain in the parser state.
 *
 * @param int $offset New starting offset for the buffer
 *
 * @return void
 */
private function resetParserBuffer($offset) {
    $this->current_offset    = $offset;
    $this->EBMLbuffer        = '';
    $this->EBMLbuffer_offset = 0;
    $this->EBMLbuffer_length = 0;
}
1989+
/**
 * Scan start and end of file for cluster information when Duration is missing
 * Only use this if no Duration was found in the Info element and we are not in parse_whole_file mode
 *
 * Runs parseEBML() twice: once from $info['avdataoffset'] in
 * MATROSKA_SCAN_FIRST_CLUSTER mode and, only if that produced at least one
 * cluster, once more over the last 1MB before $info['avdataend'] in
 * MATROSKA_SCAN_LAST_CLUSTER mode. Parser exceptions are reported through
 * error() and do not propagate. scan_mode is restored to
 * MATROSKA_SCAN_HEADER before returning.
 *
 * @param array $info
 *
 * @return void
 */
private function scanStartEndForClusters(&$info) {
    // Scan beginning of file for first cluster
    $this->resetParserBuffer($info['avdataoffset']);
    $this->scan_mode = MATROSKA_SCAN_FIRST_CLUSTER;

    try {
        $this->parseEBML($info);
    } catch (Exception $e) {
        $this->error('EBML parser (start of file): '.$e->getMessage());
    }

    // Scan end of file for last cluster.
    // empty() is checked first so that a first pass which found no cluster
    // (key never created) does not trigger an undefined-index warning.
    if (!empty($info['matroska']['cluster']) && is_array($info['matroska']['cluster'])) {
        // Scan maximum 1MB window before EOF
        $this->resetParserBuffer(max(0, $info['avdataend'] - (1024 * 1024)));
        $this->scan_mode = MATROSKA_SCAN_LAST_CLUSTER;

        try {
            $this->parseEBML($info);
        } catch (Exception $e) {
            $this->error('EBML parser (end of file): '.$e->getMessage());
        }
    }

    // Reset to header parsing mode (this method is only called during header-only parsing)
    $this->scan_mode = MATROSKA_SCAN_HEADER;
}
2025+
/**
 * Fetch TimecodeScale from Info element
 *
 * Returns the TimecodeScale of the first entry in $info['matroska']['info']
 * that defines one; falls back to MATROSKA_DEFAULT_TIMECODESCALE when no
 * entry does (or when there is no Info data at all).
 *
 * @param array $info
 *
 * @return int TimecodeScale value
 */
private function getTimecodeScale(&$info) {
    if (isset($info['matroska']['info']) && is_array($info['matroska']['info'])) {
        foreach ($info['matroska']['info'] as $infoarray) {
            if (isset($infoarray['TimecodeScale'])) {
                return $infoarray['TimecodeScale'];
            }
        }
    }
    return MATROSKA_DEFAULT_TIMECODESCALE;
}
2045+
/**
 * Calculate duration from scanned cluster timecodes
 *
 * Takes the smallest and largest ClusterTimecode found in
 * $info['matroska']['cluster'] and converts their difference to seconds
 * using the file's TimecodeScale (nanoseconds per timecode unit).
 * $info['playtime_seconds'] is only written when the span is strictly
 * positive, i.e. at least two clusters with different timecodes were seen.
 *
 * @param array $info
 *
 * @return void
 */
private function calculatePlaytimeFromClusters(&$info) {
    if (!isset($info['matroska']['cluster']) || !is_array($info['matroska']['cluster'])) {
        return;
    }

    // Gather every timecode first; clusters without one are ignored
    $timecodes = array();
    foreach ($info['matroska']['cluster'] as $cluster) {
        if (isset($cluster['ClusterTimecode'])) {
            $timecodes[] = $cluster['ClusterTimecode'];
        }
    }
    if (empty($timecodes)) {
        return;
    }

    $minTimecode = min($timecodes);
    $maxTimecode = max($timecodes);
    if ($maxTimecode > $minTimecode) {
        $info['playtime_seconds'] = ($maxTimecode - $minTimecode) * ($this->getTimecodeScale($info) / 1000000000);
    }
}
19222072}
0 commit comments