@@ -1760,6 +1760,65 @@ fn validate_orderings(
17601760 . collect ( )
17611761}
17621762
1763+ /// The various listing tables do not attempt to read all files
1764+ /// concurrently, instead they will read files in sequence within a
1765+ /// partition. This is an important property as it allows plans to
1766+ /// run against 1000s of files and not try to open them all
1767+ /// concurrently.
1768+ ///
1769+ /// However, it means that if we assign more than one file to a partition,
1770+ /// the output sort order will not be preserved, as illustrated in the
1771+ /// following diagrams:
1772+ ///
1773+ /// When only 1 file is assigned to each partition, each partition is
1774+ /// correctly sorted on `(A, B, C)`
1775+ ///
1776+ /// ```text
1777+ /// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓
1778+ ///   ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─  ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─  ┌ ─ ─ ─ ─ ─ ─ ─ ─ ┐
1779+ /// ┃   ┌───────────────┐     ┌──────────────┐ │   ┌──────────────┐ │   ┌─────────────┐   ┃
1780+ ///   │ │   1.parquet   │ │ │ │  2.parquet   │   │ │  3.parquet   │   │ │  4.parquet  │ │
1781+ /// ┃   │ Sort: A, B, C │     │Sort: A, B, C │ │   │Sort: A, B, C │ │   │Sort: A, B, C│   ┃
1782+ ///   │ └───────────────┘ │ │ └──────────────┘   │ └──────────────┘   │ └─────────────┘ │
1783+ /// ┃                                          │                    │                     ┃
1784+ ///   │                   │ │                    │                    │                 │
1785+ /// ┃                                          │                    │                     ┃
1786+ ///   │                   │ │                    │                    │                 │
1787+ /// ┃                                          │                    │                     ┃
1788+ ///   │                   │ │                    │                    │                 │
1789+ /// ┃  ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘  ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘  ─ ─ ─ ─ ─ ─ ─ ─ ─  ┃
1790+ ///        DataFusion            DataFusion           DataFusion          DataFusion
1791+ /// ┃      Partition 1           Partition 2          Partition 3         Partition 4     ┃
1792+ ///  ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
1793+ ///
1794+ ///                                     DataSourceExec
1795+ /// ```
1796+ ///
1797+ /// However, when more than 1 file is assigned to each partition, each
1798+ /// partition is NOT correctly sorted on `(A, B, C)`. Once the second
1799+ /// file is scanned, the same values for A, B and C can be repeated in
1800+ /// the same sorted stream.
1801+ ///
1802+ /// ```text
1803+ /// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
1804+ ///   ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─  ┃
1805+ /// ┃   ┌───────────────┐     ┌──────────────┐ │
1806+ ///   │ │   1.parquet   │ │ │ │  2.parquet   │   ┃
1807+ /// ┃   │ Sort: A, B, C │     │Sort: A, B, C │ │
1808+ ///   │ └───────────────┘ │ │ └──────────────┘   ┃
1809+ /// ┃   ┌───────────────┐     ┌──────────────┐ │
1810+ ///   │ │   3.parquet   │ │ │ │  4.parquet   │   ┃
1811+ /// ┃   │ Sort: A, B, C │     │Sort: A, B, C │ │
1812+ ///   │ └───────────────┘ │ │ └──────────────┘   ┃
1813+ /// ┃                                          │
1814+ ///   │                   │ │                    ┃
1815+ /// ┃  ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
1816+ ///        DataFusion            DataFusion      ┃
1817+ /// ┃      Partition 1           Partition 2
1818+ ///  ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┛
1819+ ///
1820+ ///                 DataSourceExec
1821+ /// ```
17631822fn get_projected_output_ordering (
17641823 base_config : & FileScanConfig ,
17651824 projected_schema : & SchemaRef ,
0 commit comments