Skip to content

Commit 64943ed

Browse files
authored
Merge pull request #1776 from Nidhi-M21/fix/split-into-null-columns
fix: create null-filled columns for remaining names in split into
2 parents 0699bf6 + 7889058 commit 64943ed

2 files changed

Lines changed: 196 additions & 0 deletions

File tree

  • core/src
    • main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api
    • test/kotlin/org/jetbrains/kotlinx/dataframe/api

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/split.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ internal fun <T, C, R> splitImpl(
5353
}
5454

5555
val names = columnNamesGenerator(column, columnCollectors.size)
56+
57+
repeat(names.size - columnCollectors.size) {
58+
val collector = createDataCollector(nrow)
59+
repeat(nrow) { collector.add(clause.default) }
60+
columnCollectors.add(collector)
61+
}
62+
5663
val sourcePath = node.pathFromRoot()
5764

5865
columnCollectors.forEachIndexed { i, col ->

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,4 +173,193 @@ class SplitTests {
173173
columnOf("d") named "first",
174174
)
175175
}
176+
177+
/**
 * Splits a list column into three named columns when no row supplies three
 * elements; every requested column must exist and absent cells must be `null`.
 */
@Test
fun `split into null filled columns`() {
    val source = dataFrameOf("a")(
        emptyList<Int>(),
        listOf(1, 2),
        listOf(1),
    )
    val targetNames = listOf("a1", "a2", "a3")

    val res = source.split { "a"<List<Int>>() }.into("a1", "a2", "a3")

    res.columnNames() shouldBe targetNames

    // One inner list per source row, one value per target column.
    val expectedRows = listOf(
        listOf(null, null, null),
        listOf(1, 2, null),
        listOf(1, null, null),
    )
    expectedRows.forEachIndexed { rowIndex, expectedRow ->
        targetNames.zip(expectedRow).forEach { (name, expected) ->
            res[name][rowIndex] shouldBe expected
        }
    }
}
201+
202+
/**
 * When every list holds exactly as many elements as there are target names,
 * each element lands in its positional column and nothing is padded.
 */
@Test
fun `split into with exact number of elements`() {
    val source = dataFrameOf("a")(
        listOf(1, 2, 3),
        listOf(4, 5, 6),
    )
    val targetNames = listOf("a1", "a2", "a3")

    val res = source.split { "a"<List<Int>>() }.into("a1", "a2", "a3")

    res.columnNames() shouldBe targetNames

    // One inner list per source row, one value per target column.
    val expectedRows = listOf(
        listOf(1, 2, 3),
        listOf(4, 5, 6),
    )
    expectedRows.forEachIndexed { rowIndex, expectedRow ->
        targetNames.zip(expectedRow).forEach { (name, expected) ->
            res[name][rowIndex] shouldBe expected
        }
    }
}
221+
222+
/**
 * With only empty lists as input, all three requested columns must still be
 * created and every cell in them must be `null`.
 */
@Test
fun `split into with all empty lists should fill all with nulls`() {
    val source = dataFrameOf("a")(
        emptyList<Int>(),
        emptyList<Int>(),
        emptyList<Int>(),
    )
    val targetNames = listOf("a1", "a2", "a3")

    val res = source.split { "a"<List<Int>>() }.into("a1", "a2", "a3")

    res.columnNames() shouldBe targetNames

    // Three source rows; every cell of every target column is expected to be null.
    repeat(3) { rowIndex ->
        targetNames.forEach { name ->
            res[name][rowIndex] shouldBe null
        }
    }
}
245+
246+
/**
 * Splits a list column whose rows alternate between empty, partial and full
 * lists into six named columns, although no list has more than three elements.
 *
 * Checks every cell of all six columns: `a1`..`a3` carry the list elements
 * (padded with `null`), while the surplus columns `a4`..`a6` must be `null`
 * in every row.
 */
@Test
fun `split into with mixed empty and partial lists`() {
    val df = dataFrameOf("a")(
        emptyList<Int>(),
        listOf(1, 2, 3),
        emptyList<Int>(),
        listOf(4, 5),
        emptyList<Int>(),
        listOf(6, 7, 8),
    )
    val targetNames = listOf("a1", "a2", "a3", "a4", "a5", "a6")

    val res = df.split { "a"<List<Int>>() }.into("a1", "a2", "a3", "a4", "a5", "a6")

    res.columnNames() shouldBe targetNames

    // One inner list per source row, one value per target column.
    // The last three values of each row belong to the surplus columns a4..a6.
    val expectedRows = listOf(
        listOf(null, null, null, null, null, null),
        listOf(1, 2, 3, null, null, null),
        listOf(null, null, null, null, null, null),
        listOf(4, 5, null, null, null, null),
        listOf(null, null, null, null, null, null),
        listOf(6, 7, 8, null, null, null),
    )
    expectedRows.forEachIndexed { rowIndex, expectedRow ->
        targetNames.zip(expectedRow).forEach { (name, expected) ->
            res[name][rowIndex] shouldBe expected
        }
    }
}
285+
286+
/**
 * With `.default("something")` configured, cells that have no corresponding
 * list element are expected to hold "something" rather than `null`.
 */
@Test
fun `split into with custom default value`() {
    val source = dataFrameOf("a")(
        listOf("A"),
        listOf("B", "C", "D"),
    )

    val res = source
        .split { "a"<List<String>>() }
        .default("something")
        .into("a1", "a2", "a3")

    // One inner list per source row, one value per target column.
    val targetNames = listOf("a1", "a2", "a3")
    val expectedRows = listOf(
        listOf("A", "something", "something"),
        listOf("B", "C", "D"),
    )
    expectedRows.forEachIndexed { rowIndex, expectedRow ->
        targetNames.zip(expectedRow).forEach { (name, expected) ->
            res[name][rowIndex] shouldBe expected
        }
    }
}
304+
305+
/**
 * An empty list in the first row must yield the configured default
 * ("something") in every target column of that row.
 */
@Test
fun `split list with empty list uses custom default value`() {
    val source = dataFrameOf("a")(
        emptyList<String>(),
        listOf("B", "C", "D"),
    )
    val targetNames = listOf("a1", "a2", "a3")

    val res = source
        .split { "a"<List<String>>() }
        .default("something")
        .into("a1", "a2", "a3")

    res.columnNames() shouldBe targetNames

    // One inner list per source row, one value per target column.
    val expectedRows = listOf(
        listOf("something", "something", "something"),
        listOf("B", "C", "D"),
    )
    expectedRows.forEachIndexed { rowIndex, expectedRow ->
        targetNames.zip(expectedRow).forEach { (name, expected) ->
            res[name][rowIndex] shouldBe expected
        }
    }
}
325+
326+
/**
 * Splits strings on "," into two columns; rows without a second part are
 * expected to receive the configured default ("something") in `a2`.
 */
@Test
fun `split string by delimiter with custom default value`() {
    val source = dataFrameOf("a")(
        "apple",
        "banana,orange",
        "cherry",
    )
    val targetNames = listOf("a1", "a2")

    val res = source
        .split("a")
        .by(",")
        .default("something")
        .into("a1", "a2")

    res.columnNames() shouldBe targetNames

    // One inner list per source row, one value per target column.
    val expectedRows = listOf(
        listOf("apple", "something"),
        listOf("banana", "orange"),
        listOf("cherry", "something"),
    )
    expectedRows.forEachIndexed { rowIndex, expectedRow ->
        targetNames.zip(expectedRow).forEach { (name, expected) ->
            res[name][rowIndex] shouldBe expected
        }
    }
}
347+
348+
/**
 * When every list already has one element per target column, the configured
 * default ("something") must not appear in the result.
 */
@Test
fun `split does not use default when all values present`() {
    val source = dataFrameOf("a")(
        listOf("A", "B"),
        listOf("C", "D"),
    )

    val res = source
        .split { "a"<List<String>>() }
        .default("something")
        .into("a1", "a2")

    // One inner list per source row, one value per target column.
    val targetNames = listOf("a1", "a2")
    val expectedRows = listOf(
        listOf("A", "B"),
        listOf("C", "D"),
    )
    expectedRows.forEachIndexed { rowIndex, expectedRow ->
        targetNames.zip(expectedRow).forEach { (name, expected) ->
            res[name][rowIndex] shouldBe expected
        }
    }
}
176365
}

0 commit comments

Comments
 (0)