File tree Expand file tree Collapse file tree 1 file changed +3
-16
lines changed
Expand file tree Collapse file tree 1 file changed +3
-16
lines changed Original file line number Diff line number Diff line change 4848df_nation = ctx .read_parquet (get_data_path ("nation.parquet" )).select (
4949 "n_nationkey" , "n_name"
5050)
51- print ("df_orders" )
52- df_orders .show ()
53- print ("df_lineitem" )
54- df_lineitem .show ()
55- print ("df_supplier" )
56- df_supplier .show ()
57- print ("df_nation" )
58- df_nation .show ()
59-
6051
6152# Limit to suppliers in the nation of interest
6253df_suppliers_of_interest = df_nation .filter (col ("n_name" ) == lit (NATION_OF_INTEREST ))
9081 [col ("o_orderkey" )],
9182 [
9283 F .array_agg (col ("l_suppkey" ), distinct = True ).alias ("all_suppliers" ),
93- F .array_agg (col ("failed_supp" ), distinct = True ).alias ("failed_suppliers" ),
84+ F .array_agg (
85+ col ("failed_supp" ), filter = col ("failed_supp" ).is_not_null (), distinct = True
86+ ).alias ("failed_suppliers" ),
9487 ],
9588)
9689
97- # Remove the null entries that will get returned by array_agg so we can test to see where we only
98- # have a single failed supplier in a multiple supplier order
99- df = df .with_column (
100- "failed_suppliers" , F .array_remove (col ("failed_suppliers" ), lit (None ))
101- )
102-
10390# This is the check described above which will identify single failed supplier in a multiple
10491# supplier order.
10592df = df .filter (F .array_length (col ("failed_suppliers" )) == lit (1 )).filter (
You can’t perform that action at this time.
0 commit comments