@@ -1369,6 +1369,45 @@ def test_read_csv_for_names_and_index_col(
13691369 )
13701370
13711371
def test_read_csv_for_dtype(session, df_and_gcs_csv_for_two_columns):
    """Compare `read_csv(..., dtype=...)` with and without `engine="bigquery"`.

    The same CSV path and the same per-column `dtype` overrides are read
    twice: once with `engine="bigquery"` and once with no `engine` argument.
    Both results must agree on shape and column labels and, once indexed by
    `rowindex`, on their materialized pandas contents.
    """
    _, csv_path = df_and_gcs_csv_for_two_columns
    dtype_overrides = {
        "bool_col": pd.BooleanDtype(),
        "int64_col": pd.Float64Dtype(),
    }

    bq_engine_df = session.read_csv(
        csv_path, engine="bigquery", dtype=dtype_overrides
    )

    # Reference result: the identical call minus the explicit engine.
    # NOTE(review): presumably this path parses with pandas -- confirm.
    default_engine_df = session.read_csv(csv_path, dtype=dtype_overrides)

    assert bq_engine_df.shape == default_engine_df.shape
    assert bq_engine_df.columns.tolist() == default_engine_df.columns.tolist()

    # Only the BigQuery-engine frame is sorted: BigQuery doesn't preserve row
    # IDs (b/280889935) or guarantee row ordering.
    actual = bq_engine_df.set_index("rowindex").sort_index()
    expected = default_engine_df.set_index("rowindex")

    actual_pandas = actual.to_pandas()
    expected_pandas = expected.to_pandas()
    pd.testing.assert_frame_equal(actual_pandas, expected_pandas)
1389+
1390+
def test_read_csv_for_dtype_w_names(session, df_and_gcs_csv_for_two_columns):
    """Compare `read_csv(..., names=..., dtype=...)` with and without `engine="bigquery"`.

    The CSV is read twice with the same replacement column labels
    (`a`, `b`, `c`) and the same `dtype` overrides keyed by those labels:
    once with `engine="bigquery"` and once with no `engine` argument. Both
    results must agree on shape and column labels and, once indexed by `a`,
    on their materialized pandas contents.
    """
    _, csv_path = df_and_gcs_csv_for_two_columns
    column_names = ["a", "b", "c"]
    dtype_overrides = {
        "b": pd.BooleanDtype(),
        "c": pd.Float64Dtype(),
    }

    bq_engine_df = session.read_csv(
        csv_path, engine="bigquery", names=column_names, dtype=dtype_overrides
    )

    # Reference result: the identical call minus the explicit engine.
    # NOTE(review): presumably this path parses with pandas -- confirm.
    default_engine_df = session.read_csv(
        csv_path, names=column_names, dtype=dtype_overrides
    )

    assert bq_engine_df.shape == default_engine_df.shape
    assert bq_engine_df.columns.tolist() == default_engine_df.columns.tolist()

    # Only the BigQuery-engine frame is sorted: BigQuery doesn't preserve row
    # IDs (b/280889935) or guarantee row ordering.
    actual = bq_engine_df.set_index("a").sort_index()
    expected = default_engine_df.set_index("a")

    actual_pandas = actual.to_pandas()
    expected_pandas = expected.to_pandas()
    pd.testing.assert_frame_equal(actual_pandas, expected_pandas)
1409+
1410+
13721411@pytest .mark .parametrize (
13731412 ("kwargs" , "match" ),
13741413 [
0 commit comments