@@ -399,13 +399,14 @@ if(NOT BUILD_CPU_ONLY)
399399 target_compile_features (jit_lto_kernel_usage_requirements INTERFACE cuda_std_20 )
400400 target_link_libraries (jit_lto_kernel_usage_requirements INTERFACE rmm::rmm raft::raft CCCL::CCCL )
401401
402- block (PROPAGATE interleaved_scan_files metric_files filter_files post_lambda_files )
402+ block (PROPAGATE jit_lto_files )
403+ set (jit_lto_files)
403404 set (CMAKE_CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} )
404405 set (ivf_flat_ns "cuvs::neighbors::ivf_flat::detail" )
405406 generate_jit_lto_kernels (
406- interleaved_scan_files
407+ jit_lto_files
407408 NAME_FORMAT
408- "interleaved_scan_capacity_ @capacity@_veclen_@veclen@_@ascending_descending@_@compute_norm_name@_data_@type_abbrev@_acc_@acc_abbrev@_idx_@idx_abbrev@"
409+ "ivf_flat_interleaved_scan_capacity_ @capacity@_veclen_@veclen@_@ascending_descending@_@compute_norm_name@_data_@type_abbrev@_acc_@acc_abbrev@_idx_@idx_abbrev@"
409410 MATRIX_JSON_FILE
410411 "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_matrix.json"
411412 KERNEL_INPUT_FILE
@@ -414,12 +415,12 @@ if(NOT BUILD_CPU_ONLY)
414415 "${ivf_flat_ns} ::fragment_tag_interleaved_scan<${ivf_flat_ns} ::tag_@type_abbrev@, ${ivf_flat_ns} ::tag_acc_@acc_abbrev@, ${ivf_flat_ns} ::tag_idx_@idx_abbrev@, @capacity@, @veclen@, @ascending_value@, @compute_norm_value@>"
415416 FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_flat/interleaved_scan_fragments.hpp>"
416417 "<cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp>"
417- OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/interleaved_scan"
418+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_flat/ interleaved_scan"
418419 KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
419420 )
420421 generate_jit_lto_kernels (
421- metric_files
422- NAME_FORMAT "metric_ @metric_name@_veclen_@veclen@_data_@type_abbrev@_acc_@acc_abbrev@"
422+ jit_lto_files
423+ NAME_FORMAT "ivf_flat_metric_ @metric_name@_veclen_@veclen@_data_@type_abbrev@_acc_@acc_abbrev@"
423424 MATRIX_JSON_FILE
424425 "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/metric_matrix.json"
425426 KERNEL_INPUT_FILE
@@ -428,12 +429,12 @@ if(NOT BUILD_CPU_ONLY)
428429 "${ivf_flat_ns} ::fragment_tag_metric<@veclen@, ${ivf_flat_ns} ::tag_@type_abbrev@, ${ivf_flat_ns} ::tag_acc_@acc_abbrev@, ${ivf_flat_ns} ::tag_metric_@metric_name@>"
429430 FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_flat/interleaved_scan_fragments.hpp>"
430431 "<cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp>"
431- OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/metric"
432+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_flat/ metric"
432433 KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
433434 )
434435 generate_jit_lto_kernels (
435- filter_files
436- NAME_FORMAT "@filter_name@"
436+ jit_lto_files
437+ NAME_FORMAT "ivf_flat_ @filter_name@"
437438 MATRIX_JSON_FILE
438439 "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json"
439440 KERNEL_INPUT_FILE
@@ -442,12 +443,12 @@ if(NOT BUILD_CPU_ONLY)
442443 "${ivf_flat_ns} ::fragment_tag_filter<${ivf_flat_ns} ::tag_filter<${ivf_flat_ns} ::tag_idx_l, ${ivf_flat_ns} ::tag_@filter_name@_impl>>"
443444 FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_flat/interleaved_scan_fragments.hpp>"
444445 "<cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp>"
445- OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/filter"
446+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_flat/ filter"
446447 KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
447448 )
448449 generate_jit_lto_kernels (
449- post_lambda_files
450- NAME_FORMAT "@post_lambda_name@"
450+ jit_lto_files
451+ NAME_FORMAT "ivf_flat_ @post_lambda_name@"
451452 MATRIX_JSON_FILE
452453 "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_matrix.json"
453454 KERNEL_INPUT_FILE
@@ -456,21 +457,155 @@ if(NOT BUILD_CPU_ONLY)
456457 "${ivf_flat_ns} ::fragment_tag_post_lambda<${ivf_flat_ns} ::tag_@post_lambda_name@>"
457458 FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_flat/interleaved_scan_fragments.hpp>"
458459 "<cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp>"
459- OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/post_lambda"
460+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_flat/ post_lambda"
460461 KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
461462 )
462- endblock ()
463-
464- set (jit_lto_files
465- ${interleaved_scan_files}
466- ${metric_files}
467- ${filter_files}
468- ${post_lambda_files}
469- src/detail/jit_lto/AlgorithmLauncher.cpp
470- src/detail/jit_lto/AlgorithmPlanner.cpp
471- src/detail/jit_lto/FragmentEntry.cpp
472- src/detail/jit_lto/nvjitlink_checker.cpp
463+ set (ivf_pq_ns "cuvs::neighbors::ivf_pq::detail" )
464+ generate_jit_lto_kernels (
465+ jit_lto_files
466+ NAME_FORMAT "ivf_pq_compute_similarity_out_@out_abbrev@_lut_@lut_abbrev@"
467+ MATRIX_JSON_FILE
468+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/compute_similarity_matrix.json"
469+ KERNEL_INPUT_FILE
470+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/compute_similarity_kernel.cu.in"
471+ FRAGMENT_TAG_FORMAT
472+ "${ivf_pq_ns} ::fragment_tag_compute_similarity<${ivf_pq_ns} ::tag_out_@out_abbrev@, ${ivf_pq_ns} ::tag_lut_@lut_abbrev@>"
473+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
474+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/compute_similarity"
475+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
476+ )
477+ generate_jit_lto_kernels (
478+ jit_lto_files
479+ NAME_FORMAT "ivf_pq_prepare_lut_lut_@lut_abbrev@_@enable_smem_lut_str@_@pq_bits@pq"
480+ MATRIX_JSON_FILE
481+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/prepare_lut_matrix.json"
482+ KERNEL_INPUT_FILE
483+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/prepare_lut_kernel.cu.in"
484+ FRAGMENT_TAG_FORMAT
485+ "${ivf_pq_ns} ::fragment_tag_prepare_lut<${ivf_pq_ns} ::tag_lut_@lut_abbrev@, @enable_smem_lut@, @pq_bits@>"
486+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
487+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/prepare_lut"
488+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
489+ )
490+ generate_jit_lto_kernels (
491+ jit_lto_files
492+ NAME_FORMAT "ivf_pq_store_calculated_distances_out_@out_abbrev@_@k_manage_local_topk_str@"
493+ MATRIX_JSON_FILE
494+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/store_calculated_distances_matrix.json"
495+ KERNEL_INPUT_FILE
496+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/store_calculated_distances_kernel.cu.in"
497+ FRAGMENT_TAG_FORMAT
498+ "${ivf_pq_ns} ::fragment_tag_store_calculated_distances<${ivf_pq_ns} ::tag_out_@out_abbrev@, @k_manage_local_topk@>"
499+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
500+ OUTPUT_DIRECTORY
501+ "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/store_calculated_distances"
502+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
503+ )
504+ generate_jit_lto_kernels (
505+ jit_lto_files
506+ NAME_FORMAT "ivf_pq_precompute_base_diff_metric_@metric@"
507+ MATRIX_JSON_FILE
508+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/precompute_base_diff_matrix.json"
509+ KERNEL_INPUT_FILE
510+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/precompute_base_diff_kernel.cu.in"
511+ FRAGMENT_TAG_FORMAT
512+ "${ivf_pq_ns} ::fragment_tag_precompute_base_diff<${ivf_pq_ns} ::tag_metric_@metric@>"
513+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
514+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/precompute_base_diff"
515+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
516+ )
517+ generate_jit_lto_kernels (
518+ jit_lto_files
519+ NAME_FORMAT
520+ "ivf_pq_create_lut_lut_@lut_abbrev@_@precomp_base_diff_str@_@pq_bits@pq_metric_@metric@"
521+ MATRIX_JSON_FILE
522+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/create_lut_matrix.json"
523+ KERNEL_INPUT_FILE
524+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/create_lut_kernel.cu.in"
525+ FRAGMENT_TAG_FORMAT
526+ "${ivf_pq_ns} ::fragment_tag_create_lut<${ivf_pq_ns} ::tag_lut_@lut_abbrev@, ${ivf_pq_ns} ::tag_metric_@metric@, @precomp_base_diff@, @pq_bits@>"
527+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
528+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/create_lut"
529+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
530+ )
531+ generate_jit_lto_kernels (
532+ jit_lto_files
533+ NAME_FORMAT "ivf_pq_compute_distances_out_@out_abbrev@_lut_@lut_abbrev@_capacity_@capacity@"
534+ MATRIX_JSON_FILE
535+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/compute_distances_matrix.json"
536+ KERNEL_INPUT_FILE
537+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/compute_distances_kernel.cu.in"
538+ FRAGMENT_TAG_FORMAT
539+ "${ivf_pq_ns} ::fragment_tag_compute_distances<${ivf_pq_ns} ::tag_out_@out_abbrev@, ${ivf_pq_ns} ::tag_lut_@lut_abbrev@, @capacity@>"
540+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
541+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/compute_distances"
542+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
543+ )
544+ generate_jit_lto_kernels (
545+ jit_lto_files
546+ NAME_FORMAT "ivf_pq_get_early_stop_limit_out_@out_abbrev@_metric_@metric@"
547+ MATRIX_JSON_FILE
548+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/get_early_stop_limit_matrix.json"
549+ KERNEL_INPUT_FILE
550+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/get_early_stop_limit_kernel.cu.in"
551+ FRAGMENT_TAG_FORMAT
552+ "${ivf_pq_ns} ::fragment_tag_get_early_stop_limit<${ivf_pq_ns} ::tag_out_@out_abbrev@, ${ivf_pq_ns} ::tag_metric_@metric@>"
553+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
554+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/get_early_stop_limit"
555+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
473556 )
557+ generate_jit_lto_kernels (
558+ jit_lto_files
559+ NAME_FORMAT "ivf_pq_sample_filter_@filter_name@"
560+ MATRIX_JSON_FILE
561+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/sample_filter_matrix.json"
562+ KERNEL_INPUT_FILE
563+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/sample_filter_kernel.cu.in"
564+ FRAGMENT_TAG_FORMAT
565+ "${ivf_pq_ns} ::fragment_tag_sample_filter<${ivf_pq_ns} ::tag_filter_@filter_name@>"
566+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
567+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/sample_filter"
568+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
569+ )
570+ generate_jit_lto_kernels (
571+ jit_lto_files
572+ NAME_FORMAT "ivf_pq_get_line_width_@pq_bits@pq"
573+ MATRIX_JSON_FILE
574+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/get_line_width_matrix.json"
575+ KERNEL_INPUT_FILE
576+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/get_line_width_kernel.cu.in"
577+ FRAGMENT_TAG_FORMAT "${ivf_pq_ns} ::fragment_tag_get_line_width<@pq_bits@>"
578+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
579+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/get_line_width"
580+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
581+ )
582+ generate_jit_lto_kernels (
583+ jit_lto_files
584+ NAME_FORMAT "ivf_pq_compute_score_out_@out_abbrev@_lut_@lut_abbrev@_@pq_bits@pq"
585+ MATRIX_JSON_FILE
586+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/compute_score_matrix.json"
587+ KERNEL_INPUT_FILE
588+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/compute_score_kernel.cu.in"
589+ FRAGMENT_TAG_FORMAT
590+ "${ivf_pq_ns} ::fragment_tag_compute_score<${ivf_pq_ns} ::tag_out_@out_abbrev@, ${ivf_pq_ns} ::tag_lut_@lut_abbrev@, @pq_bits@>"
591+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
592+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/compute_score"
593+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
594+ )
595+ generate_jit_lto_kernels (
596+ jit_lto_files
597+ NAME_FORMAT "ivf_pq_increment_score_out_@out_abbrev@_@increment_str@"
598+ MATRIX_JSON_FILE
599+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/increment_score_matrix.json"
600+ KERNEL_INPUT_FILE
601+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/jit_lto_kernels/increment_score_kernel.cu.in"
602+ FRAGMENT_TAG_FORMAT
603+ "${ivf_pq_ns} ::fragment_tag_increment_score<${ivf_pq_ns} ::tag_out_@out_abbrev@, @increment@>"
604+ FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_pq/compute_similarity_fragments.hpp>"
605+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/ivf_pq/increment_score"
606+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
607+ )
608+ endblock ()
474609
475610 # Note that this matrix contains an `arch_includes` placeholder, since we don't currently have a
476611 # way to do an item-wise transform on a list after computing the matrix product and before
@@ -586,11 +721,20 @@ if(NOT BUILD_CPU_ONLY)
586721 generate_inst_matrix (
587722 ivf_pq_compute_similarity_inst_files
588723 MATRIX_JSON_FILE
589- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_matrix.json"
724+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_select_matrix.json"
725+ INPUT_FILE
726+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_select_inst.cu.in"
727+ OUTPUT_FILE_FORMAT
728+ "${CMAKE_CURRENT_BINARY_DIR} /src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_select_inst_out_@out_abbrev@_lut_@lut_abbrev@_filter_@filter_name@_metric_@metric@_@increment_score_str@.cu"
729+ )
730+ generate_inst_matrix (
731+ ivf_pq_compute_similarity_inst_files
732+ MATRIX_JSON_FILE
733+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_run_matrix.json"
590734 INPUT_FILE
591- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_inst .cu.in"
735+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_run_inst .cu.in"
592736 OUTPUT_FILE_FORMAT
593- "${CMAKE_CURRENT_BINARY_DIR} /src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_inst_out_ @out_abbrev@_lut_@lut_abbrev@.cu"
737+ "${CMAKE_CURRENT_BINARY_DIR} /src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_run_inst_out_ @out_abbrev@_lut_@lut_abbrev@.cu"
594738 )
595739 generate_inst_matrix (
596740 ivf_pq_search_inst_files
@@ -671,6 +815,10 @@ if(NOT BUILD_CPU_ONLY)
671815 src/core/omp_wrapper.cpp
672816 src/util/file_io.cpp
673817 src/util/host_memory.cpp
818+ src/detail/jit_lto/AlgorithmLauncher.cpp
819+ src/detail/jit_lto/AlgorithmPlanner.cpp
820+ src/detail/jit_lto/FragmentEntry.cpp
821+ src/detail/jit_lto/nvjitlink_checker.cpp
674822 src/distance/detail/kernels/gram_matrix.cu
675823 src/distance/detail/kernels/kernel_factory.cu
676824 src/distance/detail/kernels/kernel_matrices.cu
0 commit comments