Skip to content

Commit c180e91

Browse files
committed
feat(sparse): CKKS sparse-packing bootstrap (CPU + GPU)
Adds end-to-end sparse-packing CKKS bootstrap. The user opts into a sparse configuration with CkksBtpParam.create_sparse_param(log_slots) (or CkksBtpParam.create_toy_sparse_param(log_slots) for N=8192 dev runs), and the existing bootstrap() op in the DAG goes through:
- CPU: lattigo's native sparse path (LogSlots < LogN-1) via the new SetCkksParameterLogSlots SDK export. The ABI bridge encodes/decodes at the param's LogSlots, so sparse plaintexts pack/unpack correctly.
- GPU: HEonGPU's existing regular_bootstrapping_v2, once context.set_slot_count puts the encoder in gap_>1 mode. mega_ag_runners/gpu/gpu_wrapper.cu uses the static prime-chain helper ckks_sparse_bootstrap_chain_n16 (semi-public in HEonGPU; included explicitly here) to translate the frontend's dense CkksBtpParam into the sparse Q + level_starts that BootstrappingConfigV2's 4-arg ctor expects. This is a translator, not a duplicate code path -- once the frontend learns to emit sparse-correct values directly, the branch collapses into the dense path.
Tests: test_sparse_bootstrap (CPU lattigo path) demonstrates the 1.32-1.59x speedup at log_slots in [8, 14] vs the dense LogSlots=LogN-1 baseline. test_gpu_ckks/cpu_ckks add CKKS smoke coverage. The examples/ckks_sparse_bootstrap_cpu/ example shows the C++ and Python entry points for users.
Carries submodule pointers: HEonGPU 6675bee (sparse-packing CKKS bootstrap), lattigo bb1b0bb (SetCkksParameterLogSlots + decoupled encode/decode).
1 parent 3708f97 commit c180e91

18 files changed

Lines changed: 700 additions & 47 deletions

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ add_subdirectory(ckks_mult_cpu)
1010
add_subdirectory(ckks_mult_serialization_cpu)
1111
add_subdirectory(ckks_euclidean_distance_cpu)
1212
add_subdirectory(ckks_logistic_regression_cpu)
13+
add_subdirectory(ckks_sparse_bootstrap_cpu)
1314
add_subdirectory(benchmark_cpu)
1415
add_subdirectory(benchmark_convolution)
1516
if(LATTISENSE_ENABLE_GPU)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Example binary for the CKKS sparse-packing bootstrap, linked against lattisense.
add_executable(ckks_sparse_bootstrap_cpu
    ckks_sparse_bootstrap_cpu.cpp
)
target_link_libraries(ckks_sparse_bootstrap_cpu
    PRIVATE lattisense
)

# BUILD_WITH_INSTALL_RPATH is OFF, so INSTALL_RPATH only takes effect on the
# installed binary; the build-tree binary keeps CMake's build RPATH.
set_target_properties(ckks_sparse_bootstrap_cpu PROPERTIES
    BUILD_WITH_INSTALL_RPATH OFF
    INSTALL_RPATH "$ORIGIN/../../../lib"
)

# Copy the companion Python script into this directory's build tree unchanged
# (COPYONLY disables variable substitution in the copied file).
configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/ckks_sparse_bootstrap_cpu.py
    ${CMAKE_CURRENT_BINARY_DIR}/ckks_sparse_bootstrap_cpu.py
    COPYONLY
)
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* Copyright (c) 2025-2026 CipherFlow (Shenzhen) Co., Ltd.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
*/
18+
19+
#include <cxx_sdk_v2/cxx_fhe_task.h>
20+
#include <fhe_ops_lib/fhe_lib_v2.h>
21+
22+
#include <cmath>
23+
#include <cstdio>
24+
#include <vector>
25+
26+
using namespace lattisense;
27+
using namespace std;
28+
29+
void ckks_sparse_bootstrap_cpu() {
30+
constexpr int log_slots = 8;
31+
constexpr int sparse_slots = 1 << log_slots;
32+
33+
CkksBtpParameter btp_param = CkksBtpParameter::create_toy_sparse_parameter(log_slots);
34+
CkksBtpContext btp_ctx = CkksBtpContext::create_random_context(btp_param);
35+
btp_ctx.create_bootstrapper();
36+
37+
double scale = btp_param.get_ckks_parameter().get_default_scale();
38+
vector<double> x_mg(sparse_slots);
39+
for (int i = 0; i < sparse_slots; i++)
40+
x_mg[i] = 0.5 * cos(2.0 * M_PI * i / sparse_slots);
41+
42+
CkksCiphertext x_ct = btp_ctx.encrypt_asymmetric(btp_ctx.encode(x_mg, 0, scale));
43+
CkksCiphertext y_ct = btp_ctx.new_ciphertext(9, scale);
44+
45+
FheTaskCpu cpu_project("project");
46+
vector<CxxVectorArgument> cxx_args = {
47+
{"x", &x_ct},
48+
{"y", &y_ct},
49+
};
50+
cpu_project.run(&btp_ctx, cxx_args);
51+
52+
vector<double> y_mg = btp_ctx.decode(btp_ctx.decrypt(y_ct));
53+
54+
double max_err = 0.0;
55+
for (int i = 0; i < sparse_slots; i++)
56+
max_err = std::max(max_err, std::abs(y_mg[i] - x_mg[i]));
57+
58+
printf("CKKS sparse bootstrap (log_slots=%d, n=%d), CPU\n", log_slots, btp_param.get_ckks_parameter().get_n());
59+
print_double_message(x_mg.data(), "x_mg (input, first 4 slots)", 4);
60+
print_double_message(y_mg.data(), "y_mg (bootstrapped, first 4 slots)", 4);
61+
printf("max abs error over active %d slots: %.3e\n", sparse_slots, max_err);
62+
}
63+
64+
// Program entry point: runs the sparse-bootstrap demo once.
// Falling off the end of main is equivalent to returning 0.
int main() {
    ckks_sparse_bootstrap_cpu();
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright (c) 2025-2026 CipherFlow (Shenzhen) Co., Ltd.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# SPDX-License-Identifier: Apache-2.0
16+
17+
import os
18+
import sys
19+
20+
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
21+
22+
from frontend.custom_task import *
23+
24+
25+
def ckks_sparse_bootstrap():
    """Describe a single sparse-packing CKKS bootstrap and emit it as a task.

    Installs toy sparse bootstrap parameters (log_slots=8) as the global FHE
    parameter, declares a level-0 input ciphertext ``x``, bootstraps it into
    ``y`` and hands both to ``process_custom_task`` with ``'project'`` as the
    instruction output path.
    """
    # With sparse packing only 2^log_slots slots stay active, which shrinks the
    # CtS/StC matrices and reduces wall time compared with full packing.
    sparse_param = CkksBtpParam.create_toy_sparse_param(log_slots=8)
    set_fhe_param(sparse_param)

    ct_in = CkksCiphertextNode('x', level=0)
    ct_out = bootstrap(ct_in, 'y')

    task_inputs = [Argument('x', ct_in)]
    task_outputs = [Argument('y', ct_out)]
    process_custom_task(
        input_args=task_inputs,
        output_args=task_outputs,
        output_instruction_path='project',
        fpga_acc=False,
    )
40+
41+
42+
# Generate the task only when this file is executed as a script, not on import.
if __name__ == "__main__":
    ckks_sparse_bootstrap()

fhe_ops_lib/fhe_lib_v2.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,18 @@ CkksBtpParameter CkksBtpParameter::create_toy_parameter() {
617617
return CkksBtpParameter(CreateCkksToyBtpParameter());
618618
}
619619

620+
// Builds the default bootstrap parameter set and switches it to sparse packing.
// log_slots is forwarded unchanged to SetCkksParameterLogSlots; this function
// performs no range check of its own.
CkksBtpParameter CkksBtpParameter::create_sparse_parameter(int32_t log_slots) {
    CkksBtpParameter sparse_param(CreateCkksBtpParameter());
    CkksParameter& inner_param = sparse_param.get_ckks_parameter();
    SetCkksParameterLogSlots(inner_param.get(), log_slots);
    return sparse_param;
}
625+
626+
// Builds the toy bootstrap parameter set and switches it to sparse packing.
// log_slots is forwarded unchanged to SetCkksParameterLogSlots; this function
// performs no range check of its own.
CkksBtpParameter CkksBtpParameter::create_toy_sparse_parameter(int32_t log_slots) {
    CkksBtpParameter toy_sparse_param(CreateCkksToyBtpParameter());
    CkksParameter& inner_param = toy_sparse_param.get_ckks_parameter();
    SetCkksParameterLogSlots(inner_param.get(), log_slots);
    return toy_sparse_param;
}
631+
620632
// CkksParameter
621633
CkksParameter CkksParameter::create_parameter(uint64_t N) {
622634
return CkksParameter(CreateCkksParameter(N));

fhe_ops_lib/fhe_lib_v2.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,14 @@ class CkksBtpParameter : public CkksParameter {
287287

288288
static CkksBtpParameter create_toy_parameter();
289289

290+
// Sparse-packing CKKS bootstrap (LogSlots < LogN-1). The CPU runner
291+
// (lattigo) handles any log_slots in [4, LogN-2]. The GPU runner
292+
// (HEonGPU) currently only supports N=2^16 sparse, so create_toy_sparse_*
293+
// is CPU-only and will error if routed to GPU; use the production-N
294+
// create_sparse_parameter for end-to-end CPU+GPU sparse runs.
295+
static CkksBtpParameter create_sparse_parameter(int32_t log_slots);
296+
static CkksBtpParameter create_toy_sparse_parameter(int32_t log_slots);
297+
290298
CkksParameter& get_ckks_parameter();
291299

292300
protected:

frontend/bootstrap_params.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,15 @@ def _find_best_bsgs_split(diag_matrix: Dict[int, bool], max_n: int, max_ratio: f
198198
_, rot_n1, rot_n2 = _bsgs_index(diag_matrix, max_n, n1)
199199
nb_n1, nb_n2 = len(rot_n1) - 1, len(rot_n2) - 1
200200

201-
if nb_n2 / nb_n1 == max_ratio:
202-
return n1
203-
if nb_n2 / nb_n1 > max_ratio:
204-
return n1 // 2
201+
# At small n1 (or sparse matrices), one of the partitions can collapse
202+
# to a single element (nb_n1 == 0). Skip the ratio check in that case
203+
# and double n1 to broaden the partition; only stop once nb_n1 > 0 and
204+
# the ratio threshold is crossed.
205+
if nb_n1 > 0:
206+
if nb_n2 / nb_n1 == max_ratio:
207+
return n1
208+
if nb_n2 / nb_n1 > max_ratio:
209+
return n1 // 2
205210

206211
n1 <<= 1
207212

frontend/custom_task.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,9 @@ def create_fpga_param(cls):
280280

281281

282282
class CkksBtpParam(CkksParam):
283-
"""
284-
@class CkksBtpParam
285-
@brief CKKS Bootstrap parameter class.
286-
287-
Contains additional parameters required for CKKS bootstrapping.
283+
"""CKKS bootstrap parameter class. Set sparse packing via create_sparse_param()
284+
or by calling the inherited CkksParam.set_slots(); trace compensation is handled by
285+
the bootstrap op via rotations_for_bootstrapping().
288286
"""
289287

290288
def __init__(self, n: int = 1 << 16):
@@ -293,6 +291,12 @@ def __init__(self, n: int = 1 << 16):
293291
self.stc_params: EncodingMatrixParams = None
294292
self.eval_mod_params: EvalModParams = None
295293
self.btp_output_level: int = -1
294+
self.btp_cts_start_level: int = -1
295+
self.btp_eval_mod_start_level: int = -1
296+
self.btp_stc_start_level: int = -1
297+
298+
def is_sparse(self) -> bool:
    """Return True when fewer than n // 2 slots are configured (sparse packing)."""
    full_slot_count = self.n // 2
    return self.slots < full_slot_count
296300

297301
@classmethod
298302
def create_toy_param(cls):
@@ -464,6 +468,24 @@ def create_default_param(cls):
464468

465469
return instance
466470

471+
# Lattigo's genWfftIndexMap panics below this (CTS/STC depth exceeds slots).
472+
_MIN_LOG_SLOTS = 4
473+
474+
@classmethod
def create_sparse_param(cls, log_slots: int, n: int = 1 << 16):
    """Create sparse CKKS bootstrap params with 2^log_slots active slots.

    Args:
        log_slots: Base-2 logarithm of the number of active slots. Must lie
            in [_MIN_LOG_SLOTS, log2(n) - 2].
        n: Ring degree. Must equal the ring degree of the preset that gets
            selected: create_default_param() for 2^16, create_toy_param()
            for everything else.

    Returns:
        The selected preset instance after set_slots(1 << log_slots).

    Raises:
        ValueError: If log_slots is out of range for n, or if the selected
            preset's ring degree differs from n.
    """
    max_log_slots = int(math.log2(n)) - 2
    if log_slots < cls._MIN_LOG_SLOTS or log_slots > max_log_slots:
        raise ValueError(f'log_slots must be in [{cls._MIN_LOG_SLOTS}, {max_log_slots}] for n={n}, got {log_slots}')
    instance = cls.create_default_param() if n == (1 << 16) else cls.create_toy_param()
    # Every n other than 2^16 selects the toy preset. If that preset's ring
    # degree is not the requested one, the range check above was made against
    # the wrong n (and the result could even be fully packed), so refuse
    # instead of silently returning mismatched parameters.
    if instance.n != n:
        raise ValueError(f'unsupported n={n}: the selected bootstrap preset has n={instance.n}')
    instance.set_slots(1 << log_slots)
    return instance
483+
484+
@classmethod
def create_toy_sparse_param(cls, log_slots: int):
    """Build sparse bootstrap params on the toy ring (n=8192).

    Not secure; meant for development runs only.
    """
    toy_ring_degree = 1 << 13
    return cls.create_sparse_param(log_slots, n=toy_ring_degree)
488+
467489
def rotations_for_bootstrapping(self) -> list[int]:
468490
log_n = int(math.log2(self.n))
469491
log_slots = int(math.log2(self.slots))
@@ -1969,6 +1991,7 @@ def bootstrap(x: CkksCiphertextNode, output_id: Optional[str] = None) -> CkksCip
19691991
g_swk_node_dict[rlk].level = g_param.max_level
19701992
g_dag.add_edge(g_swk_node_dict[rlk], op)
19711993

1994+
assert isinstance(g_param, CkksBtpParam)
19721995
rots = g_param.rotations_for_bootstrapping()
19731996
for rot in rots:
19741997
gal_elem = get_galois_element_for_column_rotation_by(rot, g_param.n)
@@ -2260,6 +2283,8 @@ def process_data_args(args: list[Argument] | None, phase: str) -> tuple[list[Dat
22602283

22612284
used_id = []
22622285

2286+
slots_for_task: Optional[int] = g_param.slots if isinstance(g_param, CkksParam) else None
2287+
22632288
all_input_list, input_sigdata_list = process_data_args(input_args, 'in')
22642289
all_output_list, output_sigdata_list = process_data_args(output_args, 'out')
22652290
all_offline_list, offline_sigdata_list = process_data_args(offline_input_args, 'offline')
@@ -2308,7 +2333,9 @@ def process_data_args(args: list[Argument] | None, phase: str) -> tuple[list[Dat
23082333
if g_param.algo == Algo.BFV:
23092334
parameter['t'] = g_param.t
23102335
if isinstance(g_param, CkksParam):
2311-
parameter['slots'] = g_param.slots
2336+
# slots_for_task was computed above (with optional sparse inference).
2337+
assert slots_for_task is not None
2338+
parameter['slots'] = slots_for_task
23122339
parameter['scale'] = g_param.scale
23132340
if isinstance(g_param, CkksBtpParam):
23142341
parameter['btp_cts_start_level'] = g_param.cts_params.level_start

0 commit comments

Comments
 (0)