diff --git a/cpp/include/cuopt/linear_programming/cpu_optimization_problem.hpp b/cpp/include/cuopt/linear_programming/cpu_optimization_problem.hpp index 009a8ce84e..48d61b9e0c 100644 --- a/cpp/include/cuopt/linear_programming/cpu_optimization_problem.hpp +++ b/cpp/include/cuopt/linear_programming/cpu_optimization_problem.hpp @@ -41,6 +41,8 @@ class mip_solution_interface_t; template class cpu_optimization_problem_t : public optimization_problem_interface_t { public: + using typename optimization_problem_interface_t::quadratic_constraint_t; + cpu_optimization_problem_t(); // Setters @@ -113,6 +115,10 @@ class cpu_optimization_problem_t : public optimization_problem_interface_t& get_quadratic_objective_values() const override; bool has_quadratic_objective() const override; + void set_quadratic_constraints(std::vector constraints) override; + bool has_quadratic_constraints() const override; + const std::vector& get_quadratic_constraints() const override; + // Host getters - these are the only supported getters for CPU implementation std::vector get_constraint_matrix_values_host() const override; std::vector get_constraint_matrix_indices_host() const override; @@ -185,6 +191,8 @@ class cpu_optimization_problem_t : public optimization_problem_interface_t Q_indices_; std::vector Q_values_; + std::vector quadratic_constraints_{}; + std::vector variable_lower_bounds_; std::vector variable_upper_bounds_; std::vector constraint_lower_bounds_; diff --git a/cpp/include/cuopt/linear_programming/optimization_problem.hpp b/cpp/include/cuopt/linear_programming/optimization_problem.hpp index df78dd17c7..c355fbde94 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem.hpp @@ -72,6 +72,9 @@ class optimization_problem_t : public optimization_problem_interface_t static_assert(std::is_floating_point::value, "'optimization_problem_t' accepts only floating point types for weights"); + // nvcc does not 
always find base typedefs in derived class scope; inject explicitly. + using typename optimization_problem_interface_t::quadratic_constraint_t; + /** * @brief A device-side view of the `optimization_problem_t` structure with * the RAII stuffs stripped out, to make it easy to work inside kernels @@ -196,6 +199,8 @@ class optimization_problem_t : public optimization_problem_interface_t i_t size_offsets, bool validate_positive_semi_definite = false) override; + void set_quadratic_constraints(std::vector constraints) override; + /** @copydoc optimization_problem_interface_t::set_variable_lower_bounds */ void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size) override; /** @copydoc optimization_problem_interface_t::set_variable_upper_bounds */ @@ -259,7 +264,9 @@ class optimization_problem_t : public optimization_problem_interface_t const std::vector& get_quadratic_objective_offsets() const override; const std::vector& get_quadratic_objective_indices() const override; const std::vector& get_quadratic_objective_values() const override; + const std::vector& get_quadratic_constraints() const override; bool has_quadratic_objective() const override; + bool has_quadratic_constraints() const override; // ============================================================================ // Host getters @@ -376,6 +383,9 @@ class optimization_problem_t : public optimization_problem_interface_t std::vector Q_indices_; std::vector Q_values_; + /** QCQP: quadratic constraints **/ + std::vector quadratic_constraints_{}; + rmm::device_uvector variable_lower_bounds_; rmm::device_uvector variable_upper_bounds_; rmm::device_uvector constraint_lower_bounds_; diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp index 767e62e746..8ffd38578b 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp +++ 
b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp @@ -56,8 +56,52 @@ class optimization_problem_interface_t { static_assert(std::is_floating_point::value, "'optimization_problem_interface_t' accepts only floating point types for weights"); + /** Quadratic constraint bundle used by core optimization problem interfaces. */ + struct quadratic_constraint_t { + i_t constraint_row_index{}; + std::string constraint_row_name{}; + char constraint_row_type{}; + std::vector linear_values{}; + std::vector linear_indices{}; + f_t rhs_value{f_t(0)}; + std::vector quadratic_values{}; + std::vector quadratic_indices{}; + std::vector quadratic_offsets{}; + }; + virtual ~optimization_problem_interface_t() = default; + /** + * @brief Store quadratic constraints for MPS round-trip (linear + Q parts per QC row). + */ + virtual void set_quadratic_constraints(std::vector constraints) = 0; + template >> + void set_quadratic_constraints(const std::vector& constraints) + { + std::vector converted_constraints; + converted_constraints.reserve(constraints.size()); + for (const auto& qc : constraints) { + converted_constraints.push_back( + {static_cast(qc.constraint_row_index), + qc.constraint_row_name, + qc.constraint_row_type, + std::vector(qc.linear_values.begin(), qc.linear_values.end()), + std::vector(qc.linear_indices.begin(), qc.linear_indices.end()), + static_cast(qc.rhs_value), + std::vector(qc.quadratic_values.begin(), qc.quadratic_values.end()), + std::vector(qc.quadratic_indices.begin(), qc.quadratic_indices.end()), + std::vector(qc.quadratic_offsets.begin(), qc.quadratic_offsets.end())}); + } + set_quadratic_constraints(std::move(converted_constraints)); + } + + /** @brief Whether quadratic constraint metadata is present (for MPS export). */ + virtual bool has_quadratic_constraints() const = 0; + + /** @brief Quadratic constraints for MPS export (empty if none). 
*/ + virtual const std::vector& get_quadratic_constraints() const = 0; + // ============================================================================ // Setters (accept both CPU and GPU pointers) // ============================================================================ diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_utils.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_utils.hpp index 90e853f530..e37ade9660 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem_utils.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem_utils.hpp @@ -109,6 +109,10 @@ void populate_from_mps_data_model(optimization_problem_interface_t* pr q_offsets.data(), n_vars + 1); } + // Handle quadratic constraints if present + if (data_model.has_quadratic_constraints()) { + problem->set_quadratic_constraints(data_model.get_quadratic_constraints()); + } } /** @@ -266,6 +270,10 @@ void populate_from_data_model_view(optimization_problem_interface_t* p if (data_model->get_row_names().size() != 0) { problem->set_row_names(data_model->get_row_names()); } + + if (data_model->has_quadratic_constraints()) { + problem->set_quadratic_constraints(data_model->get_quadratic_constraints()); + } } } // namespace cuopt::linear_programming diff --git a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp index c2a8f84980..100492e2b0 100644 --- a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp +++ b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include @@ -415,6 +416,35 @@ class data_model_view_t { */ bool is_Q_symmetrized() const noexcept; + /** + * @brief Quadratic constraints (MPS QCMATRIX); owned copy for writers when not using spans. 
+ */ + void set_quadratic_constraints( + std::vector::quadratic_constraint_t> constraints); + template + void set_quadratic_constraints(const std::vector& constraints) + { + quadratic_constraints_.clear(); + quadratic_constraints_.reserve(constraints.size()); + for (const auto& qc : constraints) { + quadratic_constraints_.push_back( + {static_cast(qc.constraint_row_index), + qc.constraint_row_name, + qc.constraint_row_type, + std::vector(qc.linear_values.begin(), qc.linear_values.end()), + std::vector(qc.linear_indices.begin(), qc.linear_indices.end()), + static_cast(qc.rhs_value), + std::vector(qc.quadratic_values.begin(), qc.quadratic_values.end()), + std::vector(qc.quadratic_indices.begin(), qc.quadratic_indices.end()), + std::vector(qc.quadratic_offsets.begin(), qc.quadratic_offsets.end())}); + } + } + + bool has_quadratic_constraints() const noexcept; + + const std::vector::quadratic_constraint_t>& + get_quadratic_constraints() const noexcept; + private: bool maximize_{false}; span A_; @@ -444,6 +474,8 @@ class data_model_view_t { span Q_objective_indices_; span Q_objective_offsets_; bool is_Q_symmetrized_{false}; + + std::vector::quadratic_constraint_t> quadratic_constraints_; }; // class data_model_view_t } // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/include/mps_parser/mps_data_model.hpp b/cpp/libmps_parser/include/mps_parser/mps_data_model.hpp index 6879e15d60..4d9c4bd8f3 100644 --- a/cpp/libmps_parser/include/mps_parser/mps_data_model.hpp +++ b/cpp/libmps_parser/include/mps_parser/mps_data_model.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -262,6 +262,57 @@ class mps_data_model_t { const i_t* Q_offsets, i_t size_offsets); + /** + * @brief One quadratic constraint as parsed from MPS sections (ROWS, COLUMNS, RHS, QCMATRIX). + * + * This bundles all pieces of a quadratic row: + * - row identity and type (from ROWS), + * - sparse linear coefficients (from COLUMNS), + * - RHS value (from RHS), + * - quadratic matrix Q in CSR (from QCMATRIX). + */ + struct quadratic_constraint_t { + /** ROWS declaration index (among all constraint rows), not an index into the linear CSR. */ + i_t constraint_row_index{}; + std::string constraint_row_name{}; + /** MPS ROWS sense for this quadratic row; only 'L' (≤) is supported for convex QCQP at the + * moment. */ + char constraint_row_type{}; + std::vector linear_values{}; + std::vector linear_indices{}; + f_t rhs_value{f_t(0)}; + std::vector quadratic_values{}; + std::vector quadratic_indices{}; + std::vector quadratic_offsets{}; + }; + + /** + * @brief Append one complete quadratic constraint (row + linear + rhs + quadratic Q). + * @note Pointer+size signature is kept for current CI/toolchain compatibility; `std::span` + * can be revisited later when compatibility constraints are lifted. + * @param linear_values, linear_indices Same nnz; can be empty for a purely quadratic row (rare). + * @param quadratic_values, quadratic_indices CSR nnz; may be empty if Q is empty. + * @param quadratic_offsets CSR row starts; must be non-empty. + * @param constraint_row_type MPS ROWS type; must be 'L'. 'G' and 'E' quadratic rows are not + * supported. 
+ */ + void append_quadratic_constraint(i_t constraint_row_index, + const std::string& constraint_row_name, + char constraint_row_type, + const f_t* linear_values, + i_t linear_nnz, + const i_t* linear_indices, + i_t linear_indices_nnz, + f_t rhs_value, + const f_t* quadratic_values, + i_t quadratic_size_values, + const i_t* quadratic_indices, + i_t quadratic_size_indices, + const i_t* quadratic_offsets, + i_t quadratic_size_offsets); + + const std::vector& get_quadratic_constraints() const; + i_t get_n_variables() const; i_t get_n_constraints() const; i_t get_nnz() const; @@ -306,6 +357,8 @@ class mps_data_model_t { bool has_quadratic_objective() const noexcept; + bool has_quadratic_constraints() const noexcept; + /** whether to maximize or minimize the objective function */ bool maximize_; /** @@ -342,7 +395,7 @@ class mps_data_model_t { std::string problem_name_; /** names of each of the variables in the OP */ std::vector var_names_{}; - /** names of each of the rows (aka constraints or objective) in the OP */ + /** names of linear constraint rows in exported MPS order. */ std::vector row_names_{}; /** number of variables */ i_t n_vars_{0}; @@ -361,6 +414,9 @@ class mps_data_model_t { std::vector Q_objective_indices_; std::vector Q_objective_offsets_; + /** One full quadratic constraint per QCMATRIX block, in order of appearance in the file */ + std::vector quadratic_constraints_; + }; // class mps_data_model_t } // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/include/mps_parser/parser.hpp b/cpp/libmps_parser/include/mps_parser/parser.hpp index e8e8c342bd..94230a0d4c 100644 --- a/cpp/libmps_parser/include/mps_parser/parser.hpp +++ b/cpp/libmps_parser/include/mps_parser/parser.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -23,6 +23,8 @@ namespace cuopt::mps_parser { * QPS files (for quadratic programming). QPS files are MPS files with additional * sections: * - QUADOBJ: Defines quadratic terms in the objective function + * - QMATRIX: Full symmetric quadratic objective matrix (alternative to QUADOBJ) + * - QCMATRIX: Symmetric quadratic terms for a named constraint row (QCQP) * * Note: Compressed MPS files .mps.gz, .mps.bz2 can only be read if the compression * libraries zlib or libbzip2 are installed, respectively. diff --git a/cpp/libmps_parser/include/mps_parser/utilities/span.hpp b/cpp/libmps_parser/include/mps_parser/utilities/span.hpp index 02679cd378..24a865de6a 100644 --- a/cpp/libmps_parser/include/mps_parser/utilities/span.hpp +++ b/cpp/libmps_parser/include/mps_parser/utilities/span.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -18,6 +18,8 @@ class span { span(T* ptr, std::size_t size) : ptr_(ptr), size_(size) {} std::size_t size() const noexcept { return size_; } const T* data() const noexcept { return ptr_; } + T& operator[](std::size_t i) noexcept { return ptr_[i]; } + T const& operator[](std::size_t i) const noexcept { return ptr_[i]; } private: T* ptr_{nullptr}; diff --git a/cpp/libmps_parser/src/data_model_view.cpp b/cpp/libmps_parser/src/data_model_view.cpp index 62b441aa60..aacedfcadb 100644 --- a/cpp/libmps_parser/src/data_model_view.cpp +++ b/cpp/libmps_parser/src/data_model_view.cpp @@ -355,6 +355,26 @@ bool data_model_view_t::is_Q_symmetrized() const noexcept return is_Q_symmetrized_; } +template +void data_model_view_t::set_quadratic_constraints( + std::vector::quadratic_constraint_t> constraints) +{ + quadratic_constraints_ = std::move(constraints); +} + +template +bool data_model_view_t::has_quadratic_constraints() const noexcept +{ + return !quadratic_constraints_.empty(); +} + +template +const std::vector::quadratic_constraint_t>& +data_model_view_t::get_quadratic_constraints() const noexcept +{ + return quadratic_constraints_; +} + // NOTE: Explicitly instantiate all types here in order to avoid linker error template class data_model_view_t; diff --git a/cpp/libmps_parser/src/mps_data_model.cpp b/cpp/libmps_parser/src/mps_data_model.cpp index 7d0d44a038..b9ae16dc03 100644 --- a/cpp/libmps_parser/src/mps_data_model.cpp +++ b/cpp/libmps_parser/src/mps_data_model.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,6 +9,7 @@ #include #include +#include namespace cuopt::mps_parser { @@ -219,6 +220,88 @@ void mps_data_model_t::set_quadratic_objective_matrix(const f_t* Q_val std::copy(Q_offsets, Q_offsets + size_offsets, Q_objective_offsets_.data()); } +template +void mps_data_model_t::append_quadratic_constraint(i_t constraint_row_index, + const std::string& constraint_row_name, + char constraint_row_type, + const f_t* linear_values, + i_t linear_nnz, + const i_t* linear_indices, + i_t linear_indices_nnz, + f_t rhs_value, + const f_t* quadratic_values, + i_t quadratic_size_values, + const i_t* quadratic_indices, + i_t quadratic_size_indices, + const i_t* quadratic_offsets, + i_t quadratic_size_offsets) +{ + mps_parser_expects(constraint_row_index >= 0, + error_type_t::ValidationError, + "constraint_row_index must be non-negative"); + + mps_parser_expects(constraint_row_type == 'L', + error_type_t::ValidationError, + "Quadratic constraint ROWS type must be 'L' (less-or-equal); got '%c'. 
" + "Only 'L' is supported for convex quadratic constraints.", + constraint_row_type); + + mps_parser_expects(linear_nnz == linear_indices_nnz, + error_type_t::ValidationError, + "linear_values and linear_indices must have the same nnz count"); + if (linear_nnz != 0) { + mps_parser_expects(linear_values != nullptr && linear_indices != nullptr, + error_type_t::ValidationError, + "linear_values and linear_indices cannot be null when linear_nnz > 0"); + } + + if (quadratic_size_values != 0) { + mps_parser_expects(quadratic_values != nullptr, + error_type_t::ValidationError, + "quadratic_values cannot be null"); + } + mps_parser_expects(quadratic_offsets != nullptr, + error_type_t::ValidationError, + "quadratic_offsets cannot be null"); + if (quadratic_size_indices != 0) { + mps_parser_expects(quadratic_indices != nullptr, + error_type_t::ValidationError, + "quadratic_indices cannot be null"); + } + mps_parser_expects(quadratic_size_offsets > 0, + error_type_t::ValidationError, + "quadratic_size_offsets cannot be empty"); + + quadratic_constraint_t qc; + qc.constraint_row_index = constraint_row_index; + qc.constraint_row_name = constraint_row_name; + qc.constraint_row_type = constraint_row_type; + qc.rhs_value = rhs_value; + + qc.linear_values.resize(linear_nnz); + qc.linear_indices.resize(linear_nnz); + if (linear_nnz > 0) { + std::copy(linear_values, linear_values + linear_nnz, qc.linear_values.data()); + std::copy(linear_indices, linear_indices + linear_nnz, qc.linear_indices.data()); + } + + qc.quadratic_values.resize(quadratic_size_values); + if (quadratic_size_values > 0) { + std::copy( + quadratic_values, quadratic_values + quadratic_size_values, qc.quadratic_values.data()); + } + qc.quadratic_indices.resize(quadratic_size_indices); + if (quadratic_size_indices > 0) { + std::copy( + quadratic_indices, quadratic_indices + quadratic_size_indices, qc.quadratic_indices.data()); + } + qc.quadratic_offsets.resize(quadratic_size_offsets); + std::copy( + 
quadratic_offsets, quadratic_offsets + quadratic_size_offsets, qc.quadratic_offsets.data()); + + quadratic_constraints_.push_back(std::move(qc)); +} + template const std::vector& mps_data_model_t::get_constraint_matrix_values() const { @@ -454,12 +537,25 @@ std::vector& mps_data_model_t::get_quadratic_objective_offsets() return Q_objective_offsets_; } +template +auto mps_data_model_t::get_quadratic_constraints() const + -> const std::vector& +{ + return quadratic_constraints_; +} + template bool mps_data_model_t::has_quadratic_objective() const noexcept { return !Q_objective_values_.empty(); } +template +bool mps_data_model_t::has_quadratic_constraints() const noexcept +{ + return !quadratic_constraints_.empty(); +} + // NOTE: Explicitly instantiate all types here in order to avoid linker error template class mps_data_model_t; diff --git a/cpp/libmps_parser/src/mps_parser.cpp b/cpp/libmps_parser/src/mps_parser.cpp index b73a09b2c2..f416bc37fb 100644 --- a/cpp/libmps_parser/src/mps_parser.cpp +++ b/cpp/libmps_parser/src/mps_parser.cpp @@ -14,11 +14,11 @@ #include #include #include -#include #include #include #include #include +#include #ifdef MPS_PARSER_WITH_BZIP2 #include @@ -272,21 +272,34 @@ ObjSenseType convert_to_obj_sense(const std::string& str) template void mps_parser_t::fill_problem(mps_data_model_t& problem) { + // Row indices that have QCMATRIX blocks (quadratic rows follow linear rows in ROWS under + // our MPS section rules; names are not required to be QC0..QCN) + std::unordered_set quadratic_row_ids{}; + for (const auto& block : qcmatrix_blocks_) { + quadratic_row_ids.insert(block.constraint_row_id); + } + const auto is_quadratic_row = [&quadratic_row_ids](i_t row) { + return quadratic_row_ids.count(row); + }; + { std::vector h_offsets{}, h_indices{}; std::vector h_values{}; h_offsets.push_back(0); + i_t num_linear_rows = 0; for (i_t i = 0; i < (i_t)A_indices.size(); ++i) { - i_t off = h_offsets.size() > 0 ? 
h_offsets[h_offsets.size() - 1] : 0; + // Quadratic constraint rows are omitted from the linear CSR; linear pieces live in each + // quadratic_constraint_t bundle. + if (is_quadratic_row(i)) { continue; } + ++num_linear_rows; for (const auto& idx_itr : A_indices[i]) { h_indices.push_back(idx_itr); } for (const auto& val_itr : A_values[i]) { h_values.push_back(val_itr); } - off += A_indices[i].size(); - h_offsets.push_back(off); + h_offsets.push_back(static_cast(h_indices.size())); } problem.set_csr_constraint_matrix(h_values.data(), @@ -296,11 +309,11 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) h_offsets.data(), h_offsets.size()); - mps_parser_expects(A_indices.size() + 1 == h_offsets.size(), + mps_parser_expects(static_cast(num_linear_rows) + 1 == h_offsets.size(), error_type_t::ValidationError, "The row indexing vector for the constraint matrix was not constructed " "successfully. Should be size %zu, but was size %zu", - A_indices.size() + 1, + static_cast(num_linear_rows) + 1, h_offsets.size()); mps_parser_expects( h_indices.size() == h_values.size(), @@ -320,8 +333,13 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) h_offsets[h_offsets.size() - 1]); } - // Set b & c - problem.set_constraint_bounds(b_values.data(), b_values.size()); + // Set b & c (RHS entries for quadratic rows are stored only on quadratic_constraint_t) + std::vector b_compacted{}; + b_compacted.reserve(b_values.size()); + for (i_t i = 0; i < (i_t)b_values.size(); ++i) { + if (!is_quadratic_row(i)) { b_compacted.push_back(b_values[i]); } + } + problem.set_constraint_bounds(b_compacted.data(), static_cast(b_compacted.size())); problem.set_objective_coefficients(c_values.data(), c_values.size()); // Set offset and scaling factor of objective function @@ -343,22 +361,25 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) problem.get_variable_lower_bounds().size(), problem.get_variable_upper_bounds().size()); - // Determine the constraint bounds based on row 
types + // Determine the constraint bounds based on row types (quadratic rows use bundles only, not + // counted here) { std::vector h_constraint_lower_bounds{}; std::vector h_constraint_upper_bounds{}; for (i_t i = 0; i < (i_t)row_types.size(); ++i) { + if (is_quadratic_row(i)) { continue; } if (row_types[i] == Equality) { h_constraint_lower_bounds.push_back(b_values[i]); h_constraint_upper_bounds.push_back(b_values[i]); + const size_t r = h_constraint_lower_bounds.size() - 1; if (ranges_values.size() > 0 && ranges_values[i] != unset_range_value) // Add range value if specified { - mps_parser_expects(!std::isnan(h_constraint_lower_bounds[i]), + mps_parser_expects(!std::isnan(h_constraint_lower_bounds[r]), error_type_t::ValidationError, "Constraints lower bound %d shouldn't be nan", i); - mps_parser_expects(!std::isnan(h_constraint_upper_bounds[i]), + mps_parser_expects(!std::isnan(h_constraint_upper_bounds[r]), error_type_t::ValidationError, "Constraints upper bound %d shouldn't be nan", i); @@ -367,17 +388,18 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) "Equality range value %d shouldn't be nan", i); if (ranges_values[i] < f_t(0)) - h_constraint_lower_bounds[i] = h_constraint_lower_bounds[i] + ranges_values[i]; + h_constraint_lower_bounds[r] = h_constraint_lower_bounds[r] + ranges_values[i]; else // Positive - h_constraint_upper_bounds[i] = h_constraint_upper_bounds[i] + ranges_values[i]; + h_constraint_upper_bounds[r] = h_constraint_upper_bounds[r] + ranges_values[i]; } } else if (row_types[i] == GreaterThanOrEqual) { h_constraint_lower_bounds.push_back(b_values[i]); h_constraint_upper_bounds.push_back(std::numeric_limits::infinity()); + const size_t r = h_constraint_lower_bounds.size() - 1; if (ranges_values.size() > 0 && ranges_values[i] != unset_range_value) // Add range value if specified { - mps_parser_expects(!std::isnan(h_constraint_lower_bounds[i]), + mps_parser_expects(!std::isnan(h_constraint_lower_bounds[r]), 
error_type_t::ValidationError, "Constraints lower bound %d shouldn't be nan", i); @@ -385,15 +407,16 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) error_type_t::ValidationError, "Greater range value %d shouldn't be nan", i); - h_constraint_upper_bounds[i] = h_constraint_lower_bounds[i] + std::abs(ranges_values[i]); + h_constraint_upper_bounds[r] = h_constraint_lower_bounds[r] + std::abs(ranges_values[i]); } } else if (row_types[i] == LesserThanOrEqual) { h_constraint_lower_bounds.push_back(-std::numeric_limits::infinity()); h_constraint_upper_bounds.push_back(b_values[i]); + const size_t r = h_constraint_lower_bounds.size() - 1; if (ranges_values.size() > 0 && ranges_values[i] != unset_range_value) // Add range value if specified { - mps_parser_expects(!std::isnan(h_constraint_upper_bounds[i]), + mps_parser_expects(!std::isnan(h_constraint_upper_bounds[r]), error_type_t::ValidationError, "Constraints upper bound %d shouldn't be nan", i); @@ -401,17 +424,18 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) error_type_t::ValidationError, "Lesser range value %d shouldn't be nan", i); - h_constraint_lower_bounds[i] = h_constraint_upper_bounds[i] - std::abs(ranges_values[i]); + h_constraint_lower_bounds[r] = h_constraint_upper_bounds[r] - std::abs(ranges_values[i]); } } else { mps_parser_expects(false, error_type_t::ValidationError, "Unsupported row type was passed to the Optimization Problem"); } + const size_t r = h_constraint_lower_bounds.size() - 1; mps_parser_expects( - !std::isnan(h_constraint_lower_bounds[i]), error_type_t::ValidationError, "Cannot be nan"); + !std::isnan(h_constraint_lower_bounds[r]), error_type_t::ValidationError, "Cannot be nan"); mps_parser_expects( - !std::isnan(h_constraint_upper_bounds[i]), error_type_t::ValidationError, "Cannot be nan"); + !std::isnan(h_constraint_upper_bounds[r]), error_type_t::ValidationError, "Cannot be nan"); } problem.set_constraint_lower_bounds(h_constraint_lower_bounds.data(), @@ -432,20 
+456,26 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) problem.get_constraint_upper_bounds().size()); } + const i_t num_vars_for_quad = static_cast(var_names.size()); + problem.set_problem_name(problem_name); problem.set_objective_name(objective_name); problem.set_variable_names(std::move(var_names)); problem.set_variable_types(std::move(var_types)); - problem.set_row_names(std::move(row_names)); problem.set_maximize(maximize); // Helper function to build CSR format using double transpose (O(m+n+nnz) instead of // O(nnz*log(nnz))) For QUADOBJ: handles upper triangular input by expanding to full symmetric - // matrix + // matrix. + // + // @p value_scale: + // QUADOBJ/QMATRIX use 0.5 (MPS ½ xᵀQx vs internal xᵀQx); + // QCMATRIX uses 1.0 (symmetric Q defines xᵀQx directly in the constraint). auto build_csr_via_transpose = [](const std::vector>& entries, i_t num_rows, i_t num_cols, - bool is_quadobj = false) { + bool symmetrize_upper_triangular, + f_t value_scale) { struct CSRResult { std::vector values; std::vector indices; @@ -467,7 +497,7 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) // For QUADOBJ (upper triangular), add both (row,col) and (col,row) if off-diagonal csc_data[col].emplace_back(row, val); - if (is_quadobj && row != col) { csc_data[row].emplace_back(col, val); } + if (symmetrize_upper_triangular && row != col) { csc_data[row].emplace_back(col, val); } } // Second transpose: convert CSC to CSR (entries sorted by row, columns within rows sorted) @@ -485,9 +515,10 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) for (i_t row = 0; row < num_rows; ++row) { for (const auto& [col, val] : csr_data[row]) { - // While the mps format expects to optimize for 0.5 xT Q x, cuopt optimizes for xT Q x - // so we have to multiply the value by 0.5 to get the correct value. 
- result.values.push_back(val * 0.5); + // While the mps format expects to optimize for 0.5 xT Q x, cuopt optimizes for xT Q x + // so we have to multiply the value by value_scale=0.5 to get + // the correct value. + result.values.push_back(val * value_scale); result.indices.push_back(col); } result.offsets.push_back(result.values.size()); @@ -500,8 +531,9 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) if (!quadobj_entries.empty()) { // Convert quadratic objective entries to CSR format using double transpose // QUADOBJ stores upper triangular elements, so we expand to full symmetric matrix - i_t num_vars = static_cast(var_names.size()); - auto csr_result = build_csr_via_transpose(quadobj_entries, num_vars, num_vars, true); + constexpr f_t k_mps_quad_half_scale = f_t(0.5); // MPS ½ xᵀQx vs internal xᵀQx + auto csr_result = build_csr_via_transpose( + quadobj_entries, num_vars_for_quad, num_vars_for_quad, true, k_mps_quad_half_scale); // Use optimized double transpose method - O(m+n+nnz) instead of O(nnz*log(nnz)) problem.set_quadratic_objective_matrix(csr_result.values.data(), @@ -513,8 +545,9 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) } else if (!qmatrix_entries.empty()) { // Convert quadratic objective entries to CSR format using double transpose // QMATRIX stores full symmetric matrix - i_t num_vars = static_cast(var_names.size()); - auto csr_result = build_csr_via_transpose(qmatrix_entries, num_vars, num_vars, false); + constexpr f_t k_mps_quad_half_scale = f_t(0.5); + auto csr_result = build_csr_via_transpose( + qmatrix_entries, num_vars_for_quad, num_vars_for_quad, false, k_mps_quad_half_scale); // Use optimized double transpose method - O(m+n+nnz) instead of O(nnz*log(nnz)) problem.set_quadratic_objective_matrix(csr_result.values.data(), @@ -524,6 +557,58 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) } + + // QCMATRIX: one 
symmetric Q per constraint row (no extra ½ factor vs file coeffs). + // Bundle row metadata, row-linear coefficients (from COLUMNS), rhs, and quadratic part together. + constexpr f_t k_qcmatrix_value_scale = f_t(1); + const i_t linear_row_count = static_cast(row_types.size() - quadratic_row_ids.size()); + i_t quadratic_row_id = 0; + for (const auto& block : qcmatrix_blocks_) { + auto csr_result = build_csr_via_transpose( + block.entries, num_vars_for_quad, num_vars_for_quad, false, k_qcmatrix_value_scale); + const i_t row_id = block.constraint_row_id; + mps_parser_expects(row_id >= 0 && row_id < static_cast(row_types.size()), + error_type_t::ValidationError, + "QCMATRIX row index %d is out of range for constraints", + static_cast(row_id)); + problem.append_quadratic_constraint(linear_row_count + quadratic_row_id, + row_names[row_id], + static_cast(row_types[row_id]), + A_values[row_id].data(), + static_cast(A_values[row_id].size()), + A_indices[row_id].data(), + static_cast(A_indices[row_id].size()), + b_values[row_id], + csr_result.values.data(), + static_cast(csr_result.values.size()), + csr_result.indices.data(), + static_cast(csr_result.indices.size()), + csr_result.offsets.data(), + static_cast(csr_result.offsets.size())); + ++quadratic_row_id; + } + + if (!quadratic_row_ids.empty()) { + std::vector linear_row_names{}; + std::vector row_types_linear{}; + linear_row_names.reserve(row_names.size()); + row_types_linear.reserve(row_names.size()); + for (size_t i = 0; i < row_names.size(); ++i) { + if (!is_quadratic_row(static_cast(i))) { + linear_row_names.push_back(row_names[i]); + row_types_linear.push_back(static_cast(row_types[i])); + } + } + problem.set_row_names(std::move(linear_row_names)); + problem.set_row_types(row_types_linear.data(), static_cast(row_types_linear.size())); + } else { + std::vector row_types_host(row_types.size()); + for (size_t i = 0; i < row_types.size(); ++i) { + row_types_host[i] = static_cast(row_types[i]); + } + 
problem.set_row_names(std::move(row_names)); + problem.set_row_types(row_types_host.data(), static_cast(row_types_host.size())); + } } template @@ -594,6 +679,11 @@ void mps_parser_t::parse_string(char* buf) // these lines mark the start of a particular "section" if (line[0] != ' ') { skip_line = false; + // Leaving QCMATRIX: any non-QCMATRIX section header ends the current block + if (inside_qcmatrix_ && line.find("QCMATRIX", 0, 8) != 0) { + flush_qcmatrix_block(); + inside_qcmatrix_ = false; + } if (line.find("NAME", 0, 4) == 0) { encountered_sections.insert("NAME"); auto name_start = line.find_first_not_of(" \t", 4); @@ -704,6 +794,7 @@ void mps_parser_t::parse_string(char* buf) inside_objname_ = false; inside_objsense_ = false; inside_qmatrix_ = false; + inside_qcmatrix_ = false; inside_quadobj_ = true; } else if (line.find("QMATRIX", 0, 7) == 0) { encountered_sections.insert("QMATRIX"); @@ -716,6 +807,21 @@ void mps_parser_t::parse_string(char* buf) inside_objsense_ = false; inside_quadobj_ = false; inside_qmatrix_ = true; + inside_qcmatrix_ = false; + } else if (line.find("QCMATRIX", 0, 8) == 0) { + encountered_sections.insert("QCMATRIX"); + flush_qcmatrix_block(); + inside_rows_ = false; + inside_columns_ = false; + inside_rhs_ = false; + inside_bounds_ = false; + inside_ranges_ = false; + inside_objname_ = false; + inside_objsense_ = false; + inside_quadobj_ = false; + inside_qmatrix_ = false; + inside_qcmatrix_ = true; + parse_qcmatrix_header(line); } else if (line.find("ENDATA", 0, 6) == 0) { encountered_sections.insert("ENDATA"); break; @@ -732,6 +838,7 @@ void mps_parser_t::parse_string(char* buf) inside_objname_ = false; inside_quadobj_ = false; inside_qmatrix_ = false; + inside_qcmatrix_ = false; } else { mps_parser_expects(false, error_type_t::ValidationError, @@ -758,6 +865,8 @@ void mps_parser_t::parse_string(char* buf) parse_quad(line, true); } else if (inside_qmatrix_) { parse_quad(line, false); + } else if (inside_qcmatrix_) { + 
parse_qcmatrix_data(line); } else { mps_parser_expects(false, error_type_t::ValidationError, @@ -1277,6 +1386,123 @@ void mps_parser_t::parse_objname(std::string_view line) } } +template +void mps_parser_t::flush_qcmatrix_block() +{ + if (qcmatrix_active_row_id_ < 0) { return; } + if (qcmatrix_current_entries_.empty()) { + qcmatrix_active_row_id_ = -1; + return; + } + for (const auto& b : qcmatrix_blocks_) { + mps_parser_expects(b.constraint_row_id != qcmatrix_active_row_id_, + error_type_t::ValidationError, + "Duplicate QCMATRIX block for the same constraint row (index %d)", + static_cast(qcmatrix_active_row_id_)); + } + qcmatrix_raw_block_t block; + block.constraint_row_id = qcmatrix_active_row_id_; + block.entries = std::move(qcmatrix_current_entries_); + qcmatrix_blocks_.push_back(std::move(block)); + qcmatrix_active_row_id_ = -1; +} + +template +void mps_parser_t::parse_qcmatrix_header(std::string_view line) +{ + std::string row_name; + if (fixed_mps_format) { + mps_parser_expects(line.size() >= 19, + error_type_t::ValidationError, + "QCMATRIX header line too short! line=%s", + std::string(line).c_str()); + // fixed MPS: constraint name starts in column 12 (1-based) → 0-based index 11, 8 chars + row_name = std::string(trim(line.substr(11, 8))); + } else { + std::stringstream ss{std::string(line)}; + std::string kw; + ss >> kw; + mps_parser_expects(kw == "QCMATRIX", + error_type_t::ValidationError, + "Expected QCMATRIX keyword! line=%s", + std::string(line).c_str()); + ss >> row_name; + mps_parser_expects(!row_name.empty(), + error_type_t::ValidationError, + "QCMATRIX missing constraint row name! line=%s", + std::string(line).c_str()); + } + + auto row_it = row_names_map.find(row_name); + mps_parser_expects(row_it != row_names_map.end(), + error_type_t::ValidationError, + "Unknown constraint row name '%s' in QCMATRIX! 
line=%s", + row_name.c_str(), + std::string(line).c_str()); + + qcmatrix_active_row_id_ = row_it->second; +} + +template +void mps_parser_t::parse_qcmatrix_data(std::string_view line) +{ + mps_parser_expects(qcmatrix_active_row_id_ >= 0, + error_type_t::ValidationError, + "QCMATRIX data line before a valid QCMATRIX header! line=%s", + std::string(line).c_str()); + + std::string var1_name, var2_name; + f_t value; + + if (fixed_mps_format) { + mps_parser_expects(line.size() >= 25, + error_type_t::ValidationError, + "QCMATRIX data line should have at least 3 entities! line=%s", + std::string(line).c_str()); + + var1_name = std::string(trim(line.substr(4, 8))); + var2_name = std::string(trim(line.substr(14, 8))); + if (var1_name[0] == '$' || var2_name[0] == '$') return; + + i_t pos = 24; + value = get_numerical_bound(line, pos); + } else { + i_t pos = 0; + i_t end = 0; + const std::string_view var1_sv = get_next_string(line, pos, end); + mps_parser_expects(!var1_sv.empty(), + error_type_t::ValidationError, + "QCMATRIX data line missing first variable name! line=%s", + std::string(line).c_str()); + if (var1_sv[0] == '$') return; + const std::string_view var2_sv = get_next_string(line, pos, end); + mps_parser_expects(!var2_sv.empty(), + error_type_t::ValidationError, + "QCMATRIX data line missing second variable name! line=%s", + std::string(line).c_str()); + if (var2_sv[0] == '$') return; + value = get_numerical_bound(line, end); + var1_name = std::string(var1_sv); + var2_name = std::string(var2_sv); + } + + auto var1_it = var_names_map.find(var1_name); + auto var2_it = var_names_map.find(var2_name); + + mps_parser_expects(var1_it != var_names_map.end(), + error_type_t::ValidationError, + "Variable '%s' not found in QCMATRIX! line=%s", + var1_name.c_str(), + std::string(line).c_str()); + mps_parser_expects(var2_it != var_names_map.end(), + error_type_t::ValidationError, + "Variable '%s' not found in QCMATRIX! 
line=%s", + var2_name.c_str(), + std::string(line).c_str()); + + qcmatrix_current_entries_.emplace_back(var1_it->second, var2_it->second, value); +} + template void mps_parser_t::parse_quad(std::string_view line, bool is_quadobj) { @@ -1299,9 +1525,23 @@ void mps_parser_t::parse_quad(std::string_view line, bool is_quadobj) i_t pos = 24; value = get_numerical_bound(line, pos); } else { - std::stringstream ss{std::string(line)}; - ss >> var1_name >> var2_name >> value; - if (var1_name[0] == '$' || var2_name[0] == '$') return; + i_t pos = 0; + i_t end = 0; + const std::string_view var1_sv = get_next_string(line, pos, end); + mps_parser_expects(!var1_sv.empty(), + error_type_t::ValidationError, + "QUADOBJ/QMATRIX data line missing first variable name! line=%s", + std::string(line).c_str()); + if (var1_sv[0] == '$') return; + const std::string_view var2_sv = get_next_string(line, pos, end); + mps_parser_expects(!var2_sv.empty(), + error_type_t::ValidationError, + "QUADOBJ/QMATRIX data line missing second variable name! line=%s", + std::string(line).c_str()); + if (var2_sv[0] == '$') return; + value = get_numerical_bound(line, end); + var1_name = std::string(var1_sv); + var2_name = std::string(var2_sv); } // Find variable indices diff --git a/cpp/libmps_parser/src/mps_parser.hpp b/cpp/libmps_parser/src/mps_parser.hpp index facad14c66..6e56d4bce3 100644 --- a/cpp/libmps_parser/src/mps_parser.hpp +++ b/cpp/libmps_parser/src/mps_parser.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,18 @@ class mps_parser_t { // QPS-specific parsing states bool inside_quadobj_{false}; bool inside_qmatrix_{false}; + bool inside_qcmatrix_{false}; + + /** (free-format) QCMATRIX: finalized blocks (row id + triples) */ + struct qcmatrix_raw_block_t { + i_t constraint_row_id{}; + std::vector> entries{}; + }; + std::vector qcmatrix_blocks_{}; + /** Triples for the QCMATRIX block currently being read (-1 row id means none) */ + i_t qcmatrix_active_row_id_{-1}; + std::vector> qcmatrix_current_entries_{}; + std::unordered_set encountered_sections{}; std::unordered_map row_names_map{}; std::unordered_map var_names_map{}; @@ -170,6 +183,11 @@ class mps_parser_t { // QPS-specific parsing methods void parse_quad(std::string_view line, bool is_quadobj); + // QCMATRIX-specific parsing methods + void flush_qcmatrix_block(); + void parse_qcmatrix_header(std::string_view line); + void parse_qcmatrix_data(std::string_view line); + }; // class mps_parser_t } // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/src/mps_writer.cpp b/cpp/libmps_parser/src/mps_writer.cpp index 3a0997774b..c5cbd4b3f9 100644 --- a/cpp/libmps_parser/src/mps_writer.cpp +++ b/cpp/libmps_parser/src/mps_writer.cpp @@ -12,16 +12,29 @@ #include #include +#include #include #include #include -#include #include #include #include +#include namespace cuopt::mps_parser { +namespace { + +template +char linear_row_type_from_bounds(f_t cl, f_t cu) +{ + if (cl == cu) { return 'E'; } + if (std::isinf(cu)) { return 'G'; } + return 'L'; +} + +} // namespace + template mps_writer_t::mps_writer_t(const data_model_view_t& problem) : problem_(problem) { @@ -103,6 +116,12 @@ data_model_view_t mps_writer_t::create_view( static_cast(Q_offsets.size())); } + if (model.has_quadratic_constraints()) { + view.set_quadratic_constraints( + 
std::vector::quadratic_constraint_t>( + model.get_quadratic_constraints())); + } + return view; } @@ -129,6 +148,8 @@ void mps_writer_t::write(const std::string& mps_file_path) n_constraints = problem_.get_constraint_bounds().size(); else n_constraints = problem_.get_constraint_lower_bounds().size(); + const auto& quadratic_constraints = problem_.get_quadratic_constraints(); + const i_t n_quadratic_constraints = static_cast(quadratic_constraints.size()); std::vector objective_coefficients(problem_.get_objective_coefficients().size()); std::vector constraint_lower_bounds(n_constraints); @@ -211,16 +232,20 @@ void mps_writer_t::write(const std::string& mps_file_path) mps_file << " N " << (problem_.get_objective_name().empty() ? "OBJ" : problem_.get_objective_name()) << "\n"; - for (size_t i = 0; i < (size_t)n_constraints; i++) { + for (size_t k = 0; k < static_cast(n_constraints); ++k) { std::string row_name = - i < problem_.get_row_names().size() ? problem_.get_row_names()[i] : "R" + std::to_string(i); - char type = 'L'; - if (constraint_lower_bounds[i] == constraint_upper_bounds[i]) - type = 'E'; - else if (std::isinf(constraint_upper_bounds[i])) - type = 'G'; + k < problem_.get_row_names().size() ? problem_.get_row_names()[k] : "R" + std::to_string(k); + char const type = + linear_row_type_from_bounds(constraint_lower_bounds[k], constraint_upper_bounds[k]); mps_file << " " << type << " " << row_name << "\n"; } + for (size_t q = 0; q < quadratic_constraints.size(); ++q) { + const auto& qc = quadratic_constraints[q]; + std::string row_name = + qc.constraint_row_name.empty() ? "QC" + std::to_string(q) : qc.constraint_row_name; + // Quadratic rows are currently restricted to MPS 'L' (<=). 
+ mps_file << " L " << row_name << "\n"; + } // COLUMNS section mps_file << "COLUMNS\n"; @@ -230,9 +255,13 @@ void mps_writer_t::write(const std::string& mps_file_path) std::vector var_in_constraint(n_variables, false); std::map>> integral_col_nnzs; std::map>> continuous_col_nnzs; - for (size_t row_id = 0; row_id < (size_t)n_constraints; row_id++) { - for (size_t k = (size_t)constraint_matrix_offsets[row_id]; - k < (size_t)constraint_matrix_offsets[row_id + 1]; + + // iterate over the constraint matrix and add the nonzeros to the integral and continuous col_nnzs + // maps + for (size_t csr_row = 0; csr_row < (size_t)n_constraints; csr_row++) { + const i_t row_id = static_cast(csr_row); + for (size_t k = (size_t)constraint_matrix_offsets[csr_row]; + k < (size_t)constraint_matrix_offsets[csr_row + 1]; k++) { size_t var = (size_t)constraint_matrix_indices[k]; if (variable_types[var] == 'I') { @@ -244,6 +273,24 @@ void mps_writer_t::write(const std::string& mps_file_path) } } + // Quadratic constraint rows omit linear coefficients from global A; add them from QC bundles. + if (problem_.has_quadratic_constraints()) { + for (size_t q = 0; q < quadratic_constraints.size(); ++q) { + const auto& qc = quadratic_constraints[q]; + const size_t row_id = static_cast(n_constraints) + q; + for (size_t t = 0; t < qc.linear_indices.size(); ++t) { + size_t var = static_cast(qc.linear_indices[t]); + f_t val = qc.linear_values[t]; + if (variable_types[var] == 'I') { + integral_col_nnzs[var].emplace_back(row_id, val); + } else { + continuous_col_nnzs[var].emplace_back(row_id, val); + } + var_in_constraint[var] = true; + } + } + } + // Record and explicitely declared variables not contained in any constraint. std::vector orphan_continuous_vars; std::vector orphan_integer_vars; @@ -276,9 +323,18 @@ void mps_writer_t::write(const std::string& mps_file_path) ? 
problem_.get_variable_names()[var_id] : "C" + std::to_string(var_id); for (auto& nnz : nnzs) { - std::string row_name = nnz.first < problem_.get_row_names().size() - ? problem_.get_row_names()[nnz.first] - : "R" + std::to_string(nnz.first); + std::string row_name; + if (static_cast(nnz.first) < problem_.get_row_names().size()) { + row_name = problem_.get_row_names()[nnz.first]; + } else if (static_cast(nnz.first) < + static_cast(n_constraints) + quadratic_constraints.size()) { + const size_t q = static_cast(nnz.first) - static_cast(n_constraints); + row_name = quadratic_constraints[q].constraint_row_name.empty() + ? "QC" + std::to_string(q) + : quadratic_constraints[q].constraint_row_name; + } else { + row_name = "R" + std::to_string(nnz.first); + } mps_file << " " << col_name << " " << row_name << " " << nnz.second << "\n"; } // Write objective coefficients @@ -293,21 +349,28 @@ void mps_writer_t::write(const std::string& mps_file_path) // RHS section mps_file << "RHS\n"; - for (size_t i = 0; i < (size_t)n_constraints; i++) { + for (size_t k = 0; k < static_cast(n_constraints); ++k) { std::string row_name = - i < problem_.get_row_names().size() ? problem_.get_row_names()[i] : "R" + std::to_string(i); - - f_t rhs; + k < problem_.get_row_names().size() ? 
problem_.get_row_names()[k] : "R" + std::to_string(k); + f_t rhs{0}; if (constraint_bounds.size() > 0) - rhs = constraint_bounds[i]; - else if (std::isinf(constraint_lower_bounds[i])) { - rhs = constraint_upper_bounds[i]; - } else if (std::isinf(constraint_upper_bounds[i])) { - rhs = constraint_lower_bounds[i]; - } else { // RANGES, encode the lower bound - rhs = constraint_lower_bounds[i]; + rhs = constraint_bounds[k]; + else if (std::isinf(constraint_lower_bounds[k])) { + rhs = constraint_upper_bounds[k]; + } else if (std::isinf(constraint_upper_bounds[k])) { + rhs = constraint_lower_bounds[k]; + } else { + rhs = constraint_lower_bounds[k]; } - + if (std::isfinite(rhs) && rhs != 0.0) { + mps_file << " RHS1 " << row_name << " " << rhs << "\n"; + } + } + for (size_t q = 0; q < quadratic_constraints.size(); ++q) { + const auto& qc = quadratic_constraints[q]; + std::string row_name = + qc.constraint_row_name.empty() ? "QC" + std::to_string(q) : qc.constraint_row_name; + const f_t rhs = qc.rhs_value; if (std::isfinite(rhs) && rhs != 0.0) { mps_file << " RHS1 " << row_name << " " << rhs << "\n"; } @@ -427,6 +490,29 @@ void mps_writer_t::write(const std::string& mps_file_path) } } + // QCMATRIX sections for quadratic constraints (QCQP) + if (problem_.has_quadratic_constraints()) { + for (const auto& qc : problem_.get_quadratic_constraints()) { + mps_file << "QCMATRIX " << qc.constraint_row_name << "\n"; + const i_t n_quad_rows = static_cast(qc.quadratic_offsets.size()) - 1; + for (i_t i = 0; i < n_quad_rows; ++i) { + std::string row_var_name = static_cast(i) < problem_.get_variable_names().size() + ? problem_.get_variable_names()[i] + : "C" + std::to_string(i); + for (i_t p = qc.quadratic_offsets[i]; p < qc.quadratic_offsets[i + 1]; ++p) { + i_t j = qc.quadratic_indices[p]; + f_t v = qc.quadratic_values[p]; + std::string col_var_name = static_cast(j) < problem_.get_variable_names().size() + ? 
problem_.get_variable_names()[j] + : "C" + std::to_string(j); + if (v != f_t(0)) { + mps_file << " " << row_var_name << " " << col_var_name << " " << v << "\n"; + } + } + } + } + } + mps_file << "ENDATA\n"; mps_file.close(); } diff --git a/cpp/libmps_parser/tests/mps_parser_test.cpp b/cpp/libmps_parser/tests/mps_parser_test.cpp index f915fb2df5..669a8c5e05 100644 --- a/cpp/libmps_parser/tests/mps_parser_test.cpp +++ b/cpp/libmps_parser/tests/mps_parser_test.cpp @@ -13,10 +13,13 @@ #include +#include #include #include #include +#include #include +#include #include #include @@ -855,6 +858,173 @@ TEST(qps_parser, quadratic_objective_basic) EXPECT_EQ(1.0, model.get_quadratic_objective_values()[1]); } +// ================================================================================================ +// QCMATRIX Support Tests +// ================================================================================================ + +TEST(qps_parser, qcmatrix_append_api) +{ + using model_t = mps_data_model_t; + model_t model; + + // Validate default-constructed struct shape. 
+ model_t::quadratic_constraint_t default_qcm; + EXPECT_EQ(0, default_qcm.constraint_row_index); + EXPECT_TRUE(default_qcm.quadratic_values.empty()); + EXPECT_TRUE(default_qcm.quadratic_indices.empty()); + EXPECT_TRUE(default_qcm.quadratic_offsets.empty()); + EXPECT_TRUE(default_qcm.linear_values.empty()); + EXPECT_TRUE(default_qcm.linear_indices.empty()); + EXPECT_EQ(0.0, default_qcm.rhs_value); + + // QC0: [[10, 2], [2, 2]] + const std::vector qc0_values = {10.0, 2.0, 2.0, 2.0}; + const std::vector qc0_indices = {0, 1, 0, 1}; + const std::vector qc0_offsets = {0, 2, 4}; + const std::vector qc0_linear_values = {1.0, 1.0}; + const std::vector qc0_linear_indices = {0, 1}; + model.append_quadratic_constraint(0, + "QC0", + 'L', + qc0_linear_values.data(), + qc0_linear_values.size(), + qc0_linear_indices.data(), + qc0_linear_indices.size(), + 5.0, + qc0_values.data(), + qc0_values.size(), + qc0_indices.data(), + qc0_indices.size(), + qc0_offsets.data(), + qc0_offsets.size()); + + // QC1: [[4, 1], [1, 6]] + const std::vector qc1_values = {4.0, 1.0, 1.0, 6.0}; + const std::vector qc1_indices = {0, 1, 0, 1}; + const std::vector qc1_offsets = {0, 2, 4}; + const std::vector qc1_linear_values = {3.0, 1.0}; + const std::vector qc1_linear_indices = {0, 1}; + model.append_quadratic_constraint(1, + "QC1", + 'L', + qc1_linear_values.data(), + qc1_linear_values.size(), + qc1_linear_indices.data(), + qc1_linear_indices.size(), + 10.0, + qc1_values.data(), + qc1_values.size(), + qc1_indices.data(), + qc1_indices.size(), + qc1_offsets.data(), + qc1_offsets.size()); + + ASSERT_TRUE(model.has_quadratic_constraints()); + const auto& qcs = model.get_quadratic_constraints(); + ASSERT_EQ(2u, qcs.size()); + + EXPECT_EQ(0, qcs[0].constraint_row_index); + EXPECT_EQ("QC0", qcs[0].constraint_row_name); + EXPECT_EQ('L', qcs[0].constraint_row_type); + EXPECT_EQ(qc0_linear_values, qcs[0].linear_values); + EXPECT_EQ(qc0_linear_indices, qcs[0].linear_indices); + EXPECT_EQ(5.0, qcs[0].rhs_value); + 
EXPECT_EQ(qc0_values, qcs[0].quadratic_values); + EXPECT_EQ(qc0_indices, qcs[0].quadratic_indices); + EXPECT_EQ(qc0_offsets, qcs[0].quadratic_offsets); + + EXPECT_EQ(1, qcs[1].constraint_row_index); + EXPECT_EQ("QC1", qcs[1].constraint_row_name); + EXPECT_EQ('L', qcs[1].constraint_row_type); + EXPECT_EQ(qc1_linear_values, qcs[1].linear_values); + EXPECT_EQ(qc1_linear_indices, qcs[1].linear_indices); + EXPECT_EQ(10.0, qcs[1].rhs_value); + EXPECT_EQ(qc1_values, qcs[1].quadratic_values); + EXPECT_EQ(qc1_indices, qcs[1].quadratic_indices); + EXPECT_EQ(qc1_offsets, qcs[1].quadratic_offsets); +} + +// QCQP MPS: each quadratic constraint bundles row + linear + rhs + quadratic. +TEST(qps_parser, qcmatrix_mps_linear_rhs_and_bounds) +{ + if (!file_exists("qcqp/QC_Test_1.mps")) { + GTEST_SKIP() << "qcqp/QC_Test_1.mps not in dataset root"; + } + const auto model = parse_mps( + cuopt::test::get_rapids_dataset_root_dir() + "/qcqp/QC_Test_1.mps", false); + + ASSERT_TRUE(model.has_quadratic_constraints()); + const auto& qcs = model.get_quadratic_constraints(); + ASSERT_EQ(2u, qcs.size()); + + ASSERT_EQ(1, model.get_n_constraints()); + ASSERT_EQ(1u, model.get_row_names().size()); + EXPECT_EQ("LIN0", model.get_row_names()[0]); + EXPECT_EQ('L', model.get_row_types()[0]); + + // LIN0: 2*x1 + x2 ≤ 15 (linear row only; not duplicated in quadratic_constraints) + EXPECT_DOUBLE_EQ(-std::numeric_limits::infinity(), + model.get_constraint_lower_bounds()[0]); + EXPECT_DOUBLE_EQ(15.0, model.get_constraint_upper_bounds()[0]); + const auto& A_off = model.get_constraint_matrix_offsets(); + const auto& A_val = model.get_constraint_matrix_values(); + const auto& A_idx = model.get_constraint_matrix_indices(); + ASSERT_EQ(2, A_off[1] - A_off[0]); + EXPECT_EQ(2.0, A_val[A_off[0] + 0]); + EXPECT_EQ(1.0, A_val[A_off[0] + 1]); + EXPECT_EQ(0, A_idx[A_off[0] + 0]); + EXPECT_EQ(1, A_idx[A_off[0] + 1]); + + // QC0: x1 + x2 + xᵀQ₀x ≤ 5 (MPS ROWS declaration index 1; OBJ 'N' rows are not counted) + 
EXPECT_EQ(1, qcs[0].constraint_row_index); + EXPECT_EQ("QC0", qcs[0].constraint_row_name); + EXPECT_EQ('L', qcs[0].constraint_row_type); + ASSERT_EQ(2u, qcs[0].linear_values.size()); + EXPECT_EQ(1.0, qcs[0].linear_values[0]); + EXPECT_EQ(1.0, qcs[0].linear_values[1]); + EXPECT_EQ(0, qcs[0].linear_indices[0]); + EXPECT_EQ(1, qcs[0].linear_indices[1]); + EXPECT_DOUBLE_EQ(5.0, qcs[0].rhs_value); + EXPECT_FALSE(qcs[0].quadratic_values.empty()); + + // QC1: 3*x1 + x2 + xᵀQ₁x ≤ 10 + EXPECT_EQ(2, qcs[1].constraint_row_index); + EXPECT_EQ("QC1", qcs[1].constraint_row_name); + EXPECT_EQ('L', qcs[1].constraint_row_type); + ASSERT_EQ(2u, qcs[1].linear_values.size()); + EXPECT_EQ(3.0, qcs[1].linear_values[0]); + EXPECT_EQ(1.0, qcs[1].linear_values[1]); + EXPECT_DOUBLE_EQ(10.0, qcs[1].rhs_value); +} + +TEST(qps_parser, qcqp_p0033_mps_sections) +{ + if (!file_exists("qcqp/p0033_qc1.mps")) { + GTEST_SKIP() << "qcqp/p0033_qc1.mps not in dataset root"; + } + const auto model = parse_mps( + cuopt::test::get_rapids_dataset_root_dir() + "/qcqp/p0033_qc1.mps", false); + + EXPECT_EQ(12, model.get_n_constraints()); + EXPECT_EQ(33, model.get_n_variables()); + ASSERT_EQ(12u, model.get_row_types().size()); + ASSERT_EQ(12u, model.get_row_names().size()); + + const auto& qcs = model.get_quadratic_constraints(); + ASSERT_EQ(4u, qcs.size()); + EXPECT_EQ(12, qcs[0].constraint_row_index); + ASSERT_EQ(1u, qcs[0].linear_values.size()); + EXPECT_DOUBLE_EQ(1.0, qcs[0].linear_values[0]); + + const auto& vnames = model.get_variable_names(); + auto c159_it = std::find(vnames.begin(), vnames.end(), std::string("C159")); + ASSERT_NE(c159_it, vnames.end()); + EXPECT_EQ(static_cast(c159_it - vnames.begin()), qcs[0].linear_indices[0]); + + EXPECT_DOUBLE_EQ(1.0, qcs[0].rhs_value); + EXPECT_FALSE(qcs[0].quadratic_values.empty()); +} + // Test actual QPS files from the dataset TEST(qps_parser, test_qps_files) { @@ -1017,6 +1187,37 @@ void compare_data_models(const mps_data_model_t& original, 
EXPECT_EQ(orig_Q_off[i], reload_Q_off[i]) << "Q offset mismatch at index " << i; } } + + EXPECT_EQ(original.has_quadratic_constraints(), reloaded.has_quadratic_constraints()); + if (original.has_quadratic_constraints() && reloaded.has_quadratic_constraints()) { + const auto& oqc = original.get_quadratic_constraints(); + const auto& rq = reloaded.get_quadratic_constraints(); + ASSERT_EQ(oqc.size(), rq.size()) << "Quadratic constraint count mismatch"; + for (size_t k = 0; k < oqc.size(); ++k) { + EXPECT_EQ(oqc[k].constraint_row_index, rq[k].constraint_row_index); + EXPECT_EQ(oqc[k].constraint_row_name, rq[k].constraint_row_name); + EXPECT_EQ(oqc[k].constraint_row_type, rq[k].constraint_row_type); + EXPECT_NEAR(oqc[k].rhs_value, rq[k].rhs_value, tol); + ASSERT_EQ(oqc[k].linear_values.size(), rq[k].linear_values.size()); + ASSERT_EQ(oqc[k].linear_indices.size(), rq[k].linear_indices.size()); + for (size_t i = 0; i < oqc[k].linear_values.size(); ++i) { + EXPECT_NEAR(oqc[k].linear_values[i], rq[k].linear_values[i], tol); + EXPECT_EQ(oqc[k].linear_indices[i], rq[k].linear_indices[i]); + } + ASSERT_EQ(oqc[k].quadratic_values.size(), rq[k].quadratic_values.size()); + ASSERT_EQ(oqc[k].quadratic_indices.size(), rq[k].quadratic_indices.size()); + ASSERT_EQ(oqc[k].quadratic_offsets.size(), rq[k].quadratic_offsets.size()); + for (size_t i = 0; i < oqc[k].quadratic_values.size(); ++i) { + EXPECT_NEAR(oqc[k].quadratic_values[i], rq[k].quadratic_values[i], tol); + } + for (size_t i = 0; i < oqc[k].quadratic_indices.size(); ++i) { + EXPECT_EQ(oqc[k].quadratic_indices[i], rq[k].quadratic_indices[i]); + } + for (size_t i = 0; i < oqc[k].quadratic_offsets.size(); ++i) { + EXPECT_EQ(oqc[k].quadratic_offsets[i], rq[k].quadratic_offsets[i]); + } + } + } } TEST(mps_roundtrip, linear_programming_basic) @@ -1127,4 +1328,29 @@ TEST(mps_roundtrip, quadratic_programming_qp_test_2) std::filesystem::remove(temp_file); } +TEST(mps_roundtrip, qcqp_p0033_qc1) +{ + if 
(!file_exists("qcqp/p0033_qc1.mps")) { GTEST_SKIP() << "Test file not found"; } + + std::string input_file = cuopt::test::get_rapids_dataset_root_dir() + "/qcqp/p0033_qc1.mps"; + std::string temp_file = "/tmp/mps_roundtrip_p0033_qc1.mps"; + std::string temp_file_2 = "/tmp/mps_roundtrip_p0033_qc1_r2.mps"; + + auto original = parse_mps(input_file, false); + ASSERT_TRUE(original.has_quadratic_objective()); + ASSERT_TRUE(original.has_quadratic_constraints()); + + mps_writer_t writer(original); + writer.write(temp_file); + + auto reloaded = parse_mps(temp_file, false); + mps_writer_t writer_r2(reloaded); + writer_r2.write(temp_file_2); + auto reloaded_2 = parse_mps(temp_file_2, false); + compare_data_models(reloaded, reloaded_2); + + std::filesystem::remove(temp_file); + std::filesystem::remove(temp_file_2); +} + } // namespace cuopt::mps_parser diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index 778038db1f..bdb571ec32 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -34,6 +35,8 @@ #include #include +#include +#include #include #include @@ -43,6 +46,7 @@ #include #include #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -163,6 +167,8 @@ class iteration_data_t { d_inv_diag(lp.num_cols, lp.handle_ptr->get_stream()), d_cols_to_remove(0, lp.handle_ptr->get_stream()), d_augmented_diagonal_indices_(0, lp.handle_ptr->get_stream()), + d_cone_csr_indices_(0, lp.handle_ptr->get_stream()), + d_cone_Q_values_(0, lp.handle_ptr->get_stream()), use_augmented(false), has_factorization(false), num_factorizations(0), @@ -206,9 +212,11 @@ class iteration_data_t { d_dw_residual_(0, lp.handle_ptr->get_stream()), d_wv_residual_(0, lp.handle_ptr->get_stream()), d_bound_rhs_(0, lp.handle_ptr->get_stream()), - d_complementarity_xz_rhs_(lp.num_cols, lp.handle_ptr->get_stream()), + d_complementarity_xz_rhs_(0, lp.handle_ptr->get_stream()), 
d_complementarity_wv_rhs_(0, lp.handle_ptr->get_stream()), d_dual_rhs_(lp.num_cols, lp.handle_ptr->get_stream()), + d_complementarity_target_(lp.num_cols, lp.handle_ptr->get_stream()), + d_cone_hinv2_dx_(0, lp.handle_ptr->get_stream()), d_Q_diag_(0, lp.handle_ptr->get_stream()), d_Qx_(Qin.m, lp.handle_ptr->get_stream()), restrict_u_(0), @@ -216,7 +224,9 @@ class iteration_data_t { sum_reduce_helper_(lp.handle_ptr->get_stream()), indefinite_Q(false), Q_diagonal(false), - symbolic_status(0) + symbolic_status(0), + cone_combined_step_(false), + cone_sigma_mu_(f_t(0)) { raft::common::nvtx::range fun_scope("Barrier: LP Data Creation"); @@ -258,6 +268,26 @@ class iteration_data_t { raft::copy(d_Q_diag_.data(), Qdiag.data(), Qdiag.size(), stream_view_); } + if (!lp.second_order_cone_dims.empty()) { + cone_var_start_ = lp.cone_var_start; + i_t total_cone_dim = + std::accumulate(lp.second_order_cone_dims.begin(), lp.second_order_cone_dims.end(), i_t(0)); + cuopt_assert(cone_var_start_ >= 0, "cone_var_start must be nonnegative"); + cuopt_assert(cone_var_start_ + total_cone_dim <= lp.num_cols, + "cone variables exceed problem dimension"); + cuopt_assert(cone_var_start_ + total_cone_dim == lp.num_cols, + "barrier expects [linear | cone] layout"); + cones_.emplace( + std::span(lp.second_order_cone_dims.data(), lp.second_order_cone_dims.size()), + raft::device_span{}, + raft::device_span{}, + stream_view_); + cuopt_assert(cone_count() > 0, "second-order cone topology must contain at least one cone"); + cuopt_assert(cone_entry_count() == total_cone_dim, "second-order cone entry count mismatch"); + } + const i_t linear_xz_rhs_size = linear_xz_size(lp.num_cols); + d_complementarity_xz_rhs_.resize(linear_xz_rhs_size, stream_view_); + // Allocating GPU flag data for Form ADAT RAFT_CUDA_TRY(cub::DeviceSelect::Flagged( nullptr, @@ -321,6 +351,11 @@ class iteration_data_t { use_augmented = !Q_diagonal; } + if (has_cones() && !use_augmented) { + n_dense_columns = 0; + use_augmented = 
true; + } + if (use_augmented) { settings.log.printf("Linear system : augmented\n"); } else { @@ -418,6 +453,51 @@ class iteration_data_t { } } + bool has_cones() const { return cones_.has_value(); } + + cone_data_t& cones() + { + cuopt_assert(cones_.has_value(), "second-order cone data is not initialized"); + return *cones_; + } + + const cone_data_t& cones() const + { + cuopt_assert(cones_.has_value(), "second-order cone data is not initialized"); + return *cones_; + } + + i_t cone_count() const { return has_cones() ? cones_->n_cones : i_t(0); } + + i_t cone_entry_count() const + { + return has_cones() ? static_cast(cones_->n_cone_entries) : i_t(0); + } + + i_t cone_start() const { return cone_var_start_; } + + i_t cone_end() const { return cone_start() + cone_entry_count(); } + + i_t linear_xz_size(std::size_t full_xz_size) const + { + return has_cones() ? cone_start() : static_cast(full_xz_size); + } + + bool is_cone_variable(i_t variable) const + { + return has_cones() && variable >= cone_start() && variable < cone_end(); + } + + f_t complementarity_degree(std::size_t num_primal_variables, i_t num_upper_bounds) const + { + f_t degree = static_cast(num_primal_variables) + static_cast(num_upper_bounds); + if (has_cones()) { + degree -= static_cast(cone_entry_count()); + degree += static_cast(cone_count()); + } + return degree; + } + void form_augmented(bool first_call = false) { i_t n = A.n; @@ -427,16 +507,92 @@ class iteration_data_t { i_t factorization_size = n + m; const f_t dual_perturb = 0.0; const f_t primal_perturb = 1e-6; + + const bool has_soc = has_cones(); + const i_t m_c = cone_entry_count(); + i_t total_block_nnz = 0; + + std::vector cone_offsets_host; + std::vector cone_block_offsets_host; + if (has_soc) { + const i_t n_cones = cone_count(); + cone_offsets_host.resize(n_cones + 1); + cone_block_offsets_host.resize(n_cones + 1); + raft::copy(cone_offsets_host.data(), cones().cone_offsets.data(), n_cones + 1, stream_view_); + 
handle_ptr->sync_stream(); + for (i_t k = 0; k < n_cones; ++k) { + const auto q_k = cone_offsets_host[k + 1] - cone_offsets_host[k]; + cone_block_offsets_host[k + 1] = cone_block_offsets_host[k] + q_k * q_k; + } + total_block_nnz = static_cast(cone_block_offsets_host[n_cones]); + } + if (first_call) { - i_t new_nnz = 2 * nnzA + n + m + nnzQ; + i_t new_nnz = 2 * nnzA + n + m + nnzQ + total_block_nnz; csr_matrix_t augmented_CSR(n + m, n + m, new_nnz); std::vector augmented_diagonal_indices(n + m, -1); + std::vector cone_csr_indices_host(total_block_nnz, -1); + std::vector cone_Q_values_host(total_block_nnz, f_t(0)); i_t q = 0; i_t off_diag_Qnz = 0; for (i_t i = 0; i < n; i++) { augmented_CSR.row_start[i] = q; - if (nnzQ == 0) { + + const bool is_cone_row = is_cone_variable(i); + + if (is_cone_row) { + // Determine which cone this variable belongs to and its local row + i_t local_idx = i - cone_start(); + i_t k = 0; + while (k + 1 < cone_count() && + cone_offsets_host[k + 1] <= static_cast(local_idx)) { + k++; + } + i_t local_r = + static_cast(static_cast(local_idx) - cone_offsets_host[k]); + i_t q_k = static_cast(cone_offsets_host[k + 1] - cone_offsets_host[k]); + i_t cone_col_start = cone_start() + static_cast(cone_offsets_host[k]); + i_t block_base = static_cast(cone_block_offsets_host[k]) + local_r * q_k; + + // Merge-join: Q entries (sorted) with dense cone block columns (contiguous) + i_t qp = (nnzQ > 0) ? Q.col_start[i] : 0; + i_t q_end = (nnzQ > 0) ? Q.col_start[i + 1] : 0; + + // Q entries before cone block + while (qp < q_end && Q.i[qp] < cone_col_start) { + augmented_CSR.j[q] = Q.i[qp]; + augmented_CSR.x[q++] = -Q.x[qp]; + off_diag_Qnz++; + qp++; + } + + // Dense cone block, absorbing any Q entries that fall inside + for (i_t c = 0; c < q_k; c++) { + i_t col = cone_col_start + c; + f_t q_contrib = f_t(0); + f_t initial_val = (c == local_r) ? 
f_t(1) : f_t(0); + + if (qp < q_end && Q.i[qp] == col) { + q_contrib = Q.x[qp]; + qp++; + } + + cone_csr_indices_host[block_base + c] = q; + cone_Q_values_host[block_base + c] = q_contrib; + if (col == i) { augmented_diagonal_indices[i] = q; } + augmented_CSR.j[q] = col; + augmented_CSR.x[q++] = initial_val - q_contrib; + } + + // Q entries after cone block + while (qp < q_end) { + augmented_CSR.j[q] = Q.i[qp]; + augmented_CSR.x[q++] = -Q.x[qp]; + off_diag_Qnz++; + qp++; + } + } else if (nnzQ == 0) { augmented_diagonal_indices[i] = q; augmented_CSR.j[q] = i; augmented_CSR.x[q++] = -diag[i] - dual_perturb; @@ -489,8 +645,9 @@ class iteration_data_t { augmented_CSR.nz_max = q; augmented_CSR.j.resize(q); augmented_CSR.x.resize(q); - settings_.log.debug("augmented nz %d predicted %d\n", q, off_diag_Qnz + nnzA + n); - cuopt_assert(q == 2 * nnzA + n + m + off_diag_Qnz, "augmented nnz != predicted"); + i_t expected_nnz = 2 * nnzA + (n - m_c) + total_block_nnz + m + off_diag_Qnz; + settings_.log.debug("augmented nz %d predicted %d\n", q, expected_nnz); + cuopt_assert(q == expected_nnz, "augmented nnz != predicted"); cuopt_assert(A.col_start[n] == AT.col_start[m], "A nz != AT nz"); device_augmented.copy(augmented_CSR, handle_ptr->get_stream()); @@ -500,6 +657,20 @@ class iteration_data_t { augmented_diagonal_indices.data(), augmented_diagonal_indices.size(), handle_ptr->get_stream()); + + if (has_soc) { + d_cone_csr_indices_.resize(total_block_nnz, handle_ptr->get_stream()); + raft::copy(d_cone_csr_indices_.data(), + cone_csr_indices_host.data(), + total_block_nnz, + handle_ptr->get_stream()); + d_cone_Q_values_.resize(total_block_nnz, handle_ptr->get_stream()); + raft::copy(d_cone_Q_values_.data(), + cone_Q_values_host.data(), + total_block_nnz, + handle_ptr->get_stream()); + } + handle_ptr->sync_stream(); #ifdef CHECK_SYMMETRY csc_matrix_t augmented_transpose(1, 1, 1); @@ -538,6 +709,15 @@ class iteration_data_t { span_x[span_diag_indices[j]] = primal_perturb_value; }); 
RAFT_CHECK_CUDA(handle_ptr->get_stream()); + + if (has_soc) { + scatter_hinv2_into_augmented(cones(), + device_augmented.x, + d_cone_csr_indices_, + d_cone_Q_values_, + handle_ptr->get_stream()); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); + } } } @@ -1414,8 +1594,9 @@ class iteration_data_t { f_t beta, rmm::device_uvector& y) { - const i_t m = A.m; - const i_t n = A.n; + const i_t m = A.m; + const i_t n = A.n; + const bool has_soc = has_cones(); rmm::device_uvector d_x1(n, handle_ptr->get_stream()); rmm::device_uvector d_x2(m, handle_ptr->get_stream()); @@ -1434,12 +1615,24 @@ class iteration_data_t { // diag.pairwise_product(x1, r1); // r1 <- D * x_1 pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), n, stream_view_); + if (has_soc) { + const i_t m_c = cone_entry_count(); + thrust::fill_n(rmm::exec_policy(stream_view_), d_r1.begin() + cone_start(), m_c, f_t(0)); + } // r1 <- Q x1 + D x1 if (Q.n > 0) { // matrix_vector_multiply(Q, 1.0, x1, 1.0, r1); cusparse_Q_view_.spmv(1.0, d_x1, 1.0, d_r1); } + if (has_soc) { + const i_t m_c = cone_entry_count(); + accumulate_cone_hinv2_matvec(raft::device_span(d_x1.data() + cone_start(), m_c), + cones(), + raft::device_span(d_r1.data() + cone_start(), m_c), + stream_view_); + RAFT_CHECK_CUDA(stream_view_); + } // y1 <- - alpha * r1 + beta * y1 // y1.axpy(-alpha, r1, beta); @@ -1533,9 +1726,14 @@ class iteration_data_t { std::vector Qdiag; bool Q_diagonal; rmm::device_uvector d_augmented_diagonal_indices_; + rmm::device_uvector d_cone_csr_indices_; + rmm::device_uvector d_cone_Q_values_; bool indefinite_Q; cusparse_view_t cusparse_Q_view_; + std::optional> cones_; + i_t cone_var_start_ = 0; + bool use_augmented; i_t symbolic_status; @@ -1633,6 +1831,8 @@ class iteration_data_t { rmm::device_uvector d_complementarity_xz_rhs_; rmm::device_uvector d_complementarity_wv_rhs_; rmm::device_uvector d_dual_rhs_; + rmm::device_uvector d_complementarity_target_; + rmm::device_uvector d_cone_hinv2_dx_; rmm::device_uvector 
d_Q_diag_; rmm::device_uvector d_Qx_; @@ -1642,6 +1842,9 @@ class iteration_data_t { transform_reduce_helper_t transform_reduce_helper_; sum_reduce_helper_t sum_reduce_helper_; + bool cone_combined_step_; + f_t cone_sigma_mu_; + rmm::cuda_stream_view stream_view_; const simplex_solver_settings_t& settings_; @@ -1740,6 +1943,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) { raft::common::nvtx::range fun_scope("Barrier: initial_point"); const bool use_augmented = data.use_augmented; + const bool has_soc = data.has_cones(); // Perform a numerical factorization i_t status; @@ -1898,8 +2102,10 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data.v[k] = -c[j] + epsilon; } } - // Now hande the case with no upper bounds + // Now handle the case with no upper bounds (skip cone variables) + const i_t cone_end = data.cone_end(); for (i_t j = 0; j < lp.num_cols; j++) { + if (has_soc && j >= data.cone_start() && j < cone_end) continue; if (lp.upper[j] == inf) { if (c[j] > epsilon_adjust) { data.z[j] = c[j]; @@ -1928,7 +2134,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data.v.multiply_scalar(-1.0); data.v.ensure_positive(epsilon_adjust); - data.z.ensure_positive(epsilon_adjust); + data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count()); } else { // First compute rhs = A*Dinv*c dense_vector_t rhs(lp.num_rows); @@ -1952,7 +2158,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data.gather_upper_bounds(data.z, data.v); data.v.multiply_scalar(-1.0); data.v.ensure_positive(epsilon_adjust); - data.z.ensure_positive(epsilon_adjust); + data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count()); } // Verify A'*y + z - E*v - Q*x = c @@ -1969,9 +2175,32 @@ int barrier_solver_t::initial_point(iteration_data_t& data) settings.log.printf("||A^T y + z - E*v - Q*x - c ||: %e\n", vector_norm2(data.dual_residual)); #endif - // Make sure (w, x, v, z) > 0 + // 
Make sure (w, x, v, z) > 0; skip cone vars — handled by shift_into_interior below data.w.ensure_positive(epsilon_adjust); - data.x.ensure_positive(epsilon_adjust); + data.x.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count()); + + if (has_soc) { + const auto& dims = lp.second_order_cone_dims; + i_t cs = data.cone_start(); + i_t off = 0; + + for (i_t k = 0; k < static_cast(dims.size()); ++k) { + i_t q_k = dims[k]; + auto shift_into_interior = [&](auto& vec) { + f_t tail_sq = f_t(0); + for (i_t j = 1; j < q_k; ++j) { + f_t v = vec[cs + off + j]; + tail_sq += v * v; + } + f_t gap = std::sqrt(tail_sq) - vec[cs + off]; + if (gap >= f_t(0)) { vec[cs + off] += f_t(1) + gap; } + }; + shift_into_interior(data.x); + shift_into_interior(data.z); + off += q_k; + } + } + #ifdef PRINT_INFO settings.log.printf("min v %e min z %e\n", data.v.minimum(), data.z.minimum()); #endif @@ -2155,10 +2384,27 @@ void barrier_solver_t::gpu_compute_residual_norms(const rmm::device_uv primal_residual_norm = std::max(device_vector_norm_inf(data.d_primal_residual_, stream_view_), device_vector_norm_inf(data.d_bound_residual_, stream_view_)); - dual_residual_norm = device_vector_norm_inf(data.d_dual_residual_, stream_view_); + dual_residual_norm = device_vector_norm_inf(data.d_dual_residual_, stream_view_); + const bool has_soc = data.has_cones(); + const i_t linear_xz_size = data.linear_xz_size(data.d_complementarity_xz_residual_.size()); + auto linear_xz_span = + raft::device_span(data.d_complementarity_xz_residual_.data(), linear_xz_size); complementarity_residual_norm = - std::max(device_vector_norm_inf(data.d_complementarity_xz_residual_, stream_view_), + std::max(device_vector_norm_inf(linear_xz_span, stream_view_), device_vector_norm_inf(data.d_complementarity_wv_residual_, stream_view_)); + if (has_soc) { + f_t cone_complementarity_norm = f_t(0); + auto cone_dot = data.cones().scratch.template get_slot<0>(); + data.cones().segmented_sum( + 
data.d_complementarity_xz_residual_.data() + data.cone_start(), cone_dot, stream_view_); + cone_complementarity_norm = thrust::reduce(rmm::exec_policy(stream_view_), + cone_dot.begin(), + cone_dot.end(), + f_t(0), + thrust::maximum()); + complementarity_residual_norm = + std::max(complementarity_residual_norm, cone_complementarity_norm); + } } template @@ -2166,16 +2412,50 @@ f_t barrier_solver_t::gpu_max_step_to_boundary(iteration_data_t& x, const rmm::device_uvector& dx) { + const bool has_soc = data.has_cones(); + const bool skip_cone_range = has_soc && static_cast(x.size()) >= data.cone_end(); + + auto ratio_test = [] HD(const thrust::tuple t) { + const f_t dx = thrust::get<0>(t); + const f_t x = thrust::get<1>(t); + if (dx < f_t(0.0)) return -x / dx; + return f_t(1.0); + }; + + if (skip_cone_range) { + i_t cs = data.cone_start(); + i_t mc = data.cone_entry_count(); + f_t alpha = f_t(1); + + if (cs > 0) { + alpha = std::min(alpha, + data.transform_reduce_helper_.transform_reduce( + thrust::make_zip_iterator(dx.data(), x.data()), + thrust::minimum(), + ratio_test, + f_t(1), + cs, + stream_view_)); + } + i_t tail_start = cs + mc; + i_t tail_size = static_cast(x.size()) - tail_start; + if (tail_size > 0) { + alpha = std::min(alpha, + data.transform_reduce_helper_.transform_reduce( + thrust::make_zip_iterator(dx.data() + tail_start, x.data() + tail_start), + thrust::minimum(), + ratio_test, + f_t(1), + tail_size, + stream_view_)); + } + return alpha; + } + return data.transform_reduce_helper_.transform_reduce( thrust::make_zip_iterator(dx.data(), x.data()), thrust::minimum(), - [] HD(const thrust::tuple t) { - const f_t dx = thrust::get<0>(t); - const f_t x = thrust::get<1>(t); - - if (dx < f_t(0.0)) return -x / dx; - return f_t(1.0); - }, + ratio_test, f_t(1.0), x.size(), stream_view_); @@ -2194,6 +2474,40 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t target, + raft::device_span xz_rhs, + raft::device_span x) { + if (target.empty()) 
return; + cub::DeviceTransform::Transform( + cuda::std::make_tuple(xz_rhs.data(), x.data()), + target.data(), + target.size(), + [] HD(f_t complementarity_xz_rhs, f_t x_val) { return complementarity_xz_rhs / x_val; }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); + }; + + auto recover_linear_dz = [&](raft::device_span target, + raft::device_span z, + raft::device_span dx_span, + raft::device_span x, + raft::device_span dz_span) { + if (dz_span.empty()) return; + cub::DeviceTransform::Transform( + cuda::std::make_tuple(target.data(), z.data(), dx_span.data(), x.data()), + dz_span.data(), + dz_span.size(), + [] HD(f_t target_val, f_t z_val, f_t dx_val, f_t x_val) { + return target_val - (z_val * dx_val) / x_val; + }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); + }; { raft::common::nvtx::range fun_scope("Barrier: GPU allocation and copies"); @@ -2234,6 +2548,44 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t(data.d_x_.data() + cone_var_start, m_c); + cones.z = raft::device_span(data.d_z_.data() + cone_var_start, m_c); + launch_nt_scaling(cones, stream_view_); + + cuopt_assert(cone_var_start + m_c == lp.num_cols, "barrier expects [linear | cone] layout"); + fill_linear_target( + raft::device_span(data.d_complementarity_target_.data(), linear_size), + raft::device_span(data.d_complementarity_xz_rhs_.data(), linear_size), + raft::device_span(data.d_x_.data(), linear_size)); + + auto cone_target = + raft::device_span(data.d_complementarity_target_.data() + cone_var_start, m_c); + if (data.cone_combined_step_) { + compute_combined_cone_rhs_term( + raft::device_span(data.d_dx_aff_.data() + cone_var_start, m_c), + raft::device_span(data.d_dz_aff_.data() + cone_var_start, m_c), + cones, + data.cone_sigma_mu_, + cone_target, + stream_view_); + } else { + cub::DeviceTransform::Transform( + cones.z.data(), + cone_target.data(), + m_c, + [] HD(f_t z_val) { return -z_val; }, + stream_view_.value()); + 
RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + RAFT_CHECK_CUDA(stream_view_); + } else { + fill_linear_target(cuopt::make_span(data.d_complementarity_target_), + cuopt::make_span(data.d_complementarity_xz_rhs_), + cuopt::make_span(data.d_x_)); + } + // Solves the linear system // // dw dx dy dv dz @@ -2247,13 +2599,18 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t{}, stream_view_.value()); RAFT_CHECK_CUDA(stream_view_); + + if (has_soc) { + thrust::fill_n( + rmm::exec_policy(stream_view_), data.d_diag_.begin() + cone_var_start, m_c, f_t(1)); + } // diag = z ./ x + E * (v ./ w) * E' if (data.n_upper_bounds > 0) { @@ -2269,8 +2626,9 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t 0 && data.Q_diagonal) { + // In ADAT mode, diagonal Q is folded into D. In augmented mode, Q is an explicit block in the + // KKT matrix, so adding it here would double count the quadratic objective. + if (!use_augmented && data.Q.n > 0 && data.Q_diagonal) { cub::DeviceTransform::Transform( cuda::std::make_tuple(data.d_Q_diag_.data(), data.d_diag_.data()), data.d_diag_.data(), @@ -2335,7 +2693,7 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t thrust::tuple { - const f_t tmp = tmp3 + -(complementarity_xz_rhs / x) + dual_rhs; + [] HD(f_t inv_diag, f_t tmp3, f_t dual_rhs, f_t target) -> thrust::tuple { + const f_t tmp = tmp3 + dual_rhs - target; return {tmp, inv_diag * tmp}; }, stream_view_.value()); @@ -2655,18 +3011,29 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t(data.d_dx_.data() + cone_var_start, m_c), + data.cones(), + raft::device_span(data.d_complementarity_target_.data() + cone_var_start, m_c), + raft::device_span(data.d_dz_.data() + cone_var_start, m_c), + stream_view_); + + if (cone_var_start > 0) { + recover_linear_dz( + raft::device_span(data.d_complementarity_target_.data(), cone_var_start), + raft::device_span(data.d_z_.data(), 
cone_var_start), + raft::device_span(data.d_dx_.data(), cone_var_start), + raft::device_span(data.d_x_.data(), cone_var_start), + raft::device_span(data.d_dz_.data(), cone_var_start)); + } + } else { + recover_linear_dz(cuopt::make_span(data.d_complementarity_target_), + cuopt::make_span(data.d_z_), + cuopt::make_span(data.d_dx_), + cuopt::make_span(data.d_x_), + cuopt::make_span(data.d_dz_)); + } RAFT_CHECK_CUDA(stream_view_); raft::copy(dz.data(), data.d_dz_.data(), data.d_dz_.size(), stream_view_); } @@ -2675,18 +3042,29 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t out, + raft::device_span rhs, + raft::device_span z, + raft::device_span dz_span, + raft::device_span dx_span, + raft::device_span x) { + if (out.empty()) return; + cub::DeviceTransform::Transform( + cuda::std::make_tuple(rhs.data(), z.data(), dz_span.data(), dx_span.data(), x.data()), + out.data(), + out.size(), + [] HD(f_t complementarity_xz_rhs, f_t z_val, f_t dz_val, f_t dx_val, f_t x_val) { + return z_val * dx_val + x_val * dz_val - complementarity_xz_rhs; + }, + stream_view_.value()); + }; + compute_linear_xz_residual( + raft::device_span(data.d_xz_residual_.data(), linear_size), + raft::device_span(data.d_complementarity_xz_rhs_.data(), linear_size), + raft::device_span(data.d_z_.data(), linear_size), + raft::device_span(data.d_dz_.data(), linear_size), + raft::device_span(data.d_dx_.data(), linear_size), + raft::device_span(data.d_x_.data(), linear_size)); RAFT_CHECK_CUDA(stream_view_); const f_t xz_residual_norm = device_vector_norm_inf(data.d_xz_residual_, stream_view_); @@ -2850,27 +3228,31 @@ template void barrier_solver_t::compute_affine_rhs(iteration_data_t& data) { raft::common::nvtx::range fun_scope("Barrier: compute_affine_rhs"); + const i_t linear_size = data.linear_xz_size(lp.num_cols); - data.primal_rhs = data.primal_residual; - data.bound_rhs = data.bound_residual; - data.dual_rhs = data.dual_residual; + data.primal_rhs = data.primal_residual; + 
data.bound_rhs = data.bound_residual; + data.dual_rhs = data.dual_residual; + data.cone_combined_step_ = false; + data.cone_sigma_mu_ = f_t(0); - raft::copy(data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_residual_.data(), - data.d_complementarity_xz_residual_.size(), - stream_view_); raft::copy(data.d_complementarity_wv_rhs_.data(), data.d_complementarity_wv_residual_.data(), data.d_complementarity_wv_residual_.size(), stream_view_); - // x.*z -> -x .* z - cub::DeviceTransform::Transform( - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.size(), - [] HD(f_t xz_rhs) { return -xz_rhs; }, - stream_view_.value()); + auto negate_linear_rhs = [&](raft::device_span out, raft::device_span residual) { + if (out.empty()) return; + cub::DeviceTransform::Transform( + residual.data(), + out.data(), + out.size(), + [] HD(f_t xz_rhs) { return -xz_rhs; }, + stream_view_.value()); + }; + negate_linear_rhs( + raft::device_span(data.d_complementarity_xz_rhs_.data(), linear_size), + raft::device_span(data.d_complementarity_xz_residual_.data(), linear_size)); RAFT_CHECK_CUDA(stream_view_); // w.*v -> -w .* v cub::DeviceTransform::Transform( @@ -2887,6 +3269,7 @@ void barrier_solver_t::compute_target_mu( iteration_data_t& data, f_t mu, f_t& mu_aff, f_t& sigma, f_t& new_mu) { raft::common::nvtx::range fun_scope("Barrier: compute_target_mu"); + const bool has_soc = data.has_cones(); f_t complementarity_aff_sum = 0.0; // TMP no copy and data should always be on the GPU @@ -2905,7 +3288,23 @@ void barrier_solver_t::compute_target_mu( f_t step_dual_aff = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_aff_), gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_aff_)); - if (data.Q.n > 0) { step_primal_aff = step_dual_aff = std::min(step_primal_aff, step_dual_aff); } + if (has_soc) { + i_t cs = data.cone_start(); + i_t mc = data.cone_entry_count(); + auto [cone_p, cone_d] = + 
compute_cone_step_length(data.cones(), + raft::device_span(data.d_dx_aff_.data() + cs, mc), + raft::device_span(data.d_dz_aff_.data() + cs, mc), + std::min(step_primal_aff, step_dual_aff), + stream_view_); + f_t cone_aff = std::min(cone_p, cone_d); + step_primal_aff = std::min(step_primal_aff, cone_aff); + step_dual_aff = step_primal_aff; + } + + if (data.Q.n > 0 || has_soc) { + step_primal_aff = step_dual_aff = std::min(step_primal_aff, step_dual_aff); + } // Compute complementarity_xz_aff_sum = sum(x_aff * z_aff), // where x_aff = x + step_primal_aff * dx_aff and z_aff = z + step_dual_aff * dz_aff @@ -2956,24 +3355,33 @@ void barrier_solver_t::compute_target_mu( stream_view_); complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum; - - mu_aff = (complementarity_aff_sum) / - (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); - sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); - new_mu = sigma * mu_aff; + f_t mu_denom = data.complementarity_degree(data.x.size(), data.n_upper_bounds); + mu_aff = complementarity_aff_sum / mu_denom; + sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); + new_mu = sigma * mu; } template void barrier_solver_t::compute_cc_rhs(iteration_data_t& data, f_t& new_mu) { raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs"); + const bool has_soc = data.has_cones(); + const i_t linear_size = data.linear_xz_size(lp.num_cols); - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_dx_aff_.data(), data.d_dz_aff_.data()), - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.size(), - [new_mu] HD(f_t dx_aff, f_t dz_aff) { return -(dx_aff * dz_aff) + new_mu; }, - stream_view_.value()); + auto fill_linear_cc_rhs = [&](raft::device_span out, + raft::device_span dx_aff, + raft::device_span dz_aff) { + if (out.empty()) return; + cub::DeviceTransform::Transform( + cuda::std::make_tuple(dx_aff.data(), dz_aff.data()), + out.data(), + out.size(), + 
[new_mu] HD(f_t dx_aff_val, f_t dz_aff_val) { return -(dx_aff_val * dz_aff_val) + new_mu; }, + stream_view_.value()); + }; + fill_linear_cc_rhs(raft::device_span(data.d_complementarity_xz_rhs_.data(), linear_size), + raft::device_span(data.d_dx_aff_.data(), linear_size), + raft::device_span(data.d_dz_aff_.data(), linear_size)); RAFT_CHECK_CUDA(stream_view_); cub::DeviceTransform::Transform( cuda::std::make_tuple(data.d_dw_aff_.data(), data.d_dv_aff_.data()), @@ -2982,11 +3390,12 @@ void barrier_solver_t::compute_cc_rhs(iteration_data_t& data [new_mu] HD(f_t dw_aff, f_t dv_aff) { return -(dw_aff * dv_aff) + new_mu; }, stream_view_.value()); RAFT_CHECK_CUDA(stream_view_); - // TMP should be CPU to 0 if CPU and GPU to 0 if GPU data.primal_rhs.set_scalar(0.0); data.bound_rhs.set_scalar(0.0); data.dual_rhs.set_scalar(0.0); + data.cone_combined_step_ = has_soc; + data.cone_sigma_mu_ = has_soc ? new_mu : f_t(0); } template @@ -3059,6 +3468,8 @@ void barrier_solver_t::compute_primal_dual_step_length(iteration_data_ f_t& step_dual) { raft::common::nvtx::range fun_scope("Barrier: compute_primal_dual_step_length"); + const bool has_soc = data.has_cones(); + f_t max_step_primal = 0.0; f_t max_step_dual = 0.0; max_step_primal = std::min(gpu_max_step_to_boundary(data, data.d_w_, data.d_dw_), @@ -3066,10 +3477,23 @@ void barrier_solver_t::compute_primal_dual_step_length(iteration_data_ max_step_dual = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_), gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_)); + if (has_soc) { + i_t cs = data.cone_start(); + i_t mc = data.cone_entry_count(); + auto [cone_primal, cone_dual] = + compute_cone_step_length(data.cones(), + raft::device_span(data.d_dx_.data() + cs, mc), + raft::device_span(data.d_dz_.data() + cs, mc), + f_t(1), + stream_view_); + max_step_primal = std::min(max_step_primal, cone_primal); + max_step_dual = std::min(max_step_dual, cone_dual); + } + step_primal = step_scale * max_step_primal; step_dual = step_scale 
* max_step_dual; - if (data.Q.n > 0) { step_primal = step_dual = std::min(step_primal, step_dual); } + if (data.Q.n > 0 || has_soc) { step_primal = step_dual = std::min(step_primal, step_dual); } } template @@ -3159,6 +3583,7 @@ template void barrier_solver_t::compute_mu(iteration_data_t& data, f_t& mu) { raft::common::nvtx::range fun_scope("Barrier: compute_mu"); + f_t mu_denom = data.complementarity_degree(data.x.size(), data.n_upper_bounds); mu = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(), data.d_complementarity_xz_residual_.size(), @@ -3166,7 +3591,7 @@ void barrier_solver_t::compute_mu(iteration_data_t& data, f_ data.sum_reduce_helper_.sum(data.d_complementarity_wv_residual_.begin(), data.d_complementarity_wv_residual_.size(), stream_view_)) / - (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); + mu_denom; } template @@ -3427,6 +3852,10 @@ lp_status_t barrier_solver_t::solve(f_t start_time, if (lp.Q.n > 0) { settings.log.printf("Quadratic objective matrix: %d nonzeros\n", lp.Q.row_start[lp.Q.n]); } + if (lp.second_order_cone_dims.size() > 0) { + settings.log.printf("Second-order cones: %d\n", + static_cast(lp.second_order_cone_dims.size())); + } settings.log.printf("\n"); // Compute the number of free variables @@ -3487,12 +3916,31 @@ lp_status_t barrier_solver_t::solve(f_t start_time, f_t primal_residual_norm = std::max(vector_norm_inf(data.primal_residual, stream_view_), vector_norm_inf(data.bound_residual, stream_view_)); - f_t dual_residual_norm = vector_norm_inf(data.dual_residual, stream_view_); + f_t dual_residual_norm = vector_norm_inf(data.dual_residual, stream_view_); + const bool has_soc = data.has_cones(); + const i_t linear_xz_size = data.linear_xz_size(data.complementarity_xz_residual.size()); + auto linear_xz_span = + raft::host_span(data.complementarity_xz_residual.data(), linear_xz_size); f_t complementarity_residual_norm = - std::max(vector_norm_inf(data.complementarity_xz_residual, stream_view_), 
+ std::max(vector_norm_inf(linear_xz_span, stream_view_), vector_norm_inf(data.complementarity_wv_residual, stream_view_)); - f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / - (static_cast(n) + static_cast(num_upper_bounds)); + if (has_soc) { + f_t cone_complementarity_norm = f_t(0); + i_t off = data.cone_start(); + for (auto q_k : lp.second_order_cone_dims) { + f_t cone_dot = f_t(0); + for (i_t j = 0; j < q_k; ++j) { + cone_dot += data.complementarity_xz_residual[off + j]; + } + cone_complementarity_norm = std::max(cone_complementarity_norm, cone_dot); + off += q_k; + } + complementarity_residual_norm = + std::max(complementarity_residual_norm, cone_complementarity_norm); + } + f_t mu_denom = data.complementarity_degree(n, num_upper_bounds); + f_t mu = + (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / mu_denom; f_t norm_b = vector_norm_inf(data.b, stream_view_); f_t norm_c = vector_norm_inf(data.c, stream_view_); @@ -3533,15 +3981,22 @@ lp_status_t barrier_solver_t::solve(f_t start_time, relative_complementarity_residual, elapsed_time); + f_t duality_gap_abs = std::abs(primal_objective - dual_objective); + f_t duality_gap_rel = + duality_gap_abs / + std::max(f_t(1), std::min(std::abs(primal_objective), std::abs(dual_objective))); bool converged = primal_residual_norm < settings.barrier_relative_feasibility_tol && dual_residual_norm < settings.barrier_relative_optimality_tol && - complementarity_residual_norm < settings.barrier_relative_complementarity_tol; + (duality_gap_abs < settings.barrier_relative_complementarity_tol || + duality_gap_rel < settings.barrier_relative_complementarity_tol); + + const i_t linear_xz_rhs_size = data.linear_xz_size(data.complementarity_xz_rhs.size()); data.d_complementarity_xz_residual_.resize(data.complementarity_xz_residual.size(), stream_view_); data.d_complementarity_wv_residual_.resize(data.complementarity_wv_residual.size(), stream_view_); - 
data.d_complementarity_xz_rhs_.resize(data.complementarity_xz_rhs.size(), stream_view_); + data.d_complementarity_xz_rhs_.resize(linear_xz_rhs_size, stream_view_); data.d_complementarity_wv_rhs_.resize(data.complementarity_wv_rhs.size(), stream_view_); raft::copy(data.d_complementarity_xz_residual_.data(), data.complementarity_xz_residual.data(), @@ -3553,7 +4008,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, stream_view_); raft::copy(data.d_complementarity_xz_rhs_.data(), data.complementarity_xz_rhs.data(), - data.complementarity_xz_rhs.size(), + linear_xz_rhs_size, stream_view_); raft::copy(data.d_complementarity_wv_rhs_.data(), data.complementarity_wv_rhs.data(), diff --git a/cpp/src/barrier/dense_vector.hpp b/cpp/src/barrier/dense_vector.hpp index f73a9a5fce..009b9ea955 100644 --- a/cpp/src/barrier/dense_vector.hpp +++ b/cpp/src/barrier/dense_vector.hpp @@ -184,6 +184,28 @@ class dense_vector_t : public std::vector { } } + void ensure_positive_skip_range(f_t epsilon_adjust, i_t skip_start, i_t skip_count) + { + if (skip_count == 0) { + ensure_positive(epsilon_adjust); + return; + } + const i_t n = this->size(); + const i_t skip_end = skip_start + skip_count; + f_t min_val = std::numeric_limits::max(); + for (i_t i = 0; i < n; i++) { + if (i >= skip_start && i < skip_end) continue; + min_val = std::min(min_val, (*this)[i]); + } + if (min_val <= 0.0) { + const f_t delta = -min_val + epsilon_adjust; + for (i_t i = 0; i < n; i++) { + if (i >= skip_start && i < skip_end) continue; + (*this)[i] += delta; + } + } + } + void bound_away_from_zero(f_t epsilon_adjust) { const i_t n = this->size(); diff --git a/cpp/src/barrier/second_order_cone_kernels.cuh b/cpp/src/barrier/second_order_cone_kernels.cuh new file mode 100644 index 0000000000..42568fcc9b --- /dev/null +++ b/cpp/src/barrier/second_order_cone_kernels.cuh @@ -0,0 +1,1004 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// ============================================================================= +// SOC (second-order cone) kernels for the cuOpt barrier solver. +// +// x_soc : cone primal block +// z_soc : cone dual block +// W, W^{-1} : Nesterov-Todd scaling matrix and inverse. W is symmetric for +// SOC, so W^{-T} = W^{-1} +// H : W^{-1} W^{-T} = W^{-2}, the cone KKT block added to the +// primal-reduced system +// eta : sqrt(x_J / z_J), where x_J = sqrt(det_J(x_soc)) +// w : NT scaling direction with det_J(w) = 1 and +// w[head] = sqrt(1 + ||w_tail||^2) +// +// Cone vectors are packed flat: +// entries [cone_offsets[i], cone_offsets[i + 1]) belong to cone i. +// ============================================================================= + +namespace cuopt::linear_programming::dual_simplex { + +inline constexpr int soc_block_size = 256; + +/** + * Reusable device workspace for second-order cone kernels. + * + * The scratch object owns only temporary storage. Kernels may reuse the scalar + * slots and `temp_cone` sequentially inside a higher-level operation, but no + * persistent NT scaling or iterate state is stored here. + */ +template +struct cone_scratch_t { + i_t n_cones; // number of SOC blocks + std::size_t n_cone_entries; // total packed cone dimension + + rmm::device_uvector slots; // [n_slots * n_cones] + + // Per-cone step candidates before the final min reduction. 
+ rmm::device_uvector step_alpha_primal; // [n_cones] + rmm::device_uvector step_alpha_dual; // [n_cones] + + // TODO: Consider moving this out to the barrier layer when we wire it in + rmm::device_uvector temp_cone; // [n_cone_entries] + + cone_scratch_t(i_t n_cones_in, std::size_t n_cone_entries_in, rmm::cuda_stream_view stream) + : n_cones(n_cones_in), + n_cone_entries(n_cone_entries_in), + slots(0, stream), + step_alpha_primal(0, stream), + step_alpha_dual(0, stream), + temp_cone(0, stream) + { + const auto n_cones_size = static_cast(n_cones); + + slots.resize(n_cones_size * static_cast(n_slots), stream); + step_alpha_primal.resize(n_cones_size, stream); + step_alpha_dual.resize(n_cones_size, stream); + temp_cone.resize(n_cone_entries, stream); + } + + template + raft::device_span get_slot() const + { + static_assert(slot_idx >= 0 && slot_idx < n_slots, "scratch slot index out of range"); + const auto n_cones_size = static_cast(n_cones); + const auto begin = static_cast(slot_idx) * n_cones_size; + const auto end = begin + n_cones_size; + return cuopt::make_span(slots, begin, end); + } + + template + raft::device_span get_slot() + { + const auto const_slot = static_cast(*this).template get_slot(); + return raft::device_span(const_cast(const_slot.data()), const_slot.size()); + } +}; + +struct to_size_t_t { + template + HD std::size_t operator()(value_t value) const + { + return value; + } +}; + +template +HD f_t cone_step_length_from_scalars( + f_t u0, f_t du0, f_t du_tail_sq, f_t u_tail_du_tail, f_t u_tail_sq, f_t alpha_max) +{ + const auto a = du0 * du0 - du_tail_sq; + const auto b = u0 * du0 - u_tail_du_tail; + const auto c_raw = u0 * u0 - u_tail_sq; + const auto c = c_raw > 0 ? 
c_raw : 0; + const auto disc = b * b - a * c; + auto alpha = alpha_max; + + if (du0 < 0) { alpha = cuda::std::min(alpha, -u0 / du0); } + + if ((a > 0 && b > 0) || disc < 0) { return alpha; } + + if (a == 0) { + if (b < 0) { alpha = cuda::std::min(alpha, c / (-2 * b)); } + } else if (c == 0) { + alpha = a >= 0 ? alpha : 0; + } else { + const auto t = -(b + copysign(sqrt(disc), b)); + auto r1 = c / t; + auto r2 = t / a; + if (r1 < 0) { r1 = alpha; } + if (r2 < 0) { r2 = alpha; } + alpha = cuda::std::min(alpha, cuda::std::min(r1, r2)); + } + + return alpha; +} + +template +__global__ void __launch_bounds__(soc_block_size) + step_length_single_kernel(raft::device_span u, + raft::device_span du, + raft::device_span alpha, + raft::device_span du_tail_sq, + raft::device_span u_tail_du_tail, + raft::device_span u_tail_sq, + raft::device_span cone_offsets, + f_t alpha_max, + i_t n_cones) +{ + const auto cone = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + if (cone >= n_cones) { return; } + + const auto off = cone_offsets[cone]; + alpha[cone] = cone_step_length_from_scalars( + u[off], du[off], du_tail_sq[cone], u_tail_du_tail[cone], u_tail_sq[cone], alpha_max); +} + +/** + * Device storage for second-order cone topology, NT scaling, and iterate views. + * + * Flat arrays are packed by cone: entries + * [cone_offsets[i], cone_offsets[i + 1]) belong to cone i, whose dimension is + * cone_dimensions[i]. + * + * The primal/dual cone vectors are non-owning spans over the SOC slice of the + * solver's global x/z vectors. The caller must keep the underlying storage + * alive for the lifetime of this object. + */ +template +struct cone_data_t { + // Topology. This is immutable after construction. 
+ i_t n_cones; // number of SOC blocks + std::size_t n_cone_entries; // total packed cone dimension = sum(cone_dimensions) + + rmm::device_uvector cone_offsets; // [n_cones + 1], prefix sum of dimensions + rmm::device_uvector cone_dimensions; // [n_cones], dimension q_i of each cone + // Owning cone per entry for upcoming flat per-entry SOC kernels. + rmm::device_uvector element_cone_ids; // [n_cone_entries] + segmented_sum_t segmented_sum; + + // Non-owning iterate views over the cone portion of x/z. + raft::device_span x; // [n_cone_entries], SOC primal block + raft::device_span z; // [n_cone_entries], SOC dual block + + // Persistent Nesterov-Todd scaling state, recomputed from x/z each iteration. + rmm::device_uvector eta; // [n_cones], sqrt(|x|_J / |z|_J) + rmm::device_uvector w; // [n_cone_entries], unit-J-norm NT direction + + cone_scratch_t scratch; + + cone_data_t(std::span cone_dimensions_host, + raft::device_span x_in, + raft::device_span z_in, + rmm::cuda_stream_view stream) + : n_cones(cone_dimensions_host.size()), + n_cone_entries( + std::reduce(cone_dimensions_host.begin(), cone_dimensions_host.end(), std::size_t{0})), + cone_offsets(n_cones + 1, stream), + cone_dimensions(n_cones, stream), + element_cone_ids(n_cone_entries, stream), + segmented_sum(cone_dimensions_host, cuopt::make_span(cone_offsets), stream), + x(x_in), + z(z_in), + eta(n_cones, stream), + w(n_cone_entries, stream), + scratch(n_cones, n_cone_entries, stream) + { + raft::copy(cone_dimensions.data(), cone_dimensions_host.data(), n_cones, stream); + cone_offsets.set_element_to_zero_async(0, stream); + auto policy = rmm::exec_policy(stream); + + auto cone_dimensions_as_offsets = + thrust::make_transform_iterator(cone_dimensions.begin(), to_size_t_t{}); + thrust::inclusive_scan(policy, + cone_dimensions_as_offsets, + cone_dimensions_as_offsets + n_cones, + cone_offsets.begin() + 1, + cuda::std::plus{}); + + thrust::upper_bound(policy, + cone_offsets.begin() + 1, + cone_offsets.end(), + 
thrust::make_counting_iterator(0), + thrust::make_counting_iterator(n_cone_entries), + element_cone_ids.begin()); + segmented_sum.template prepare_workspace(stream); + } +}; + +template +__global__ void __launch_bounds__(soc_block_size) + nt_finalize_scaling_scalars_kernel(raft::device_span x, + raft::device_span z, + raft::device_span x_scale, + raft::device_span z_scale, + raft::device_span eta, + raft::device_span cone_offsets, + i_t n_cones) +{ + const auto cone = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + if (cone >= n_cones) { return; } + + const auto off = cone_offsets[cone]; + const auto x_tail_norm = sqrt(x_scale[cone]); + const auto z_tail_norm = sqrt(z_scale[cone]); + const auto x_det = (x[off] - x_tail_norm) * (x[off] + x_tail_norm); + const auto z_det = (z[off] - z_tail_norm) * (z[off] + z_tail_norm); + + x_scale[cone] = sqrt(x_det); + z_scale[cone] = sqrt(z_det); + eta[cone] = sqrt(x_scale[cone] / z_scale[cone]); +} + +template +__global__ void __launch_bounds__(soc_block_size) + nt_finalize_w_scale_kernel(raft::device_span normalized_dot, i_t n_cones) +{ + const auto cone = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + if (cone >= n_cones) { return; } + + normalized_dot[cone] = sqrt(2 + 2 * normalized_dot[cone]); +} + +/** + * Write normalized w_tail directly: + * + * w_tail = (x_tail / x_scale - z_tail / z_scale) / w_scale. + * + * The head is zeroed temporarily and overwritten after reducing + * ||w_tail||^2. 
+ */ +template +__global__ void __launch_bounds__(soc_block_size) + nt_write_w_tail_kernel(raft::device_span x, + raft::device_span z, + raft::device_span x_scale, + raft::device_span z_scale, + raft::device_span w_scale, + raft::device_span w, + raft::device_span cone_offsets, + raft::device_span element_cone_ids) +{ + const auto idx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (idx >= w.size()) { return; } + + const auto cone = element_cone_ids[idx]; + const auto cone_off = cone_offsets[cone]; + if (idx == cone_off) { + w[idx] = 0; + return; + } + + w[idx] = (x[idx] / x_scale[cone] - z[idx] / z_scale[cone]) / w_scale[cone]; +} + +template +__global__ void __launch_bounds__(soc_block_size) + nt_finalize_head_kernel(raft::device_span w, + raft::device_span normalized_tail_sq, + raft::device_span cone_offsets, + i_t n_cones) +{ + const auto cone = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + if (cone >= n_cones) { return; } + + w[cone_offsets[cone]] = sqrt(1 + normalized_tail_sq[cone]); +} + +/** + * Build Nesterov-Todd scaling for packed SOC blocks. 
+ * + * Given interior cone primal/dual blocks x and z: + * + * x_scale = sqrt(det_J(x)), z_scale = sqrt(det_J(z)) + * eta = sqrt(x_scale / z_scale) + * w_scale = sqrt(2 + 2 * dot(x / x_scale, z / z_scale)) + * w_tail = (x_tail / x_scale - z_tail / z_scale) / w_scale + * w_0 = sqrt(1 + ||w_tail||^2) to re-impose det_J(w) = 1 + * + * Scratch slots: + * 0: ||x_tail||^2 -> x_scale + * 1: ||z_tail||^2 -> z_scale + * 2: dot(x / x_scale, z / z_scale) -> w_scale -> ||w_tail||^2 + */ +template +void launch_nt_scaling(cone_data_t& cones, rmm::cuda_stream_view stream) +{ + auto x_scale = cones.scratch.template get_slot<0>(); + auto z_scale = cones.scratch.template get_slot<1>(); + auto w_scale = cones.scratch.template get_slot<2>(); + + const auto span_x = cones.x; + const auto span_z = cones.z; + const auto cone_offsets = cuopt::make_span(cones.cone_offsets); + const auto element_cone_ids = cuopt::make_span(cones.element_cone_ids); + + auto x_tail_sq_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [span_x, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 0 : span_x[idx] * span_x[idx]; + }); + cones.segmented_sum(x_tail_sq_terms, x_scale, stream); + + auto z_tail_sq_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [span_z, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 
0 : span_z[idx] * span_z[idx]; + }); + cones.segmented_sum(z_tail_sq_terms, z_scale, stream); + + const auto cone_grid_dim = + raft::ceildiv(static_cast(cones.n_cones), soc_block_size); + nt_finalize_scaling_scalars_kernel + <<>>( + cones.x, cones.z, x_scale, z_scale, cuopt::make_span(cones.eta), cone_offsets, cones.n_cones); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + const auto element_grid_dim = raft::ceildiv(cones.n_cone_entries, soc_block_size); + auto normalized_dot_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [span_x, span_z, x_scale, z_scale, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return span_x[idx] * span_z[idx] / (x_scale[cone] * z_scale[cone]); + }); + cones.segmented_sum(normalized_dot_terms, w_scale, stream); + + nt_finalize_w_scale_kernel + <<>>(w_scale, cones.n_cones); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + auto w = cuopt::make_span(cones.w); + nt_write_w_tail_kernel<<>>( + cones.x, cones.z, x_scale, z_scale, w_scale, w, cone_offsets, element_cone_ids); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + // Reduce ||w_tail||^2 per cone. The head entries are zero, so the same + // flat iterator can feed the segmented reduction. + auto normalized_tail_terms = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), + [cone_offsets, element_cone_ids, w] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 
0 : w[idx] * w[idx]; + }); + cones.segmented_sum(normalized_tail_terms, w_scale, stream); + + nt_finalize_head_kernel<<>>( + cuopt::make_span(cones.w), w_scale, cone_offsets, cones.n_cones); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +template +__global__ void __launch_bounds__(soc_block_size) + apply_w_write_kernel(raft::device_span v, + raft::device_span out, + raft::device_span w, + raft::device_span eta, + raft::device_span tail_dot, + raft::device_span cone_offsets, + raft::device_span element_cone_ids) +{ + const auto idx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (idx >= out.size()) { return; } + + const auto cone = element_cone_ids[idx]; + const auto cone_off = cone_offsets[cone]; + const auto local_idx = idx - cone_off; + + const auto w0 = w[cone_off]; + const auto zeta = tail_dot[cone]; + const auto inv_1pw0 = 1 / (1 + w0); + const auto v0 = v[cone_off]; + + if (local_idx == 0) { + out[idx] = eta[cone] * (w0 * v0 + zeta); + return; + } + + const auto coeff = v0 + zeta * inv_1pw0; + out[idx] = eta[cone] * (v[idx] + coeff * w[idx]); +} + +template +__global__ void __launch_bounds__(soc_block_size) + apply_w_inv_write_kernel(raft::device_span v, + raft::device_span out, + raft::device_span w, + raft::device_span eta, + raft::device_span tail_dot, + raft::device_span cone_offsets, + raft::device_span element_cone_ids) +{ + const auto idx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (idx >= out.size()) { return; } + + const auto cone = element_cone_ids[idx]; + const auto cone_off = cone_offsets[cone]; + const auto local_idx = idx - cone_off; + + const auto w0 = w[cone_off]; + const auto zeta = tail_dot[cone]; + const auto inv_1pw0 = 1 / (1 + w0); + const auto v0 = v[cone_off]; + const auto inv_eta = 1 / eta[cone]; + + if (local_idx == 0) { + out[idx] = inv_eta * (w0 * v0 - zeta); + return; + } + + const auto coeff = -v0 + zeta * inv_1pw0; + out[idx] = inv_eta * (v[idx] + coeff * w[idx]); +} + +template +__global__ void 
__launch_bounds__(soc_block_size) + apply_hinv2_write_kernel(raft::device_span v, + raft::device_span out, + raft::device_span w, + raft::device_span eta, + raft::device_span tail_dot, + raft::device_span cone_offsets, + raft::device_span element_cone_ids, + raft::device_span bias, + f_t output_scale, + f_t bias_scale) +{ + const auto idx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (idx >= out.size()) { return; } + + const auto cone = element_cone_ids[idx]; + const auto cone_off = cone_offsets[cone]; + const auto local_idx = idx - cone_off; + + const auto inv_eta_sq = 1 / (eta[cone] * eta[cone]); + const auto rho = w[cone_off] * v[cone_off] - tail_dot[cone]; + const auto coeff = 2 * rho * inv_eta_sq; + const int sign = (local_idx == 0) * 2 - 1; + const auto value = coeff * w[idx] - inv_eta_sq * v[idx]; + const auto h_value = output_scale * value * sign; + + out[idx] = bias.empty() ? h_value : bias_scale * bias[idx] + h_value; +} + +template +__global__ void __launch_bounds__(soc_block_size) + gather_cone_heads_kernel(raft::device_span values, + raft::device_span heads, + raft::device_span cone_offsets, + i_t n_cones) +{ + const auto cone = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + if (cone >= n_cones) { return; } + + heads[cone] = values[cone_offsets[cone]]; +} + +/** + * Build the Mehrotra corrector shift: + * + * d = (W^{-1} dx_aff) o (W dz_aff) - sigma_mu e. + * + * On entry, `scaled_dx` is W^{-1} dx_aff and `scaled_dz` is W dz_aff. The + * cone head uses the full dot product, and tail entries use the SOC Jordan + * product: + * + * d_0 = - sigma_mu + * d_tail = scaled_dx_0 * scaled_dz_tail + scaled_dz_0 * scaled_dx_tail. 
+ */ +template +__global__ void __launch_bounds__(soc_block_size) + combined_cone_shift_write_kernel(raft::device_span shift, + raft::device_span scaled_dx, + raft::device_span scaled_dz, + raft::device_span full_dot, + raft::device_span scaled_dx_head, + raft::device_span scaled_dz_head, + raft::device_span cone_offsets, + raft::device_span element_cone_ids, + f_t sigma_mu) +{ + const auto idx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (idx >= shift.size()) { return; } + + const auto cone = element_cone_ids[idx]; + const auto cone_off = cone_offsets[cone]; + const auto local_idx = idx - cone_off; + + if (local_idx == 0) { + shift[idx] = full_dot[cone] - sigma_mu; + return; + } + + shift[idx] = scaled_dx_head[cone] * scaled_dz[idx] + scaled_dz_head[cone] * scaled_dx[idx]; +} + +/** + * Per-cone scalar stage for p = lambda \ d: + * + * p_0 = (lambda_0 d_0 - ) / det_J(lambda) + * inv_lambda_0 = 1 / lambda_0. + * + * A second flat kernel writes `-p`, which lets the final W^{-1} call produce + * q = -W^{-1} p without adding an output-scale argument to W^{-1}. 
+ */ +template +__global__ void __launch_bounds__(soc_block_size) + jordan_divide_by_lambda_scalar_kernel(raft::device_span shift, + raft::device_span nt_point, + raft::device_span lambda_tail_dot, + raft::device_span lambda_tail_sq, + raft::device_span p0, + raft::device_span inv_lambda0, + raft::device_span cone_offsets, + i_t n_cones) +{ + const auto cone = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + if (cone >= n_cones) { return; } + + const auto cone_off = cone_offsets[cone]; + const auto lambda0 = nt_point[cone_off]; + const auto lambda_tail_norm = sqrt(lambda_tail_sq[cone]); + const auto det_lambda = (lambda0 - lambda_tail_norm) * (lambda0 + lambda_tail_norm); + + p0[cone] = (lambda0 * shift[cone_off] - lambda_tail_dot[cone]) / det_lambda; + inv_lambda0[cone] = 1 / lambda0; +} + +template +__global__ void __launch_bounds__(soc_block_size) + jordan_divide_by_lambda_write_kernel(raft::device_span shift, + raft::device_span nt_point, + raft::device_span p0, + raft::device_span inv_lambda0, + raft::device_span cone_offsets, + raft::device_span element_cone_ids, + raft::device_span out) +{ + const auto idx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (idx >= out.size()) { return; } + + const auto cone = element_cone_ids[idx]; + const auto cone_off = cone_offsets[cone]; + const auto local_idx = idx - cone_off; + + if (local_idx == 0) { + out[idx] = -p0[cone]; + return; + } + + out[idx] = (p0[cone] * nt_point[idx] - shift[idx]) * inv_lambda0[cone]; +} + +/** + * Apply the Nesterov-Todd scaling matrix: out = W v. 
+ * + * For each cone: + * zeta = + * (Wv)_0 = eta * (w_0 v_0 + zeta) + * (Wv)_tail = eta * (v_tail + (v_0 + zeta / (1 + w_0)) w_tail) + */ +template +void apply_w(raft::device_span v, + raft::device_span out, + cone_data_t& cones, + rmm::cuda_stream_view stream) +{ + auto w = cuopt::make_span(cones.w); + auto eta = cuopt::make_span(cones.eta); + auto cone_offsets = cuopt::make_span(cones.cone_offsets); + auto element_cone_ids = cuopt::make_span(cones.element_cone_ids); + auto tail_dot = cones.scratch.template get_slot<0>(); + + auto tail_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [v, w, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 0 : w[idx] * v[idx]; + }); + cones.segmented_sum(tail_terms, tail_dot, stream); + + const auto grid_dim = raft::ceildiv(out.size(), soc_block_size); + apply_w_write_kernel<<>>( + v, out, w, eta, tail_dot, cone_offsets, element_cone_ids); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +/** + * Apply the inverse Nesterov-Todd scaling matrix: + * out = W^{-1} v. + * + * For each cone, + * zeta = + * (W^{-1}v)_0 = eta^{-1} * (w_0 v_0 - zeta) + * (W^{-1}v)_tail = + * eta^{-1} * (v_tail + (-v_0 + zeta / (1 + w_0)) w_tail) + */ +template +void apply_w_inv(raft::device_span v, + raft::device_span out, + cone_data_t& cones, + rmm::cuda_stream_view stream) +{ + auto w = cuopt::make_span(cones.w); + auto eta = cuopt::make_span(cones.eta); + auto cone_offsets = cuopt::make_span(cones.cone_offsets); + auto element_cone_ids = cuopt::make_span(cones.element_cone_ids); + auto tail_dot = cones.scratch.template get_slot<0>(); + + auto tail_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [v, w, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 
0 : w[idx] * v[idx]; + }); + cones.segmented_sum(tail_terms, tail_dot, stream); + + const auto grid_dim = raft::ceildiv(out.size(), soc_block_size); + apply_w_inv_write_kernel<<>>( + v, out, w, eta, tail_dot, cone_offsets, element_cone_ids); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +/** + * Apply the cone KKT block H = W^{-1} W^{-T} = W^{-2}. + * + * With zeta = and rho = w_0 v_0 - zeta: + * (Hv)_0 = eta^{-2} (2 w_0 rho - v_0) + * (Hv)_tail = eta^{-2} (v_tail - 2 w_tail rho) + */ +template +void apply_hinv2(raft::device_span v, + raft::device_span out, + cone_data_t& cones, + rmm::cuda_stream_view stream, + f_t output_scale = 1, + raft::device_span bias = {}, + f_t bias_scale = 0) +{ + auto w = cuopt::make_span(cones.w); + auto eta = cuopt::make_span(cones.eta); + auto cone_offsets = cuopt::make_span(cones.cone_offsets); + auto element_cone_ids = cuopt::make_span(cones.element_cone_ids); + auto tail_dot = cones.scratch.template get_slot<0>(); + + auto tail_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [v, w, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 0 : w[idx] * v[idx]; + }); + cones.segmented_sum(tail_terms, tail_dot, stream); + + const auto grid_dim = raft::ceildiv(out.size(), soc_block_size); + apply_hinv2_write_kernel<<>>( + v, out, w, eta, tail_dot, cone_offsets, element_cone_ids, bias, output_scale, bias_scale); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +/** + * Recover the SOC dual direction after the reduced KKT solve. + * + * The reduced solve gives `dx`; the cone equation supplies the target RHS. + * This function applies the cone block H = W^{-2} and writes: + * dz = cone_target - H dx. 
+ */ +template +void recover_cone_dz_from_target(raft::device_span dx, + cone_data_t& cones, + raft::device_span cone_target, + raft::device_span dz, + rmm::cuda_stream_view stream) +{ + apply_hinv2(dx, dz, cones, stream, -1, cone_target, 1); +} + +/** + * Accumulate the SOC cone-block matvec into an existing output vector. + * + * Used by matrix-free products with the primal-reduced KKT block: + * out += H x, where H = W^{-2}. + */ +template +void accumulate_cone_hinv2_matvec(raft::device_span x, + cone_data_t& cones, + raft::device_span out, + rmm::cuda_stream_view stream) +{ + auto out_input = raft::device_span(out.data(), out.size()); + apply_hinv2(x, out, cones, stream, 1, out_input, 1); +} + +template +__global__ void __launch_bounds__(soc_block_size) + scatter_hinv2_into_augmented_kernel(raft::device_span augmented_x, + raft::device_span csr_indices, + raft::device_span q_values, + raft::device_span w, + raft::device_span eta, + raft::device_span cone_offsets, + raft::device_span block_offsets, + i_t n_cones) +{ + const auto e = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (e >= csr_indices.size()) { return; } + + i_t lo = 0; + i_t hi = n_cones; + while (lo < hi) { + const i_t mid = lo + (hi - lo) / 2; + if (block_offsets[mid + 1] <= e) { + lo = mid + 1; + } else { + hi = mid; + } + } + + const auto cone = lo; + const auto off = cone_offsets[cone]; + const auto q = cone_offsets[cone + 1] - off; + const auto blk_off = block_offsets[cone]; + const auto local = e - blk_off; + const auto r = local / q; + const auto c = local % q; + + const auto inv_eta_sq = 1 / (eta[cone] * eta[cone]); + const auto w0 = w[off]; + const auto u_r = (r == 0) ? w0 : -w[off + r]; + const auto u_c = (c == 0) ? w0 : -w[off + c]; + auto val = f_t{2} * u_r * inv_eta_sq * u_c; + const auto diag_correction = (r == 0) ? 
-inv_eta_sq : inv_eta_sq; + if (r == c) { val += diag_correction; } + + augmented_x[csr_indices[e]] = -val - q_values[e]; +} + +template +void scatter_hinv2_into_augmented(const cone_data_t& cones, + rmm::device_uvector& augmented_x, + const rmm::device_uvector& csr_indices, + const rmm::device_uvector& q_values, + rmm::cuda_stream_view stream) +{ + const auto count = csr_indices.size(); + if (count == 0) { return; } + cuopt_assert(count == q_values.size(), "cone CSR index and Q-value arrays must match"); + + rmm::device_uvector block_offsets(cones.n_cones + 1, stream); + block_offsets.set_element_to_zero_async(0, stream); + + auto block_sizes = thrust::make_transform_iterator( + cones.cone_dimensions.begin(), + [] HD(i_t q) -> std::size_t { return static_cast(q) * q; }); + thrust::inclusive_scan( + rmm::exec_policy(stream), block_sizes, block_sizes + cones.n_cones, block_offsets.begin() + 1); + + const auto grid = raft::ceildiv(count, soc_block_size); + scatter_hinv2_into_augmented_kernel + <<>>(cuopt::make_span(augmented_x), + cuopt::make_span(csr_indices), + cuopt::make_span(q_values), + cuopt::make_span(cones.w), + cuopt::make_span(cones.eta), + cuopt::make_span(cones.cone_offsets), + cuopt::make_span(block_offsets), + cones.n_cones); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +/** + * Compute the maximum primal and dual step lengths that keep SOC blocks + * feasible: + * + * x + alpha dx in Q, z + alpha dz in Q, alpha <= alpha_max. + * + * For one cone u + alpha du, feasibility is + * + * u_0 + alpha du_0 >= ||u_tail + alpha du_tail||. + * + * Squaring gives the quadratic + * + * c + 2 b alpha + a alpha^2 >= 0, + * + * where c = det_J(u), b = u_0 du_0 - , and + * a = det_J(du). The per-cone kernel below solves for the first boundary + * crossing, and the final reductions take the global minimum over cones. 
+ */ +template +std::pair compute_cone_step_length(cone_data_t& cones, + raft::device_span dx, + raft::device_span dz, + f_t alpha_max, + rmm::cuda_stream_view stream) +{ + auto cone_offsets = cuopt::make_span(cones.cone_offsets); + auto element_cone_ids = cuopt::make_span(cones.element_cone_ids); + auto slot_0 = cones.scratch.template get_slot<0>(); + auto slot_1 = cones.scratch.template get_slot<1>(); + auto slot_2 = cones.scratch.template get_slot<2>(); + + auto run_pass = [&](raft::device_span u, + raft::device_span du, + raft::device_span alpha) { + auto du_tail_sq_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [du, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 0 : du[idx] * du[idx]; + }); + cones.segmented_sum(du_tail_sq_terms, slot_0, stream); + + auto u_tail_du_tail_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [u, du, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 0 : u[idx] * du[idx]; + }); + cones.segmented_sum(u_tail_du_tail_terms, slot_1, stream); + + auto u_tail_sq_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [u, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 
0 : u[idx] * u[idx]; + }); + cones.segmented_sum(u_tail_sq_terms, slot_2, stream); + + const auto grid_dim = + raft::ceildiv(static_cast(cones.n_cones), soc_block_size); + step_length_single_kernel<<>>( + u, du, alpha, slot_0, slot_1, slot_2, cone_offsets, alpha_max, cones.n_cones); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + }; + + auto alpha_primal = cuopt::make_span(cones.scratch.step_alpha_primal); + auto alpha_dual = cuopt::make_span(cones.scratch.step_alpha_dual); + + run_pass(cones.x, dx, alpha_primal); + run_pass(cones.z, dz, alpha_dual); + + const auto primal = thrust::reduce(rmm::exec_policy(stream), + alpha_primal.begin(), + alpha_primal.end(), + alpha_max, + thrust::minimum()); + const auto dual = thrust::reduce(rmm::exec_policy(stream), + alpha_dual.begin(), + alpha_dual.end(), + alpha_max, + thrust::minimum()); + + return {primal, dual}; +} + +/** + * Build the SOC corrector target for the reduced KKT solve. + * + * Mehrotra's corrector uses affine cone directions to form + * + * d = (W^{-1} dx_aff) o (W dz_aff) - sigma_mu e, + * + * where `o` is the SOC Jordan product and `e = (1, 0, ..., 0)` per cone. + * The reduced KKT solve needs the cone target + * + * q = -W^{-1} p, where p = lambda \ d and lambda = W z. + * + * On return, `out` holds `q`. Internally, `out` is reused for `W dz_aff` and + * then `d`; `scratch.temp_cone` is reused for `W^{-1} dx_aff`, then `lambda`, + * then `-p`. 
+ */ +template +void compute_combined_cone_rhs_term(raft::device_span dx_aff, + raft::device_span dz_aff, + cone_data_t& cones, + f_t sigma_mu, + raft::device_span out, + rmm::cuda_stream_view stream) +{ + auto cone_offsets = cuopt::make_span(cones.cone_offsets); + auto element_cone_ids = cuopt::make_span(cones.element_cone_ids); + + auto scratch_cone = cuopt::make_span(cones.scratch.temp_cone); + auto scaled_dx = raft::device_span(scratch_cone.data(), scratch_cone.size()); + auto scaled_dz = raft::device_span(out.data(), out.size()); + auto slot_0 = cones.scratch.template get_slot<0>(); + auto slot_1 = cones.scratch.template get_slot<1>(); + auto slot_2 = cones.scratch.template get_slot<2>(); + + apply_w_inv(dx_aff, scratch_cone, cones, stream); + apply_w(dz_aff, out, cones, stream); + + auto full_product_terms = thrust::make_transform_iterator( + thrust::make_zip_iterator(scaled_dx.begin(), scaled_dz.begin()), + thrust::make_zip_function([] HD(f_t dx, f_t dz) -> f_t { return dx * dz; })); + cones.segmented_sum(full_product_terms, slot_0, stream); + + // `out` currently aliases W dz_aff and is about to be overwritten with d. + // Stage both head vectors first because every tail entry needs them. + const auto cone_grid_dim = + raft::ceildiv(static_cast(cones.n_cones), soc_block_size); + gather_cone_heads_kernel<<>>( + scaled_dx, slot_1, cone_offsets, cones.n_cones); + gather_cone_heads_kernel<<>>( + scaled_dz, slot_2, cone_offsets, cones.n_cones); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + const auto element_grid_dim = raft::ceildiv(cones.n_cone_entries, soc_block_size); + combined_cone_shift_write_kernel + <<>>( + out, scaled_dx, scaled_dz, slot_0, slot_1, slot_2, cone_offsets, element_cone_ids, sigma_mu); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + // Form lambda = W z in scratch_cone. At this point W^{-1} dx_aff is dead. 
+ apply_w(cones.z, scratch_cone, cones, stream); + + auto shift = raft::device_span(out.data(), out.size()); + auto nt_point = raft::device_span(scratch_cone.data(), scratch_cone.size()); + + auto lambda_tail_dot_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [shift, nt_point, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 0 : nt_point[idx] * shift[idx]; + }); + cones.segmented_sum(lambda_tail_dot_terms, slot_0, stream); + + auto lambda_tail_sq_terms = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [nt_point, cone_offsets, element_cone_ids] HD(std::size_t idx) -> f_t { + const auto cone = element_cone_ids[idx]; + return idx == cone_offsets[cone] ? 0 : nt_point[idx] * nt_point[idx]; + }); + cones.segmented_sum(lambda_tail_sq_terms, slot_1, stream); + + jordan_divide_by_lambda_scalar_kernel + <<>>( + shift, nt_point, slot_0, slot_1, slot_0, slot_1, cone_offsets, cones.n_cones); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + jordan_divide_by_lambda_write_kernel + <<>>( + shift, nt_point, slot_0, slot_1, cone_offsets, element_cone_ids, scratch_cone); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + apply_w_inv(scratch_cone, out, cones, stream); +} + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/barrier/second_order_cone_reduction.cuh b/cpp/src/barrier/second_order_cone_reduction.cuh new file mode 100644 index 0000000000..ada9fcfb6a --- /dev/null +++ b/cpp/src/barrier/second_order_cone_reduction.cuh @@ -0,0 +1,261 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +__global__ void __launch_bounds__(warps_per_cta* raft::WarpSize) + warp_per_cone_reduce_kernel(InputIt input, + raft::device_span small_cone_ids, + raft::device_span cone_offsets, + OutputIt output, + value_t init); + +/** + * Segmented-sum dispatcher for packed second-order cone vectors. + * + * Cone dimensions are fixed for a solve, so the constructor partitions cone + * ids once by reduction strategy. Each call then reuses those partitions: + * small cones use one warp per cone, medium cones use CUB DeviceSegmentedReduce, + * and large cones use CUB DeviceReduce one cone at a time. The object owns the + * CUB workspace for those medium/large paths. Call `prepare_workspace` once + * before using a CUB-backed path. + */ +template +struct segmented_sum_t { + static_assert(warp_cone_dim > 0); + static_assert(large_cone_cutoff > warp_cone_dim); + + raft::device_span cone_offsets; + rmm::device_uvector small_cone_ids; // cone dimension <= warp_cone_dim + rmm::device_uvector medium_cone_ids; // warp_cone_dim < cone dimension <= large_cone_cutoff + + std::vector large_cone_offsets; + std::vector large_cone_ids; + std::vector large_cone_dimensions; + + // Maximum CUB temporary storage needed by prepared medium/large reductions. 
+ std::size_t cub_workspace_bytes = 0; + rmm::device_buffer cub_workspace; + + private: + template + void prepare_workspace_for_type(rmm::cuda_stream_view stream) + { + auto input = thrust::make_constant_iterator(value_t{}); + auto output = thrust::make_discard_iterator(); + + if (!medium_cone_ids.is_empty()) { + const auto medium_begin_offsets = + thrust::make_permutation_iterator(cone_offsets.data(), medium_cone_ids.begin()); + const auto medium_end_offsets = + thrust::make_permutation_iterator(cone_offsets.data() + 1, medium_cone_ids.begin()); + + std::size_t temp_storage_bytes = 0; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Sum(nullptr, + temp_storage_bytes, + input, + output, + medium_cone_ids.size(), + medium_begin_offsets, + medium_end_offsets, + stream.value())); + cub_workspace_bytes = std::max(cub_workspace_bytes, temp_storage_bytes); + } + + for (std::size_t i = 0; i < large_cone_ids.size(); ++i) { + std::size_t temp_storage_bytes = 0; + RAFT_CUDA_TRY(cub::DeviceReduce::Sum(nullptr, + temp_storage_bytes, + input + large_cone_offsets[i], + output + large_cone_ids[i], + large_cone_dimensions[i], + stream.value())); + cub_workspace_bytes = std::max(cub_workspace_bytes, temp_storage_bytes); + } + + if (cub_workspace.size() < cub_workspace_bytes) { + cub_workspace.resize(cub_workspace_bytes, stream); + } + } + + public: + template + void prepare_workspace(rmm::cuda_stream_view stream) + { + prepare_workspace_for_type(stream); + (prepare_workspace_for_type(stream), ...); + } + + template + void operator()(InputIt input, OutputIt output, value_t init, rmm::cuda_stream_view stream) + { + if (!small_cone_ids.is_empty()) { + // Each warp reduces one small cone. `warps_per_cta` only controls how + // many independent cone reductions are packed into one CTA; the default + // of 8 gives a conventional 256-thread block. 
+ const auto n_small = small_cone_ids.size(); + const auto grid = (n_small + warps_per_cta - 1) / warps_per_cta; + warp_per_cone_reduce_kernel + <<>>( + input, cuopt::make_span(small_cone_ids), cone_offsets, output, init); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + + if (!medium_cone_ids.is_empty()) { + cuopt_assert(cub_workspace_bytes > 0 && cub_workspace.size() >= cub_workspace_bytes, + "segmented_sum_t::prepare_workspace must be called before reducing medium or " + "large cones"); + + const auto medium_output = thrust::make_permutation_iterator(output, medium_cone_ids.begin()); + const auto medium_begin_offsets = + thrust::make_permutation_iterator(cone_offsets.data(), medium_cone_ids.begin()); + const auto medium_end_offsets = + thrust::make_permutation_iterator(cone_offsets.data() + 1, medium_cone_ids.begin()); + + std::size_t temp_storage_bytes = cub_workspace_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Sum(cub_workspace.data(), + temp_storage_bytes, + input, + medium_output, + medium_cone_ids.size(), + medium_begin_offsets, + medium_end_offsets, + stream.value())); + } + + if (!large_cone_ids.empty()) { + cuopt_assert(cub_workspace_bytes > 0 && cub_workspace.size() >= cub_workspace_bytes, + "segmented_sum_t::prepare_workspace must be called before reducing medium or " + "large cones"); + + for (std::size_t i = 0; i < large_cone_ids.size(); ++i) { + std::size_t temp_storage_bytes = cub_workspace_bytes; + RAFT_CUDA_TRY(cub::DeviceReduce::Sum(cub_workspace.data(), + temp_storage_bytes, + input + large_cone_offsets[i], + output + large_cone_ids[i], + large_cone_dimensions[i], + stream.value())); + } + } + } + + template + void operator()(InputIt input, raft::device_span output, rmm::cuda_stream_view stream) + { + operator()(input, output.data(), f_t{0}, stream); + } + + segmented_sum_t(std::span cone_dimensions_host, + raft::device_span cone_offsets_in, + rmm::cuda_stream_view stream) + : cone_offsets(cone_offsets_in), + small_cone_ids(0, stream), + 
medium_cone_ids(0, stream), + cub_workspace(0, stream) + { + std::vector small_cone_ids_host; + std::vector medium_cone_ids_host; + + std::size_t cone_offset = 0; + i_t cone = 0; + for (const auto cone_dimension : cone_dimensions_host) { + if (cone_dimension <= warp_cone_dim) { + small_cone_ids_host.push_back(cone); + } else if (cone_dimension <= large_cone_cutoff) { + medium_cone_ids_host.push_back(cone); + } else { + large_cone_ids.push_back(cone); + large_cone_offsets.push_back(cone_offset); + large_cone_dimensions.push_back(cone_dimension); + } + cone_offset += cone_dimension; + ++cone; + } + + bool need_sync = false; + if (!small_cone_ids_host.empty()) { + cuopt::device_copy(small_cone_ids, small_cone_ids_host, stream); + need_sync = true; + } + if (!medium_cone_ids_host.empty()) { + cuopt::device_copy(medium_cone_ids, medium_cone_ids_host, stream); + need_sync = true; + } + if (need_sync) { stream.synchronize(); } + } +}; + +template +__global__ void __launch_bounds__(warps_per_cta* raft::WarpSize) + warp_per_cone_reduce_kernel(InputIt input, + raft::device_span small_cone_ids, + raft::device_span cone_offsets, + OutputIt output, + value_t init) +{ + static_assert(warps_per_cta > 0); + static_assert(warps_per_cta * raft::WarpSize <= 1024); + + using warp_reduce_t = cub::WarpReduce; + __shared__ typename warp_reduce_t::TempStorage temp_storage[warps_per_cta]; + + const auto lane_id = raft::laneId(); + const auto warp_idx = threadIdx.x / raft::WarpSize; + const auto slot = blockIdx.x * warps_per_cta + warp_idx; + if (slot >= small_cone_ids.size()) { return; } + + const auto cone = small_cone_ids[slot]; + const auto off = cone_offsets[cone]; + const auto dim = cone_offsets[cone + 1] - off; + + auto sum = init; + for (std::size_t i = lane_id; i < dim; i += raft::WarpSize) { + sum = sum + input[off + i]; + } + + sum = warp_reduce_t(temp_storage[warp_idx]).Sum(sum); + if (lane_id == 0) { output[cone] = sum; } +} + +} // namespace 
cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index c5ef847106..ca98dc9da5 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -15,10 +15,15 @@ #include #include -#include namespace cuopt::linear_programming::dual_simplex { +template +static i_t linear_var_count(const lp_problem_t& problem) +{ + return problem.second_order_cone_dims.empty() ? problem.num_cols : problem.cone_var_start; +} + template i_t remove_empty_cols(lp_problem_t& problem, i_t& num_empty_cols, @@ -26,26 +31,18 @@ i_t remove_empty_cols(lp_problem_t& problem, { constexpr bool verbose = false; if (verbose) { printf("Removing %d empty columns\n", num_empty_cols); } - // We have a variable x_j that does not appear in any rows - // The cost function - // sum_{k != j} c_k * x_k + c_j * x_j - // becomes - // sum_{k != j} c_k * x_k + c_j * l_j if c_j > 0 - // or - // sum_{k != j} c_k * x_k + c_j * u_j if c_j < 0 presolve_info.removed_variables.reserve(num_empty_cols); presolve_info.removed_values.reserve(num_empty_cols); presolve_info.removed_reduced_costs.reserve(num_empty_cols); - // Check to see if a variable participates in a quadratic objective std::vector has_quadratic_term(problem.num_cols, false); + i_t linear_cols = linear_var_count(problem); if (problem.Q.n > 0) { - for (i_t j = 0; j < problem.num_cols; ++j) { + for (i_t j = 0; j < linear_cols; ++j) { const i_t row_start = problem.Q.row_start[j]; const i_t row_end = problem.Q.row_start[j + 1]; if (row_end - row_start == 0) { continue; } - // Q is symmetric, so its sufficient to check only the row size has_quadratic_term[j] = true; } } @@ -54,12 +51,13 @@ i_t remove_empty_cols(lp_problem_t& problem, i_t new_cols = 0; for (i_t j = 0; j < problem.num_cols; ++j) { bool remove_var = false; - if ((problem.A.col_start[j + 1] - problem.A.col_start[j]) == 0) { - if (problem.objective[j] >= 0 && problem.lower[j] > -inf && 
!has_quadratic_term[j]) { + if (j < linear_cols && (problem.A.col_start[j + 1] - problem.A.col_start[j]) == 0) { + bool non_removable = has_quadratic_term[j]; + if (problem.objective[j] >= 0 && problem.lower[j] > -inf && !non_removable) { presolve_info.removed_values.push_back(problem.lower[j]); problem.obj_constant += problem.objective[j] * problem.lower[j]; remove_var = true; - } else if (problem.objective[j] <= 0 && problem.upper[j] < inf && !has_quadratic_term[j]) { + } else if (problem.objective[j] <= 0 && problem.upper[j] < inf && !non_removable) { presolve_info.removed_values.push_back(problem.upper[j]); problem.obj_constant += problem.objective[j] * problem.upper[j]; remove_var = true; @@ -122,6 +120,12 @@ i_t remove_empty_cols(lp_problem_t& problem, problem.Q.check_matrix("After removing empty columns"); } + if (!problem.second_order_cone_dims.empty()) { + i_t new_cone_start = col_old_to_new[problem.cone_var_start]; + assert(new_cone_start != -1); + problem.cone_var_start = new_cone_start; + } + problem.objective = objective; problem.lower = lower; problem.upper = upper; @@ -271,6 +275,113 @@ i_t convert_less_than_to_equal(const user_problem_t& user_problem, // We must convert rows in the form: a_i^T x <= beta // into: a_i^T x + s_i = beta, s_i >= 0 + if (!problem.second_order_cone_dims.empty()) { + const i_t old_num_cols = problem.num_cols; + const i_t linear_cols = linear_var_count(problem); + const i_t num_slacks = less_rows; + const i_t num_cols = old_num_cols + num_slacks; + const i_t old_nnz = problem.A.col_start[old_num_cols]; + const i_t nnz = old_nnz + num_slacks; + const i_t new_cone_start = linear_cols + num_slacks; + + auto old_A = problem.A; + csc_matrix_t expanded_A(problem.A.m, num_cols, nnz); + + std::vector objective(num_cols, 0.0); + std::vector lower(num_cols, 0.0); + std::vector upper(num_cols, INFINITY); + std::vector old_to_new(old_num_cols, -1); + + for (i_t j = 0; j < linear_cols; ++j) { + old_to_new[j] = j; + objective[j] = 
problem.objective[j]; + lower[j] = problem.lower[j]; + upper[j] = problem.upper[j]; + } + for (i_t j = linear_cols; j < old_num_cols; ++j) { + old_to_new[j] = j + num_slacks; + objective[old_to_new[j]] = problem.objective[j]; + lower[old_to_new[j]] = problem.lower[j]; + upper[old_to_new[j]] = problem.upper[j]; + } + + i_t nz = 0; + for (i_t j = 0; j < linear_cols; ++j) { + expanded_A.col_start[j] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = old_A.x[p]; + ++nz; + } + } + + i_t slack_col = linear_cols; + for (i_t i = 0; i < problem.num_rows; i++) { + if (row_sense[i] == 'L') { + expanded_A.col_start[slack_col] = nz; + expanded_A.i[nz] = i; + expanded_A.x[nz] = 1.0; + new_slacks.push_back(slack_col); + row_sense[i] = 'E'; + ++slack_col; + ++nz; + --less_rows; + } + } + + for (i_t j = linear_cols; j < old_num_cols; ++j) { + i_t new_j = old_to_new[j]; + expanded_A.col_start[new_j] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = old_A.x[p]; + ++nz; + } + } + expanded_A.col_start[num_cols] = nz; + assert(less_rows == 0); + assert(slack_col == new_cone_start); + assert(nz == nnz); + + if (problem.Q.n > 0) { + const auto old_Q = problem.Q; + const i_t q_nnz = old_Q.row_start[old_num_cols]; + + problem.Q.row_start.assign(num_cols + 1, 0); + for (i_t row = 0; row < old_num_cols; ++row) { + i_t new_row = old_to_new[row]; + problem.Q.row_start[new_row + 1] = old_Q.row_start[row + 1] - old_Q.row_start[row]; + } + for (i_t row = 0; row < num_cols; ++row) { + problem.Q.row_start[row + 1] += problem.Q.row_start[row]; + } + + problem.Q.j.resize(q_nnz); + problem.Q.x.resize(q_nnz); + auto row_starts = problem.Q.row_start; + for (i_t row = 0; row < old_num_cols; ++row) { + i_t new_row = old_to_new[row]; + for (i_t p = old_Q.row_start[row]; p < old_Q.row_start[row + 1]; ++p) { + problem.Q.j[row_starts[new_row]] = 
old_to_new[old_Q.j[p]]; + problem.Q.x[row_starts[new_row]] = old_Q.x[p]; + ++row_starts[new_row]; + } + } + problem.Q.m = num_cols; + problem.Q.n = num_cols; + problem.Q.nz_max = q_nnz; + } + + problem.A = expanded_A; + problem.A.n = num_cols; + problem.objective = objective; + problem.lower = lower; + problem.upper = upper; + problem.num_cols = num_cols; + problem.cone_var_start = new_cone_start; + return 0; + } + i_t num_cols = problem.num_cols + less_rows; i_t nnz = problem.A.col_start[problem.num_cols] + less_rows; problem.A.col_start.resize(num_cols + 1); @@ -570,16 +681,18 @@ void convert_user_problem(const user_problem_t& user_problem, } // Copy info from user_problem to problem - problem.num_rows = user_problem.num_rows; - problem.num_cols = user_problem.num_cols; - problem.A = user_problem.A; - problem.objective = user_problem.objective; - problem.obj_scale = user_problem.obj_scale; - problem.obj_constant = user_problem.obj_constant; - problem.objective_is_integral = user_problem.objective_is_integral; - problem.rhs = user_problem.rhs; - problem.lower = user_problem.lower; - problem.upper = user_problem.upper; + problem.num_rows = user_problem.num_rows; + problem.num_cols = user_problem.num_cols; + problem.A = user_problem.A; + problem.objective = user_problem.objective; + problem.obj_scale = user_problem.obj_scale; + problem.obj_constant = user_problem.obj_constant; + problem.objective_is_integral = user_problem.objective_is_integral; + problem.rhs = user_problem.rhs; + problem.lower = user_problem.lower; + problem.upper = user_problem.upper; + problem.cone_var_start = user_problem.cone_var_start; + problem.second_order_cone_dims = user_problem.second_order_cone_dims; // Make a copy of row_sense so we can modify it std::vector row_sense = user_problem.row_sense; @@ -636,6 +749,7 @@ void convert_user_problem(const user_problem_t& user_problem, settings.log.debug( "equality rows %d less rows %d columns %d\n", equal_rows, less_rows, problem.num_cols); if 
(settings.barrier && settings.dualize != 0 && user_problem.Q_values.size() == 0 && + problem.second_order_cone_dims.empty() && (settings.dualize == 1 || (settings.dualize == -1 && less_rows > 1.2 * problem.num_cols && equal_rows < 2e4))) { settings.log.debug("Dualizing in presolve\n"); @@ -823,7 +937,7 @@ i_t presolve(const lp_problem_t& original, std::vector row_sense(problem.num_rows, '='); // Check for free variables i_t free_variables = 0; - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_var_count(problem); j++) { if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } } @@ -834,7 +948,7 @@ i_t presolve(const lp_problem_t& original, std::vector row_marked(problem.num_rows, 0); current_free_variables.reserve(problem.num_cols); constraints_to_check.reserve(problem.num_rows); - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_var_count(problem); j++) { if (problem.lower[j] == -inf && problem.upper[j] == inf) { current_free_variables.push_back(j); const i_t col_start = problem.A.col_start[j]; @@ -974,7 +1088,7 @@ i_t presolve(const lp_problem_t& original, } i_t new_free_variables = 0; - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_var_count(problem); j++) { if (problem.lower[j] == -inf && problem.upper[j] == inf) { new_free_variables++; } } if (removed_free_variables != 0) { @@ -983,7 +1097,6 @@ i_t presolve(const lp_problem_t& original, assert(new_free_variables == free_variables - removed_free_variables); free_variables = new_free_variables; } - // The original problem may have a variable without a lower bound // but a finite upper bound // -inf < x_j <= u_j @@ -1128,7 +1241,7 @@ i_t presolve(const lp_problem_t& original, // Check for empty cols i_t num_empty_cols = 0; { - for (i_t j = 0; j < problem.num_cols; ++j) { + for (i_t j = 0; j < linear_var_count(problem); ++j) { if ((problem.A.col_start[j + 1] - problem.A.col_start[j]) == 0) { num_empty_cols++; } } } 
@@ -1137,6 +1250,11 @@ i_t presolve(const lp_problem_t& original, remove_empty_cols(problem, num_empty_cols, presolve_info); } + // Check for free variables (exclude cone variables — they are naturally unbounded) + free_variables = 0; + for (i_t j = 0; j < linear_var_count(problem); j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } + } problem.Q.check_matrix("Before free variable expansion"); if (settings.barrier_presolve && free_variables > 0) { @@ -1152,123 +1270,164 @@ i_t presolve(const lp_problem_t& original, // becomes // sum_{k != j} c_k x_k + c_j v - c_j w - std::vector pair_index(problem.num_cols, -1); - i_t num_cols = problem.num_cols + free_variables; - i_t nnz = problem.A.col_start[problem.num_cols]; - for (i_t j = 0; j < problem.num_cols; j++) { - if (problem.lower[j] == -inf && problem.upper[j] == inf) { - nnz += (problem.A.col_start[j + 1] - problem.A.col_start[j]); + const i_t old_num_cols = problem.num_cols; + const i_t linear_cols = linear_var_count(problem); + const i_t new_cone_start = + problem.second_order_cone_dims.empty() ? 
0 : linear_cols + free_variables; + const i_t num_cols = old_num_cols + free_variables; + + auto old_A = problem.A; + auto old_Q = problem.Q; + auto old_objective = problem.objective; + auto old_lower = problem.lower; + auto old_upper = problem.upper; + + std::vector partner_index(old_num_cols, -1); + std::vector orig_to_new(old_num_cols, -1); + std::vector is_free(old_num_cols, false); + + i_t next_partner = linear_cols; + for (i_t j = 0; j < linear_cols; ++j) { + orig_to_new[j] = j; + if (old_lower[j] == -inf && old_upper[j] == inf) { + is_free[j] = true; + partner_index[j] = next_partner++; } } + for (i_t j = linear_cols; j < old_num_cols; ++j) { + orig_to_new[j] = j + free_variables; + } + assert(next_partner == new_cone_start || problem.second_order_cone_dims.empty()); - problem.A.col_start.resize(num_cols + 1); - problem.A.i.resize(nnz); - problem.A.x.resize(nnz); - problem.lower.resize(num_cols); - problem.upper.resize(num_cols); - problem.objective.resize(num_cols); + i_t nnz = old_A.col_start[old_num_cols]; + for (i_t j = 0; j < linear_cols; ++j) { + if (is_free[j]) { nnz += old_A.col_start[j + 1] - old_A.col_start[j]; } + } - presolve_info.free_variable_pairs.resize(free_variables * 2); - i_t pair_count = 0; - i_t q = problem.A.col_start[problem.num_cols]; - i_t col = problem.num_cols; - for (i_t j = 0; j < problem.num_cols; j++) { - if (problem.lower[j] == -inf && problem.upper[j] == inf) { - for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j + 1]; p++) { - i_t i = problem.A.i[p]; - f_t aij = problem.A.x[p]; - problem.A.i[q] = i; - problem.A.x[q] = -aij; - q++; + csc_matrix_t expanded_A(problem.A.m, num_cols, nnz); + i_t nz = 0; + for (i_t j = 0; j < linear_cols; ++j) { + expanded_A.col_start[j] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = old_A.x[p]; + ++nz; + } + } + for (i_t j = 0; j < linear_cols; ++j) { + if (partner_index[j] != -1) { + 
expanded_A.col_start[partner_index[j]] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = -old_A.x[p]; + ++nz; } - problem.lower[col] = 0.0; - problem.upper[col] = inf; - problem.objective[col] = -problem.objective[j]; - presolve_info.free_variable_pairs[pair_count++] = j; - presolve_info.free_variable_pairs[pair_count++] = col; - pair_index[j] = col; - problem.A.col_start[++col] = q; - problem.lower[j] = 0.0; } } + for (i_t j = linear_cols; j < old_num_cols; ++j) { + i_t new_j = orig_to_new[j]; + expanded_A.col_start[new_j] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = old_A.x[p]; + ++nz; + } + } + expanded_A.col_start[num_cols] = nz; + + std::vector objective(num_cols); + std::vector lower(num_cols, -INFINITY); + std::vector upper(num_cols, INFINITY); + presolve_info.free_variable_pairs.clear(); + presolve_info.free_variable_pairs.reserve(free_variables * 2); + + for (i_t j = 0; j < linear_cols; ++j) { + objective[j] = old_objective[j]; + if (is_free[j]) { + lower[j] = 0.0; + upper[j] = inf; + objective[partner_index[j]] = -old_objective[j]; + lower[partner_index[j]] = 0.0; + upper[partner_index[j]] = inf; + presolve_info.free_variable_pairs.push_back(j); + presolve_info.free_variable_pairs.push_back(partner_index[j]); + } else { + lower[j] = old_lower[j]; + upper[j] = old_upper[j]; + } + } + for (i_t j = linear_cols; j < old_num_cols; ++j) { + i_t new_j = orig_to_new[j]; + objective[new_j] = old_objective[j]; + lower[new_j] = old_lower[j]; + upper[new_j] = old_upper[j]; + } - if (problem.Q.n > 0) { + if (old_Q.n > 0) { std::vector row_counts(num_cols, 0); - i_t nz_count = problem.Q.row_start[problem.num_cols]; - for (i_t row = 0; row < problem.Q.n; row++) { - i_t q_start = problem.Q.row_start[row]; - i_t q_end = problem.Q.row_start[row + 1]; - row_counts[row] = q_end - q_start; - for (i_t qj = q_start; 
qj < q_end; qj++) { - i_t col = problem.Q.j[qj]; - if (pair_index[row] != -1 && pair_index[col] != -1) { - assert(pair_index[row] >= problem.num_cols); - assert(pair_index[col] >= problem.num_cols); - row_counts[row]++; - row_counts[pair_index[row]] += 2; - nz_count += 3; - } else if (pair_index[col] != -1) { - assert(pair_index[col] >= problem.num_cols); - row_counts[row]++; + i_t nz_count = 0; + for (i_t row = 0; row < old_Q.n; ++row) { + i_t new_row = orig_to_new[row]; + i_t partner_row = partner_index[row]; + i_t q_start = old_Q.row_start[row]; + i_t q_end = old_Q.row_start[row + 1]; + for (i_t qj = q_start; qj < q_end; ++qj) { + i_t col = old_Q.j[qj]; + i_t partner_col = partner_index[col]; + row_counts[new_row]++; + nz_count++; + if (partner_col != -1) { + row_counts[new_row]++; nz_count++; - } else if (pair_index[row] != -1) { - assert(pair_index[row] >= problem.num_cols); - row_counts[pair_index[row]]++; + } + if (partner_row != -1) { + row_counts[partner_row]++; nz_count++; + if (partner_col != -1) { + row_counts[partner_row]++; + nz_count++; + } } } } std::vector Q_row_start(num_cols + 1); Q_row_start[0] = 0; - for (i_t row = 0; row < num_cols; row++) { + for (i_t row = 0; row < num_cols; ++row) { Q_row_start[row + 1] = Q_row_start[row] + row_counts[row]; } std::vector Q_j(nz_count); std::vector Q_x(nz_count); auto row_starts = Q_row_start; - // First copy the original Q ma - for (i_t row = 0; row < problem.Q.n; row++) { - i_t q_start = problem.Q.row_start[row]; - i_t q_end = problem.Q.row_start[row + 1]; - i_t q_nz = Q_row_start[row]; - for (i_t qj = q_start; qj < q_end; qj++) { - i_t col = problem.Q.j[qj]; - f_t qij = problem.Q.x[qj]; - Q_j[q_nz] = col; - Q_x[q_nz] = qij; - q_nz++; - } - row_starts[row] = q_nz; - } - - // Expand the Q matrix for the free variables - for (i_t row = 0; row < problem.Q.n; row++) { - i_t q_start = problem.Q.row_start[row]; - i_t q_end = problem.Q.row_start[row + 1]; - for (i_t qj = q_start; qj < q_end; qj++) { - i_t col = 
problem.Q.j[qj]; - f_t qij = problem.Q.x[qj]; - if (pair_index[row] != -1 && pair_index[col] != -1) { - Q_j[row_starts[row]] = pair_index[col]; - Q_x[row_starts[row]] = -qij; - row_starts[row]++; - - Q_j[row_starts[pair_index[row]]] = col; - Q_x[row_starts[pair_index[row]]] = -qij; - row_starts[pair_index[row]]++; - - Q_j[row_starts[pair_index[row]]] = pair_index[col]; - Q_x[row_starts[pair_index[row]]] = qij; - row_starts[pair_index[row]]++; - } else if (pair_index[col] != -1) { - Q_j[row_starts[row]] = pair_index[col]; - Q_x[row_starts[row]] = -qij; - row_starts[row]++; - } else if (pair_index[row] != -1) { - Q_j[row_starts[pair_index[row]]] = col; - Q_x[row_starts[pair_index[row]]] = -qij; - row_starts[pair_index[row]]++; + for (i_t row = 0; row < old_Q.n; ++row) { + i_t new_row = orig_to_new[row]; + i_t partner_row = partner_index[row]; + i_t q_start = old_Q.row_start[row]; + i_t q_end = old_Q.row_start[row + 1]; + for (i_t qj = q_start; qj < q_end; ++qj) { + i_t col = old_Q.j[qj]; + f_t qij = old_Q.x[qj]; + i_t new_col = orig_to_new[col]; + i_t partner_col = partner_index[col]; + + Q_j[row_starts[new_row]] = new_col; + Q_x[row_starts[new_row]] = qij; + row_starts[new_row]++; + + if (partner_col != -1) { + Q_j[row_starts[new_row]] = partner_col; + Q_x[row_starts[new_row]] = -qij; + row_starts[new_row]++; + } + if (partner_row != -1) { + Q_j[row_starts[partner_row]] = new_col; + Q_x[row_starts[partner_row]] = -qij; + row_starts[partner_row]++; + if (partner_col != -1) { + Q_j[row_starts[partner_row]] = partner_col; + Q_x[row_starts[partner_row]] = qij; + row_starts[partner_row]++; + } } } } @@ -1281,12 +1440,17 @@ i_t presolve(const lp_problem_t& original, problem.Q.check_matrix("After free variable expansion"); } - // assert(problem.A.p[num_cols] == nnz); - problem.A.n = num_cols; - problem.num_cols = num_cols; + problem.A = expanded_A; + problem.A.n = num_cols; + problem.objective = objective; + problem.lower = lower; + problem.upper = upper; + 
problem.num_cols = num_cols; + if (!problem.second_order_cone_dims.empty()) { problem.cone_var_start = new_cone_start; } } - if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0) { + if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0 && + problem.second_order_cone_dims.empty()) { folding(problem, settings, presolve_info); } @@ -1364,7 +1528,7 @@ void crush_primal_solution(const user_problem_t& user_problem, // including previously added slacks, are reset before writing new values. solution.assign(problem.num_cols, 0.0); for (i_t j = 0; j < user_problem.num_cols; j++) { - solution[j] = user_solution[j]; + solution[user_col_to_problem_col(user_problem, problem, j)] = user_solution[j]; } std::vector primal_residual(problem.num_rows); @@ -1404,7 +1568,7 @@ void crush_primal_solution_with_slack(const user_problem_t& user_probl // Re-crush can be called with a reused output vector; clear stale entries first. solution.assign(problem.num_cols, 0.0); for (i_t j = 0; j < user_problem.num_cols; j++) { - solution[j] = user_solution[j]; + solution[user_col_to_problem_col(user_problem, problem, j)] = user_solution[j]; } std::vector primal_residual(problem.num_rows); @@ -1451,9 +1615,9 @@ f_t crush_dual_solution(const user_problem_t& user_problem, for (i_t i = 0; i < user_problem.num_rows; i++) { y[i] = user_y[i]; } - z.resize(problem.num_cols); + z.assign(problem.num_cols, 0.0); for (i_t j = 0; j < user_problem.num_cols; j++) { - z[j] = user_z[j]; + z[user_col_to_problem_col(user_problem, problem, j)] = user_z[j]; } std::vector is_range_row(problem.num_rows, false); @@ -1520,6 +1684,17 @@ f_t crush_dual_solution(const user_problem_t& user_problem, return dual_res_inf; } +template +static i_t user_col_to_problem_col(const user_problem_t& user_problem, + const lp_problem_t& problem, + i_t user_col) +{ + if (user_problem.second_order_cone_dims.empty()) { return user_col; } + if (problem.cone_var_start <= user_problem.cone_var_start) { 
return user_col; } + if (user_col < user_problem.cone_var_start) { return user_col; } + return problem.cone_var_start + (user_col - user_problem.cone_var_start); +} + template void uncrush_primal_solution(const user_problem_t& user_problem, const lp_problem_t& problem, @@ -1529,9 +1704,9 @@ void uncrush_primal_solution(const user_problem_t& user_problem, user_solution.resize(user_problem.num_cols); assert(problem.num_cols >= user_problem.num_cols); assert(solution.size() >= user_problem.num_cols); - std::copy(solution.begin(), - solution.begin() + std::min((i_t)solution.size(), user_problem.num_cols), - user_solution.data()); + for (i_t j = 0; j < user_problem.num_cols; ++j) { + user_solution[j] = solution[user_col_to_problem_col(user_problem, problem, j)]; + } } template @@ -1634,13 +1809,25 @@ void uncrush_solution(const presolve_info_t& presolve_info, if (num_free_variables > 0) { settings.log.printf("Post-solve: Handling free variables %d\n", num_free_variables); // We added free variables so we need to map the crushed solution back to the original variables + std::vector remove_partner(input_x.size(), false); for (i_t k = 0; k < 2 * num_free_variables; k += 2) { const i_t u = free_variable_pairs[k]; const i_t v = free_variable_pairs[k + 1]; input_x[u] -= input_x[v]; + remove_partner[v] = true; + } + std::vector compact_x; + std::vector compact_z; + compact_x.reserve(input_x.size() - num_free_variables); + compact_z.reserve(input_z.size() - num_free_variables); + for (i_t j = 0; j < static_cast(input_x.size()); ++j) { + if (!remove_partner[j]) { + compact_x.push_back(input_x[j]); + compact_z.push_back(input_z[j]); + } } - input_z.resize(input_z.size() - num_free_variables); - input_x.resize(input_x.size() - num_free_variables); + input_x = compact_x; + input_z = compact_z; } if (presolve_info.removed_variables.size() > 0) { diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index d0e2d52812..cd22e4b4be 100644 --- 
a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -49,6 +49,8 @@ struct lp_problem_t { f_t obj_constant; f_t obj_scale; // 1.0 for min, -1.0 for max bool objective_is_integral{false}; + i_t cone_var_start{0}; + std::vector second_order_cone_dims; void write_mps(const std::string& path) const { diff --git a/cpp/src/dual_simplex/scaling.cpp b/cpp/src/dual_simplex/scaling.cpp index 1531c91486..65dcb49bfe 100644 --- a/cpp/src/dual_simplex/scaling.cpp +++ b/cpp/src/dual_simplex/scaling.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -22,13 +22,14 @@ i_t column_scaling(const lp_problem_t& unscaled, i_t m = scaled.num_rows; i_t n = scaled.num_cols; - if (!settings.scale_columns || unscaled.Q.n > 0) { + if (!settings.scale_columns || unscaled.Q.n > 0 || !unscaled.second_order_cone_dims.empty()) { settings.log.printf("Skipping column scaling\n"); column_scaling.resize(n, 1.0); return 0; } column_scaling.resize(n); + f_t max = 0; f_t min = std::numeric_limits::max(); for (i_t j = 0; j < n; ++j) { diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 82d922eec3..3755931a12 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -61,6 +61,42 @@ void write_matlab(const std::string& filename, const dual_simplex::lp_problem_t< fclose(fid); } +template +bool validate_barrier_cone_layout(const lp_problem_t& problem, + const simplex_solver_settings_t& settings) +{ + if (problem.second_order_cone_dims.empty()) { return true; } + + i_t cone_end = problem.cone_var_start; + for (auto q_k : problem.second_order_cone_dims) { + if (q_k <= 1) { + settings.log.printf( + "Error: second-order cone dimensions must be at least 2; use linear 
variables instead of " + "Q^1\n"); + return false; + } + cone_end += q_k; + } + + if (cone_end != problem.num_cols) { + settings.log.printf("Error: conic variables must form a trailing block [linear | cone]\n"); + return false; + } + + for (i_t j = problem.cone_var_start; j < cone_end; ++j) { + if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) { + settings.log.printf("Error: explicit lower bound on conic variable %d is not supported\n", j); + return false; + } + if (problem.upper[j] < inf) { + settings.log.printf("Error: explicit upper bound on conic variable %d is not supported\n", j); + return false; + } + } + + return true; +} + } // namespace template @@ -352,6 +388,10 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us barrier_settings.barrier_presolve = true; dualize_info_t dualize_info; convert_user_problem(user_problem, barrier_settings, original_lp, new_slacks, dualize_info); + if (!validate_barrier_cone_layout(original_lp, settings)) { + return lp_status_t::NUMERICAL_ISSUES; + } + lp_solution_t lp_solution(original_lp.num_rows, original_lp.num_cols); // Presolve the linear program @@ -581,7 +621,8 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us uncrush_primal_solution(user_problem, original_lp, lp_solution.x, solution.x); uncrush_dual_solution( user_problem, original_lp, lp_solution.y, lp_solution.z, solution.y, solution.z); - solution.objective = barrier_solution.objective; + solution.objective = + barrier_solution.user_objective / user_problem.obj_scale - user_problem.obj_constant; solution.user_objective = barrier_solution.user_objective; solution.l2_primal_residual = barrier_solution.l2_primal_residual; solution.l2_dual_residual = barrier_solution.l2_dual_residual; diff --git a/cpp/src/dual_simplex/user_problem.hpp b/cpp/src/dual_simplex/user_problem.hpp index 73c4c391be..8b0588064c 100644 --- a/cpp/src/dual_simplex/user_problem.hpp +++ b/cpp/src/dual_simplex/user_problem.hpp @@ -52,6 +52,8 @@ 
struct user_problem_t { std::vector Q_offsets; std::vector Q_indices; std::vector Q_values; + i_t cone_var_start{0}; + std::vector second_order_cone_dims; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/vector_math.cuh b/cpp/src/dual_simplex/vector_math.cuh index abc7263858..32c75ea366 100644 --- a/cpp/src/dual_simplex/vector_math.cuh +++ b/cpp/src/dual_simplex/vector_math.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,6 +9,10 @@ #include +#include +#include +#include + #include #include @@ -28,6 +32,7 @@ struct norm_inf_max { template f_t device_custom_vector_norm_inf(InputIteratorT in, i_t size, rmm::cuda_stream_view stream_view) { + if (size == 0) { return 0; } // FIXME: Tmp storage stored in vector_math class. 
auto d_out = rmm::device_scalar(stream_view); rmm::device_uvector d_temp_storage(0, stream_view); @@ -62,6 +67,12 @@ f_t device_vector_norm_inf(const rmm::device_uvector& in, rmm::cuda_stream_ return device_custom_vector_norm_inf(in.data(), in.size(), stream_view); } +template +f_t device_vector_norm_inf(raft::device_span in, rmm::cuda_stream_view stream_view) +{ + return device_custom_vector_norm_inf(in.data(), in.size(), stream_view); +} + // TMP we should just have a CPU and GPU version to do the comparison // Should never have to norm inf a CPU vector if we are using the GPU template @@ -71,4 +82,12 @@ f_t vector_norm_inf(const std::vector& x, rmm::cuda_stream_view return device_vector_norm_inf(d_x, stream_view); } +template +f_t vector_norm_inf(raft::host_span x, rmm::cuda_stream_view stream_view) +{ + rmm::device_uvector d_x(x.size(), stream_view); + raft::copy(d_x.data(), x.data(), x.size(), stream_view); + return device_vector_norm_inf(d_x, stream_view); +} + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/pdlp/cpu_optimization_problem.cpp b/cpp/src/pdlp/cpu_optimization_problem.cpp index 406b0b6541..de1f74ed47 100644 --- a/cpp/src/pdlp/cpu_optimization_problem.cpp +++ b/cpp/src/pdlp/cpu_optimization_problem.cpp @@ -133,6 +133,14 @@ void cpu_optimization_problem_t::set_quadratic_objective_matrix( std::copy(Q_offsets, Q_offsets + size_offsets, Q_offsets_.begin()); } +template +void cpu_optimization_problem_t::set_quadratic_constraints( + std::vector::quadratic_constraint_t> + constraints) +{ + quadratic_constraints_ = std::move(constraints); +} + template void cpu_optimization_problem_t::set_variable_lower_bounds( const f_t* variable_lower_bounds, i_t size) @@ -494,6 +502,19 @@ bool cpu_optimization_problem_t::has_quadratic_objective() const return !Q_values_.empty(); } +template +const std::vector::quadratic_constraint_t>& +cpu_optimization_problem_t::get_quadratic_constraints() const +{ + return quadratic_constraints_; +} + 
+template +bool cpu_optimization_problem_t::has_quadratic_constraints() const +{ + return !quadratic_constraints_.empty(); +} + // ============================================================================== // Host Getters (return references to CPU memory) // ============================================================================== @@ -621,6 +642,12 @@ cpu_optimization_problem_t::to_optimization_problem(raft::handle_t con Q_offsets_.size()); } + if (!quadratic_constraints_.empty()) { + gpu_problem->set_quadratic_constraints( + std::vector::quadratic_constraint_t>( + quadratic_constraints_)); + } + // Set variable bounds if (!variable_lower_bounds_.empty()) { gpu_problem->set_variable_lower_bounds(variable_lower_bounds_.data(), @@ -740,6 +767,10 @@ void cpu_optimization_problem_t::write_to_mps(const std::string& mps_f false); } + if (!quadratic_constraints_.empty()) { + data_model_view.set_quadratic_constraints(quadratic_constraints_); + } + cuopt::mps_parser::write_mps(data_model_view, mps_file_path); } diff --git a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu index 87ff9dab08..634fd86c60 100644 --- a/cpp/src/pdlp/optimization_problem.cu +++ b/cpp/src/pdlp/optimization_problem.cu @@ -97,7 +97,8 @@ optimization_problem_t::optimization_problem_t( problem_name_{other.get_problem_name()}, problem_category_{other.get_problem_category()}, var_names_{other.get_variable_names()}, - row_names_{other.get_row_names()} + row_names_{other.get_row_names()}, + quadratic_constraints_{other.get_quadratic_constraints()} { } @@ -197,6 +198,14 @@ void optimization_problem_t::set_quadratic_objective_matrix( // FIX ME:: check for positive semi definite matrix } +template +void optimization_problem_t::set_quadratic_constraints( + std::vector::quadratic_constraint_t> + constraints) +{ + quadratic_constraints_ = std::move(constraints); +} + template void optimization_problem_t::set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size) @@ 
-548,6 +557,19 @@ bool optimization_problem_t::has_quadratic_objective() const return !Q_values_.empty(); } +template +const std::vector::quadratic_constraint_t>& +optimization_problem_t::get_quadratic_constraints() const +{ + return quadratic_constraints_; +} + +template +bool optimization_problem_t::has_quadratic_constraints() const +{ + return !quadratic_constraints_.empty(); +} + template raft::handle_t const* optimization_problem_t::get_handle_ptr() const noexcept { @@ -820,6 +842,10 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ is_symmetrized); } + if (!quadratic_constraints_.empty()) { + data_model_view.set_quadratic_constraints(quadratic_constraints_); + } + cuopt::mps_parser::write_mps(data_model_view, mps_file_path); } diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index f0345368a7..585bfe4476 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -1378,12 +1378,17 @@ optimization_problem_solution_t solve_lp( // This needs to be called before pdlp is initialized init_handler(op_problem.get_handle_ptr()); - if (op_problem.has_quadratic_objective()) { - CUOPT_LOG_INFO("Problem has a quadratic objective. Using Barrier."); + if (op_problem.has_quadratic_objective() || op_problem.has_quadratic_constraints()) { + if (op_problem.has_quadratic_objective()) { + CUOPT_LOG_INFO("Problem has a quadratic objective. Using Barrier."); + } + if (op_problem.has_quadratic_constraints()) { + CUOPT_LOG_INFO("Problem has %d quadratic constraints. Using Barrier with SOC conversion.", static_cast(op_problem.get_quadratic_constraints().size())); + } settings.method = method_t::Barrier; settings.presolver = presolver_t::None; - // check for sense of the problem - if (op_problem.get_sense()) { + // Quadratic objective support is minimization-only. 
+ if (op_problem.has_quadratic_objective() && op_problem.get_sense()) { CUOPT_LOG_ERROR("Quadratic problems must be minimized"); return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, op_problem.get_handle_ptr()->get_stream()); diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index f795d2c4ca..772105a1b4 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -26,13 +26,7 @@ #include #include -#include -#include -#include #include -#include -#include -#include namespace cuopt::linear_programming::detail { template diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp index b143a206d4..1b39dffcaa 100644 --- a/cpp/src/pdlp/translate.hpp +++ b/cpp/src/pdlp/translate.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include @@ -14,6 +15,10 @@ #include +#include +#include +#include + namespace cuopt::linear_programming { template @@ -34,8 +39,6 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( csr_A.j = std::vector(cuopt::host_copy(model.variables, handle_ptr->get_stream())); csr_A.row_start = std::vector(cuopt::host_copy(model.offsets, handle_ptr->get_stream())); - csr_A.to_compressed_col(user_problem.A); - user_problem.rhs.resize(m); user_problem.row_sense.resize(m); user_problem.range_rows.clear(); @@ -105,6 +108,291 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( user_problem.Q_indices = model.Q_indices; user_problem.Q_values = model.Q_values; + if (model.original_problem_ptr->has_quadratic_constraints()) { + const auto& qcs = model.original_problem_ptr->get_quadratic_constraints(); + cuopt_expects(!qcs.empty(), + error_type_t::ValidationError, + "Quadratic-constraint flag is set, but no constraints were provided"); + + // Use a practical tolerance for text-parsed MPS numeric values. 
+ const f_t tol = std::numeric_limits::epsilon() * 2; + + // SOC conversion accepts only diagonal Lorentz-form QCMATRIX rows: + // -x_head^2 + sum_i x_tail_i^2 <= 0. + // The barrier consumes SOCs as trailing variable blocks [head, tails...], so we validate all + // QCMATRIX blocks first, then apply a single column permutation to the linear model. + std::vector> cone_vars; + std::vector cone_dims; + std::vector is_cone_var(static_cast(n), 0); + cone_vars.reserve(qcs.size()); + cone_dims.reserve(qcs.size()); + + for (const auto& qc : qcs) { + cuopt_expects(qc.constraint_row_type == 'L', + error_type_t::ValidationError, + "Only <= quadratic constraints are supported for SOC conversion"); + cuopt_expects(qc.linear_values.empty(), + error_type_t::ValidationError, + "SOC conversion currently requires zero linear terms in quadratic constraints"); + cuopt_expects(qc.rhs_value < tol && qc.rhs_value > -tol, + error_type_t::ValidationError, + "SOC conversion currently requires rhs = 0 for quadratic constraints"); + + cuopt_expects(qc.quadratic_offsets.size() >= 2, + error_type_t::ValidationError, + "Quadratic constraint '%s' has invalid CSR offsets (need at least 2 entries)", + qc.constraint_row_name.c_str()); + cuopt_expects(qc.quadratic_values.size() == qc.quadratic_indices.size(), + error_type_t::ValidationError, + "Quadratic constraint '%s' quadratic_values and quadratic_indices length " + "mismatch for CSR Q", + qc.constraint_row_name.c_str()); + + const i_t q_n = static_cast(qc.quadratic_values.size()); + cuopt_expects(q_n >= 2, + error_type_t::ValidationError, + "Quadratic constraint '%s' SOC must have at least 2 diagonal entries in Q (nnz " + "%d)", + qc.constraint_row_name.c_str(), + static_cast(q_n)); + + cuopt_expects( + qc.quadratic_offsets.size() == static_cast(n) + 1, + error_type_t::ValidationError, + "Quadratic constraint '%s' Q must be n by n in CSR: expected %zu CSR row pointers (offsets " + "length n+1), got %zu (n = %d)", + qc.constraint_row_name.c_str(), 
+ static_cast(n) + 1, + qc.quadratic_offsets.size(), + static_cast(n)); + cuopt_expects( + qc.quadratic_offsets[static_cast(n)] == q_n, + error_type_t::ValidationError, + "Quadratic constraint '%s' Q last CSR offset %d must equal number of nonzeros (nnz) %d for " + "this diagonal Q", + qc.constraint_row_name.c_str(), + static_cast(qc.quadratic_offsets[static_cast(n)]), + static_cast(q_n)); + cuopt_expects(qc.quadratic_offsets[0] == 0, + error_type_t::ValidationError, + "Quadratic constraint '%s' Q CSR offsets[0] must be 0", + qc.constraint_row_name.c_str()); + + // Verify Q: n by n CSR, diagonal entries only, Lorentz pattern. + // Scan each row r: empty or one nnz on (r,r) with value -1 (head) or +1 (tail); + // tail order follows this scan; no requirement that diagonal indices be sorted. + i_t head = static_cast(-1); + i_t n_head_m = 0; + std::vector tail_row_vars{}; + tail_row_vars.reserve(static_cast(q_n - 1)); + + for (i_t r = 0; r < n; ++r) { + const i_t p_beg = qc.quadratic_offsets[static_cast(r)]; + const i_t p_end = qc.quadratic_offsets[static_cast(r + 1)]; + cuopt_expects(p_beg >= 0 && p_beg <= p_end && p_end <= q_n, + error_type_t::ValidationError, + "Quadratic constraint '%s' Q row %d has invalid CSR offsets [%d, %d)", + qc.constraint_row_name.c_str(), + static_cast(r), + static_cast(p_beg), + static_cast(p_end)); + + if (p_beg == p_end) { continue; } + + cuopt_expects(p_beg + 1 == p_end, + error_type_t::ValidationError, + "Quadratic constraint '%s' Q row %d: expected at most one stored entry on " + "the diagonal per " + "row (got end - beg = %d)", + qc.constraint_row_name.c_str(), + static_cast(r), + static_cast(p_end - p_beg)); + + const i_t col = qc.quadratic_indices[static_cast(p_beg)]; + const f_t v = qc.quadratic_values[static_cast(p_beg)]; + cuopt_expects( + col == r, + error_type_t::ValidationError, + "Quadratic constraint '%s' Q row %d: only main diagonal (j,j) entries are allowed; got " + "column %d", + qc.constraint_row_name.c_str(), + 
static_cast(r), + static_cast(col)); + + const f_t neg_one_delta = v + f_t(1); + const f_t pos_one_delta = v - f_t(1); + const bool is_neg_one = (neg_one_delta >= -tol && neg_one_delta <= tol); + const bool is_pos_one = (pos_one_delta >= -tol && pos_one_delta <= tol); + if (is_neg_one) { + ++n_head_m; + head = r; + } else if (is_pos_one) { + tail_row_vars.push_back(r); + } else { + cuopt_expects(false, + error_type_t::ValidationError, + "Quadratic constraint '%s' Q row %d: diagonal for SOC must be -1 (head) or " + "+1 (tail); got " + "%.17g", + qc.constraint_row_name.c_str(), + static_cast(r), + static_cast(v)); + } + } + cuopt_expects( + n_head_m == 1, + error_type_t::ValidationError, + "Quadratic constraint '%s' SOC Q: expected exactly one diagonal with value -1 (cone head), " + "found %d", + qc.constraint_row_name.c_str(), + static_cast(n_head_m)); + cuopt_expects( + static_cast(tail_row_vars.size()) == q_n - 1, + error_type_t::ValidationError, + "Quadratic constraint '%s' SOC Q: expected %d diagonals with value +1 (tails), found %zu", + qc.constraint_row_name.c_str(), + static_cast(q_n - 1), + tail_row_vars.size()); + cuopt_expects(head >= 0, + error_type_t::ValidationError, + "Quadratic constraint '%s' SOC Q: internal error (head index invalid)", + qc.constraint_row_name.c_str()); + + std::vector cone; + cone.reserve(static_cast(q_n)); + cone.push_back(head); + cone.insert(cone.end(), tail_row_vars.begin(), tail_row_vars.end()); + for (const i_t var : cone) { + cuopt_expects(!is_cone_var[static_cast(var)], + error_type_t::ValidationError, + "Variable %d appears in more than one SOC QCMATRIX block; overlapping cones " + "are not supported", + static_cast(var)); + is_cone_var[static_cast(var)] = 1; + } + cone_dims.push_back(q_n); + cone_vars.push_back(std::move(cone)); + } + + std::vector old_to_new(static_cast(n), i_t{-1}); + std::vector new_to_old; + new_to_old.reserve(static_cast(n)); + for (i_t j = 0; j < n; ++j) { + if (is_cone_var[static_cast(j)]) { 
continue; } + old_to_new[static_cast(j)] = static_cast(new_to_old.size()); + new_to_old.push_back(j); + } + const i_t cone_var_start = static_cast(new_to_old.size()); + for (const auto& cone : cone_vars) { + for (const i_t old_j : cone) { + old_to_new[static_cast(old_j)] = static_cast(new_to_old.size()); + new_to_old.push_back(old_j); + } + } + cuopt_expects(static_cast(new_to_old.size()) == n, + error_type_t::RuntimeError, + "Internal error while building SOC variable permutation"); + + for (i_t row = 0; row < csr_A.m; ++row) { + for (i_t p = csr_A.row_start[static_cast(row)]; + p < csr_A.row_start[static_cast(row + 1)]; + ++p) { + const i_t old_j = csr_A.j[static_cast(p)]; + cuopt_expects(old_j >= 0 && old_j < n, + error_type_t::ValidationError, + "Linear constraint matrix column index %d is outside [0, %d)", + static_cast(old_j), + static_cast(n)); + csr_A.j[static_cast(p)] = old_to_new[static_cast(old_j)]; + } + } + + auto permute_dense_by_old_to_new = [&](auto& values, const char* name) { + if (values.empty()) { return; } + using value_t = typename std::decay_t::value_type; + cuopt_expects(values.size() == static_cast(n), + error_type_t::ValidationError, + "%s length %zu does not match number of variables %d", + name, + values.size(), + static_cast(n)); + std::vector permuted(values.size()); + for (i_t old_j = 0; old_j < n; ++old_j) { + permuted[static_cast(old_to_new[static_cast(old_j)])] = + std::move(values[static_cast(old_j)]); + } + values = std::move(permuted); + }; + + permute_dense_by_old_to_new(user_problem.objective, "objective"); + permute_dense_by_old_to_new(user_problem.lower, "lower bounds"); + permute_dense_by_old_to_new(user_problem.upper, "upper bounds"); + permute_dense_by_old_to_new(user_problem.var_types, "variable types"); + permute_dense_by_old_to_new(user_problem.col_names, "column names"); + + if (!user_problem.Q_values.empty()) { + cuopt_expects(user_problem.Q_indices.size() == user_problem.Q_values.size(), + 
error_type_t::ValidationError, + "Quadratic objective indices and values length mismatch"); + cuopt_expects(user_problem.Q_offsets.size() == static_cast(n) + 1, + error_type_t::ValidationError, + "Quadratic objective CSR offsets length must be n+1 when SOC QCMATRIX " + "conversion permutes variables"); + cuopt_expects(user_problem.Q_offsets[0] == 0, + error_type_t::ValidationError, + "Quadratic objective CSR offsets[0] must be 0"); + cuopt_expects(user_problem.Q_offsets[static_cast(n)] == + static_cast(user_problem.Q_values.size()), + error_type_t::ValidationError, + "Quadratic objective CSR last offset must equal number of nonzeros"); + + std::vector q_offsets(static_cast(n) + 1, 0); + for (i_t old_row = 0; old_row < n; ++old_row) { + const i_t p_beg = user_problem.Q_offsets[static_cast(old_row)]; + const i_t p_end = user_problem.Q_offsets[static_cast(old_row + 1)]; + cuopt_expects( + p_beg >= 0 && p_beg <= p_end && p_end <= static_cast(user_problem.Q_values.size()), + error_type_t::ValidationError, + "Quadratic objective CSR offsets are invalid at row %d", + static_cast(old_row)); + const i_t new_row = old_to_new[static_cast(old_row)]; + q_offsets[static_cast(new_row + 1)] = p_end - p_beg; + } + for (i_t row = 0; row < n; ++row) { + q_offsets[static_cast(row + 1)] += q_offsets[static_cast(row)]; + } + + std::vector q_indices(user_problem.Q_indices.size()); + std::vector q_values(user_problem.Q_values.size()); + auto q_write = q_offsets; + for (i_t old_row = 0; old_row < n; ++old_row) { + const i_t new_row = old_to_new[static_cast(old_row)]; + for (i_t p = user_problem.Q_offsets[static_cast(old_row)]; + p < user_problem.Q_offsets[static_cast(old_row + 1)]; + ++p) { + const i_t old_col = user_problem.Q_indices[static_cast(p)]; + cuopt_expects(old_col >= 0 && old_col < n, + error_type_t::ValidationError, + "Quadratic objective column index %d is outside [0, %d)", + static_cast(old_col), + static_cast(n)); + const i_t dst = q_write[static_cast(new_row)]++; + 
q_indices[static_cast(dst)] = old_to_new[static_cast(old_col)]; + q_values[static_cast(dst)] = user_problem.Q_values[static_cast(p)]; + } + } + + user_problem.Q_offsets = std::move(q_offsets); + user_problem.Q_indices = std::move(q_indices); + user_problem.Q_values = std::move(q_values); + } + + user_problem.cone_var_start = cone_var_start; + user_problem.second_order_cone_dims = std::move(cone_dims); + } + + csr_A.to_compressed_col(user_problem.A); + return user_problem; } diff --git a/cpp/tests/dual_simplex/CMakeLists.txt b/cpp/tests/dual_simplex/CMakeLists.txt index dc4ab35b73..f6dff93227 100644 --- a/cpp/tests/dual_simplex/CMakeLists.txt +++ b/cpp/tests/dual_simplex/CMakeLists.txt @@ -6,4 +6,5 @@ ConfigureTest(DUAL_SIMPLEX_TEST ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve_barrier.cu - LABELS numopt) + ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/second_order_cone_kernels.cu +) diff --git a/cpp/tests/dual_simplex/unit_tests/second_order_cone_kernels.cu b/cpp/tests/dual_simplex/unit_tests/second_order_cone_kernels.cu new file mode 100644 index 0000000000..b8e6eef487 --- /dev/null +++ b/cpp/tests/dual_simplex/unit_tests/second_order_cone_kernels.cu @@ -0,0 +1,625 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include + +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex::test { + +double host_cone_step_length_from_scalars(double u0, + double du0, + double du_tail_sq, + double u_tail_du_tail, + double u_tail_sq, + double alpha_max) +{ + const auto a = du0 * du0 - du_tail_sq; + const auto b = u0 * du0 - u_tail_du_tail; + const auto c_raw = u0 * u0 - u_tail_sq; + const auto c = c_raw > 0.0 ? 
c_raw : 0.0; + const auto disc = b * b - a * c; + auto alpha = alpha_max; + + if (du0 < 0.0) { alpha = std::min(alpha, -u0 / du0); } + + if ((a > 0.0 && b > 0.0) || disc < 0.0) { return alpha; } + + if (a == 0.0) { + if (b < 0.0) { alpha = std::min(alpha, c / (-2.0 * b)); } + } else if (c == 0.0) { + alpha = a >= 0.0 ? alpha : 0.0; + } else { + const auto t = -(b + std::copysign(std::sqrt(disc), b)); + auto r1 = c / t; + auto r2 = t / a; + if (r1 < 0.0) { r1 = alpha; } + if (r2 < 0.0) { r2 = alpha; } + alpha = std::min(alpha, std::min(r1, r2)); + } + + return alpha; +} + +TEST(second_order_cone_kernels, topology_and_scratch_layout) +{ + auto stream = rmm::cuda_stream_default; + + std::vector cone_dimensions{3, 2, 5}; + rmm::device_uvector x(10, stream); + rmm::device_uvector z(10, stream); + + cone_data_t cones(cone_dimensions, cuopt::make_span(x), cuopt::make_span(z), stream); + + EXPECT_EQ(cones.n_cones, std::size_t{3}); + EXPECT_EQ(cones.n_cone_entries, std::size_t{10}); + EXPECT_EQ(cones.x.data(), x.data()); + EXPECT_EQ(cones.z.data(), z.data()); + + EXPECT_EQ(cuopt::host_copy(cones.cone_offsets, stream), (std::vector{0, 3, 5, 10})); + EXPECT_EQ(cuopt::host_copy(cones.cone_dimensions, stream), cone_dimensions); + EXPECT_EQ(cuopt::host_copy(cones.element_cone_ids, stream), + (std::vector{0, 0, 0, 1, 1, 2, 2, 2, 2, 2})); + EXPECT_EQ(cuopt::host_copy(cones.segmented_sum.small_cone_ids, stream), + (std::vector{0, 1, 2})); + EXPECT_TRUE(cuopt::host_copy(cones.segmented_sum.medium_cone_ids, stream).empty()); + EXPECT_TRUE(cones.segmented_sum.large_cone_ids.empty()); + EXPECT_TRUE(cones.segmented_sum.large_cone_offsets.empty()); + EXPECT_TRUE(cones.segmented_sum.large_cone_dimensions.empty()); + + EXPECT_EQ(cones.eta.size(), 3); + EXPECT_EQ(cones.w.size(), 10); + + auto& scratch = cones.scratch; + EXPECT_EQ(scratch.n_cones, cones.n_cones); + EXPECT_EQ(scratch.n_cone_entries, cones.n_cone_entries); + EXPECT_EQ(scratch.slots.size(), 3 * cone_dimensions.size()); + 
EXPECT_EQ(scratch.step_alpha_primal.size(), cone_dimensions.size()); + EXPECT_EQ(scratch.step_alpha_dual.size(), cone_dimensions.size()); + EXPECT_EQ(scratch.temp_cone.size(), x.size()); + + EXPECT_EQ(scratch.get_slot<0>().size(), cone_dimensions.size()); + EXPECT_EQ(scratch.get_slot<1>().data(), scratch.get_slot<0>().data() + cones.n_cones); + EXPECT_EQ(scratch.get_slot<2>().data(), scratch.get_slot<1>().data() + cones.n_cones); +} + +TEST(second_order_cone_kernels, segmented_sum_uses_all_cone_size_buckets) +{ + auto stream = rmm::cuda_stream_default; + + std::vector cone_dimensions{65, 3, 66, 32769}; + rmm::device_uvector x(32903, stream); + rmm::device_uvector z(32903, stream); + cone_data_t cones(cone_dimensions, cuopt::make_span(x), cuopt::make_span(z), stream); + + EXPECT_EQ(cuopt::host_copy(cones.segmented_sum.small_cone_ids, stream), (std::vector{1})); + EXPECT_EQ(cuopt::host_copy(cones.segmented_sum.medium_cone_ids, stream), + (std::vector{0, 2})); + EXPECT_EQ(cones.segmented_sum.large_cone_ids, (std::vector{3})); + EXPECT_EQ(cones.segmented_sum.large_cone_offsets, (std::vector{134})); + EXPECT_EQ(cones.segmented_sum.large_cone_dimensions, (std::vector{32769})); + + std::vector values_host(cones.n_cone_entries, 1.0); + rmm::device_uvector values(values_host.size(), stream); + rmm::device_uvector sums(cone_dimensions.size(), stream); + raft::copy(values.data(), values_host.data(), values_host.size(), stream); + + EXPECT_GT(cones.segmented_sum.cub_workspace_bytes, 0); + const auto workspace_size = cones.segmented_sum.cub_workspace.size(); + EXPECT_GT(workspace_size, 0); + + cones.segmented_sum(values.data(), cuopt::make_span(sums), stream); + + EXPECT_EQ(cuopt::host_copy(sums, stream), (std::vector{65.0, 3.0, 66.0, 32769.0})); + EXPECT_EQ(cones.segmented_sum.cub_workspace.size(), workspace_size); +} + +TEST(second_order_cone_kernels, nt_scaling_matches_host_reference) +{ + auto stream = rmm::cuda_stream_default; + + std::vector cone_dimensions{3, 65, 32769}; 
+ std::size_t n_cone_entries = 0; + for (const auto dim : cone_dimensions) { + n_cone_entries += static_cast(dim); + } + + std::vector x_host(n_cone_entries); + std::vector z_host(n_cone_entries); + std::size_t offset = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + x_host[offset] = 100.0 + static_cast(cone); + z_host[offset] = 80.0 + static_cast(cone); + for (int local_idx = 1; local_idx < dim; ++local_idx) { + x_host[offset + local_idx] = 0.001 * static_cast((local_idx % 5) + 1); + z_host[offset + local_idx] = 0.0015 * static_cast((local_idx % 7) + 1); + } + offset += static_cast(dim); + } + + auto x = cuopt::device_copy(x_host, stream); + auto z = cuopt::device_copy(z_host, stream); + cone_data_t cones(cone_dimensions, cuopt::make_span(x), cuopt::make_span(z), stream); + const auto workspace_size = cones.segmented_sum.cub_workspace.size(); + EXPECT_GT(workspace_size, 0); + + launch_nt_scaling(cones, stream); + EXPECT_EQ(cones.segmented_sum.cub_workspace.size(), workspace_size); + + auto eta_host = cuopt::host_copy(cones.eta, stream); + auto w_host = cuopt::host_copy(cones.w, stream); + + std::vector expected_eta(cone_dimensions.size()); + std::vector expected_w(n_cone_entries); + + offset = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + + double x_tail_sq = 0.0; + double z_tail_sq = 0.0; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + x_tail_sq += x_host[idx] * x_host[idx]; + z_tail_sq += z_host[idx] * z_host[idx]; + } + + const auto x_tail_norm = std::sqrt(x_tail_sq); + const auto z_tail_norm = std::sqrt(z_tail_sq); + const auto x_det = (x_host[offset] - x_tail_norm) * (x_host[offset] + x_tail_norm); + const auto z_det = (z_host[offset] - z_tail_norm) * (z_host[offset] + z_tail_norm); + ASSERT_GT(x_det, 0.0) << "cone " << cone; + ASSERT_GT(z_det, 0.0) << "cone " << cone; + 
+ const auto x_scale = std::sqrt(x_det); + const auto z_scale = std::sqrt(z_det); + + expected_eta[cone] = std::sqrt(x_scale / z_scale); + + double normalized_xz_dot = 0.0; + for (int local_idx = 0; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + normalized_xz_dot += x_host[idx] * z_host[idx] / (x_scale * z_scale); + } + const auto w_det = 2.0 + 2.0 * normalized_xz_dot; + ASSERT_GT(w_det, 0.0) << "cone " << cone; + const auto w_scale = std::sqrt(w_det); + + expected_w[offset] = 0.0; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + expected_w[idx] = (x_host[idx] / x_scale - z_host[idx] / z_scale) / w_scale; + } + + double normalized_tail_sq = 0.0; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + normalized_tail_sq += expected_w[idx] * expected_w[idx]; + } + expected_w[offset] = std::sqrt(1.0 + normalized_tail_sq); + + offset += static_cast(dim); + } + + for (std::size_t i = 0; i < expected_eta.size(); ++i) { + EXPECT_NEAR(eta_host[i], expected_eta[i], 1e-10) << "cone " << i; + } + + for (std::size_t i = 0; i < expected_w.size(); ++i) { + EXPECT_NEAR(w_host[i], expected_w[i], 1e-10) << "entry " << i; + } + + offset = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + + double tail_sq = 0.0; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + tail_sq += w_host[idx] * w_host[idx]; + } + + EXPECT_NEAR(w_host[offset] * w_host[offset] - tail_sq, 1.0, 1e-10) << "cone " << cone; + offset += static_cast(dim); + } +} + +TEST(second_order_cone_kernels, cone_step_length_matches_host_reference) +{ + auto stream = rmm::cuda_stream_default; + + std::vector cone_dimensions{3, 65, 32769}; + std::size_t n_cone_entries = 0; + for (const auto dim : cone_dimensions) { + n_cone_entries += static_cast(dim); + } + + std::vector x_host(n_cone_entries); + 
std::vector z_host(n_cone_entries); + std::vector dx_host(n_cone_entries); + std::vector dz_host(n_cone_entries); + + std::size_t offset = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + + x_host[offset] = 12.0 + static_cast(cone); + z_host[offset] = 14.0 + static_cast(cone); + dx_host[offset] = (cone == 0) ? -30.0 : 0.2; + dz_host[offset] = (cone == 1) ? -25.0 : 0.15; + + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + + x_host[idx] = 0.001 * static_cast((local_idx % 5) + 1); + z_host[idx] = 0.0015 * static_cast((local_idx % 7) + 1); + dx_host[idx] = 0.02 * static_cast((local_idx % 5) - 2); + dz_host[idx] = -0.015 * static_cast((local_idx % 7) - 3); + } + + offset += static_cast(dim); + } + + auto compute_expected_step = [&](std::vector const& u, + std::vector const& du, + double alpha_max, + std::vector& per_cone_alpha) { + auto global_alpha = alpha_max; + std::size_t off = 0; + + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + + double du_tail_sq = 0.0; + double u_tail_du_tail = 0.0; + double u_tail_sq = 0.0; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = off + local_idx; + du_tail_sq += du[idx] * du[idx]; + u_tail_du_tail += u[idx] * du[idx]; + u_tail_sq += u[idx] * u[idx]; + } + + per_cone_alpha[cone] = host_cone_step_length_from_scalars( + u[off], du[off], du_tail_sq, u_tail_du_tail, u_tail_sq, alpha_max); + global_alpha = std::min(global_alpha, per_cone_alpha[cone]); + + off += static_cast(dim); + } + + return global_alpha; + }; + + constexpr double alpha_max = 0.99; + std::vector expected_primal_per_cone(cone_dimensions.size()); + std::vector expected_dual_per_cone(cone_dimensions.size()); + const auto expected_primal = + compute_expected_step(x_host, dx_host, alpha_max, expected_primal_per_cone); + const auto expected_dual = + 
compute_expected_step(z_host, dz_host, alpha_max, expected_dual_per_cone); + + auto x = cuopt::device_copy(x_host, stream); + auto z = cuopt::device_copy(z_host, stream); + auto dx = cuopt::device_copy(dx_host, stream); + auto dz = cuopt::device_copy(dz_host, stream); + + cone_data_t cones(cone_dimensions, cuopt::make_span(x), cuopt::make_span(z), stream); + const auto [step_primal, step_dual] = + compute_cone_step_length(cones, + raft::device_span(dx.data(), dx.size()), + raft::device_span(dz.data(), dz.size()), + alpha_max, + stream); + + EXPECT_LT(expected_primal, alpha_max); + EXPECT_LT(expected_dual, alpha_max); + EXPECT_NEAR(step_primal, expected_primal, 1e-12); + EXPECT_NEAR(step_dual, expected_dual, 1e-12); + + const auto primal_per_cone = cuopt::host_copy(cones.scratch.step_alpha_primal, stream); + const auto dual_per_cone = cuopt::host_copy(cones.scratch.step_alpha_dual, stream); + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + EXPECT_NEAR(primal_per_cone[cone], expected_primal_per_cone[cone], 1e-12) + << "primal cone " << cone; + EXPECT_NEAR(dual_per_cone[cone], expected_dual_per_cone[cone], 1e-12) << "dual cone " << cone; + } +} + +TEST(second_order_cone_kernels, scaling_operators_match_host_reference) +{ + auto stream = rmm::cuda_stream_default; + + std::vector cone_dimensions{3, 65, 32769}; + std::size_t n_cone_entries = 0; + for (const auto dim : cone_dimensions) { + n_cone_entries += static_cast(dim); + } + + std::vector x_host(n_cone_entries); + std::vector z_host(n_cone_entries); + std::vector v_host(n_cone_entries); + std::vector cone_target_host(n_cone_entries); + std::vector accum_initial_host(n_cone_entries); + std::size_t offset = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + x_host[offset] = 100.0 + static_cast(cone); + z_host[offset] = 80.0 + static_cast(cone); + v_host[offset] = 0.75 + 0.1 * static_cast(cone); + cone_target_host[offset] = 0.4 + 
0.03 * static_cast(cone); + accum_initial_host[offset] = -0.2 + 0.02 * static_cast(cone); + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + x_host[idx] = 0.001 * static_cast((local_idx % 5) + 1); + z_host[idx] = 0.0015 * static_cast((local_idx % 7) + 1); + v_host[idx] = 0.002 * static_cast((local_idx % 11) - 5); + cone_target_host[idx] = 0.003 * static_cast((local_idx % 13) - 6); + accum_initial_host[idx] = 0.004 * static_cast((local_idx % 17) - 8); + } + offset += static_cast(dim); + } + + auto x = cuopt::device_copy(x_host, stream); + auto z = cuopt::device_copy(z_host, stream); + auto v = cuopt::device_copy(v_host, stream); + auto cone_target = cuopt::device_copy(cone_target_host, stream); + auto accum = cuopt::device_copy(accum_initial_host, stream); + rmm::device_uvector w_out(n_cone_entries, stream); + rmm::device_uvector w_inv_out(n_cone_entries, stream); + rmm::device_uvector h_out(n_cone_entries, stream); + rmm::device_uvector w_inv_tmp(n_cone_entries, stream); + rmm::device_uvector h_from_w_inv(n_cone_entries, stream); + rmm::device_uvector recovered_dz(n_cone_entries, stream); + + cone_data_t cones(cone_dimensions, cuopt::make_span(x), cuopt::make_span(z), stream); + launch_nt_scaling(cones, stream); + + auto v_span = raft::device_span(v.data(), v.size()); + apply_w(v_span, cuopt::make_span(w_out), cones, stream); + apply_w_inv(v_span, cuopt::make_span(w_inv_out), cones, stream); + apply_hinv2(v_span, cuopt::make_span(h_out), cones, stream); + recover_cone_dz_from_target( + v_span, + cones, + raft::device_span(cone_target.data(), cone_target.size()), + cuopt::make_span(recovered_dz), + stream); + accumulate_cone_hinv2_matvec(v_span, cones, cuopt::make_span(accum), stream); + apply_w_inv(v_span, cuopt::make_span(w_inv_tmp), cones, stream); + apply_w_inv(raft::device_span(w_inv_tmp.data(), w_inv_tmp.size()), + cuopt::make_span(h_from_w_inv), + cones, + stream); + + auto eta_host = cuopt::host_copy(cones.eta, 
stream); + auto w_host = cuopt::host_copy(cones.w, stream); + auto w_out_host = cuopt::host_copy(w_out, stream); + auto w_inv_out_host = cuopt::host_copy(w_inv_out, stream); + auto h_out_host = cuopt::host_copy(h_out, stream); + auto h_identity_host = cuopt::host_copy(h_from_w_inv, stream); + auto recovered_dz_host = cuopt::host_copy(recovered_dz, stream); + auto accum_host = cuopt::host_copy(accum, stream); + + std::vector expected_w(n_cone_entries); + std::vector expected_w_inv(n_cone_entries); + std::vector expected_h(n_cone_entries); + std::vector expected_h_unscaled(n_cone_entries); + + offset = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + const auto w0 = w_host[offset]; + const auto v0 = v_host[offset]; + const auto eta = eta_host[cone]; + + double tail_dot = 0.0; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + tail_dot += w_host[idx] * v_host[idx]; + } + + expected_w[offset] = eta * (w0 * v0 + tail_dot); + expected_w_inv[offset] = (w0 * v0 - tail_dot) / eta; + + const auto rho = w0 * v0 - tail_dot; + expected_h_unscaled[offset] = (2.0 * w0 * rho - v0) / (eta * eta); + expected_h[offset] = expected_h_unscaled[offset]; + + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + + expected_w[idx] = eta * (v_host[idx] + (v0 + tail_dot / (1.0 + w0)) * w_host[idx]); + expected_w_inv[idx] = (v_host[idx] + (-v0 + tail_dot / (1.0 + w0)) * w_host[idx]) / eta; + expected_h_unscaled[idx] = (v_host[idx] - 2.0 * w_host[idx] * rho) / (eta * eta); + expected_h[idx] = expected_h_unscaled[idx]; + } + + offset += static_cast(dim); + } + + for (std::size_t i = 0; i < n_cone_entries; ++i) { + EXPECT_NEAR(w_out_host[i], expected_w[i], 1e-9) << "W entry " << i; + EXPECT_NEAR(w_inv_out_host[i], expected_w_inv[i], 1e-9) << "W inverse entry " << i; + EXPECT_NEAR(h_out_host[i], expected_h[i], 1e-9) << "H entry " << i; + 
EXPECT_NEAR(h_out_host[i], h_identity_host[i], 1e-9) << "H identity entry " << i; + EXPECT_NEAR(recovered_dz_host[i], cone_target_host[i] - expected_h_unscaled[i], 1e-9) + << "recovered dz entry " << i; + EXPECT_NEAR(accum_host[i], accum_initial_host[i] + expected_h_unscaled[i], 1e-9) + << "accumulated H entry " << i; + } +} + +TEST(second_order_cone_kernels, combined_cone_rhs_matches_host_reference) +{ + auto stream = rmm::cuda_stream_default; + + std::vector cone_dimensions{3, 65, 32769}; + std::size_t n_cone_entries = 0; + for (const auto dim : cone_dimensions) { + n_cone_entries += static_cast(dim); + } + + std::vector x_host(n_cone_entries); + std::vector z_host(n_cone_entries); + std::vector dx_aff_host(n_cone_entries); + std::vector dz_aff_host(n_cone_entries); + std::size_t offset = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + x_host[offset] = 120.0 + static_cast(cone); + z_host[offset] = 90.0 + static_cast(cone); + dx_aff_host[offset] = 0.25 + 0.05 * static_cast(cone); + dz_aff_host[offset] = -0.3 + 0.04 * static_cast(cone); + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + x_host[idx] = 0.001 * static_cast((local_idx % 5) + 1); + z_host[idx] = 0.0015 * static_cast((local_idx % 7) + 1); + dx_aff_host[idx] = 0.002 * static_cast((local_idx % 11) - 5); + dz_aff_host[idx] = 0.001 * static_cast((local_idx % 13) - 6); + } + offset += static_cast(dim); + } + + auto x = cuopt::device_copy(x_host, stream); + auto z = cuopt::device_copy(z_host, stream); + auto dx_aff = cuopt::device_copy(dx_aff_host, stream); + auto dz_aff = cuopt::device_copy(dz_aff_host, stream); + rmm::device_uvector out(n_cone_entries, stream); + + cone_data_t cones(cone_dimensions, cuopt::make_span(x), cuopt::make_span(z), stream); + launch_nt_scaling(cones, stream); + + constexpr double sigma_mu = 0.37; + compute_combined_cone_rhs_term(raft::device_span(dx_aff.data(), 
dx_aff.size()), + raft::device_span(dz_aff.data(), dz_aff.size()), + cones, + sigma_mu, + cuopt::make_span(out), + stream); + + auto eta_host = cuopt::host_copy(cones.eta, stream); + auto w_host = cuopt::host_copy(cones.w, stream); + auto out_host = cuopt::host_copy(out, stream); + + auto apply_w_ref = [&](std::vector const& v) { + std::vector result(n_cone_entries); + std::size_t off = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + const auto w0 = w_host[off]; + const auto v0 = v[off]; + + double tail_dot = 0.0; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = off + local_idx; + tail_dot += w_host[idx] * v[idx]; + } + + result[off] = eta_host[cone] * (w0 * v0 + tail_dot); + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = off + local_idx; + result[idx] = eta_host[cone] * (v[idx] + (v0 + tail_dot / (1.0 + w0)) * w_host[idx]); + } + + off += static_cast(dim); + } + return result; + }; + + auto apply_w_inv_ref = [&](std::vector const& v) { + std::vector result(n_cone_entries); + std::size_t off = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + const auto w0 = w_host[off]; + const auto v0 = v[off]; + + double tail_dot = 0.0; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = off + local_idx; + tail_dot += w_host[idx] * v[idx]; + } + + result[off] = (w0 * v0 - tail_dot) / eta_host[cone]; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = off + local_idx; + result[idx] = (v[idx] + (-v0 + tail_dot / (1.0 + w0)) * w_host[idx]) / eta_host[cone]; + } + + off += static_cast(dim); + } + return result; + }; + + auto scaled_dx = apply_w_inv_ref(dx_aff_host); + auto scaled_dz = apply_w_ref(dz_aff_host); + auto nt_point = apply_w_ref(z_host); + + std::vector shift(n_cone_entries); + offset = 0; + for (std::size_t cone = 0; cone < 
cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + + double head_dot = 0.0; + for (int local_idx = 0; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + head_dot += scaled_dx[idx] * scaled_dz[idx]; + } + + shift[offset] = head_dot - sigma_mu; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + shift[idx] = scaled_dx[offset] * scaled_dz[idx] + scaled_dz[offset] * scaled_dx[idx]; + } + + offset += static_cast(dim); + } + + std::vector minus_p(n_cone_entries); + offset = 0; + for (std::size_t cone = 0; cone < cone_dimensions.size(); ++cone) { + const auto dim = cone_dimensions[cone]; + const auto lambda0 = nt_point[offset]; + + double lambda_tail_dot = 0.0; + double lambda_tail_sq = 0.0; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + lambda_tail_dot += nt_point[idx] * shift[idx]; + lambda_tail_sq += nt_point[idx] * nt_point[idx]; + } + + const auto lambda_tail_norm = std::sqrt(lambda_tail_sq); + const auto det_lambda = (lambda0 - lambda_tail_norm) * (lambda0 + lambda_tail_norm); + ASSERT_GT(lambda0, 0.0) << "cone " << cone; + ASSERT_GT(det_lambda, 0.0) << "cone " << cone; + + const auto p_head = (lambda0 * shift[offset] - lambda_tail_dot) / det_lambda; + minus_p[offset] = -p_head; + for (int local_idx = 1; local_idx < dim; ++local_idx) { + const auto idx = offset + local_idx; + minus_p[idx] = (p_head * nt_point[idx] - shift[idx]) / lambda0; + } + + offset += static_cast(dim); + } + + auto expected = apply_w_inv_ref(minus_p); + for (std::size_t i = 0; i < n_cone_entries; ++i) { + EXPECT_NEAR(out_host[i], expected[i], 1e-8) << "entry " << i; + } +} + +} // namespace cuopt::linear_programming::dual_simplex::test diff --git a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu index abfe37c9fd..d630bfac18 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu +++ 
b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu @@ -5,13 +5,12 @@ */ /* clang-format on */ -#include - #include #include #include +#include #include #include #include @@ -139,7 +138,6 @@ TEST(barrier, dual_variable_greater_than) user_problem.A.x[nnz++] = 1.0; user_problem.A.i[nnz] = 1; user_problem.A.x[nnz++] = 2.0; - user_problem.A.print_matrix(); EXPECT_EQ(nnz, nz); user_problem.rhs.resize(m); @@ -174,4 +172,819 @@ TEST(barrier, dual_variable_greater_than) EXPECT_NEAR(solution.z[1], 0.0, 1e-5); } +TEST(barrier, cone_metadata_reindexed_when_slack_is_inserted_before_cones) +{ + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 5; + constexpr int nz = 5; + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective.assign(n, 0.0); + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start.resize(n + 1); + for (int j = 0; j < n; ++j) { + user_problem.A.col_start[j] = j; + user_problem.A.i[j] = 0; + user_problem.A.x[j] = 1.0; + } + user_problem.A.col_start[n] = nz; + user_problem.rhs = {1.0}; + user_problem.row_sense = {'L'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + user_problem.num_range_rows = 0; + user_problem.second_order_cone_dims = {2, 2}; + user_problem.cone_var_start = 1; + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = true; + settings.dualize = 0; + settings.scale_columns = false; + + std::vector new_slacks; + dualize_info_t dualize_info; + lp_problem_t original_lp(user_problem.handle_ptr, 1, 1, 1); + convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info); + + ASSERT_EQ(new_slacks.size(), 1); + EXPECT_EQ(new_slacks[0], 1); + EXPECT_EQ(original_lp.num_cols, 6); + 
EXPECT_EQ(original_lp.second_order_cone_dims, user_problem.second_order_cone_dims); + EXPECT_EQ(original_lp.cone_var_start, 2); + + lp_problem_t barrier_lp(user_problem.handle_ptr, + original_lp.num_rows, + original_lp.num_cols, + original_lp.A.col_start[original_lp.num_cols]); + std::vector column_scales; + column_scaling(original_lp, settings, barrier_lp, column_scales); + + EXPECT_EQ(barrier_lp.second_order_cone_dims, user_problem.second_order_cone_dims); + EXPECT_EQ(barrier_lp.cone_var_start, 2); +} + +TEST(barrier, presolve_reindexes_cone_start_after_empty_column_removal) +{ + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 4; + constexpr int nz = 3; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {1.0, 0.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start = {0, 0, 1, 2, 3}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = -1.0; + user_problem.A.i[2] = 0; + user_problem.A.x[2] = 0.5; + + user_problem.rhs = {1.0}; + user_problem.row_sense = {'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + user_problem.num_range_rows = 0; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = true; + settings.dualize = 0; + settings.scale_columns = false; + + std::vector new_slacks; + dualize_info_t dualize_info; + lp_problem_t original_lp(user_problem.handle_ptr, 1, 1, 1); + convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info); + + presolve_info_t presolve_info; + lp_problem_t 
presolved_lp(user_problem.handle_ptr, 1, 1, 1); + ASSERT_EQ(presolve(original_lp, settings, presolved_lp, presolve_info), 0); + + EXPECT_EQ(presolved_lp.num_cols, 3); + EXPECT_EQ(presolved_lp.second_order_cone_dims, std::vector({3})); + EXPECT_EQ(presolved_lp.cone_var_start, 0); + + lp_problem_t barrier_lp(user_problem.handle_ptr, + presolved_lp.num_rows, + presolved_lp.num_cols, + presolved_lp.A.col_start[presolved_lp.num_cols]); + std::vector column_scales; + ASSERT_EQ(column_scaling(presolved_lp, settings, barrier_lp, column_scales), 0); + EXPECT_EQ(barrier_lp.cone_var_start, 0); +} + +TEST(barrier, presolve_packs_free_variable_partner_before_cones) +{ + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 5; + constexpr int nz = 5; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 0.0, 0.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start = {0, 1, 2, 3, 4, 5}; + for (int j = 0; j < n; ++j) { + user_problem.A.i[j] = 0; + user_problem.A.x[j] = 1.0; + } + + user_problem.rhs = {1.0}; + user_problem.row_sense = {'E'}; + // Two free linear vars ensure the new implied-bound pass cannot fully + // eliminate the free-variable expansion path before the cone block. 
+ user_problem.lower = {-inf, -inf, 0.0, 0.0, 0.0}; + user_problem.upper.assign(n, inf); + user_problem.num_range_rows = 0; + user_problem.cone_var_start = 2; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = true; + settings.dualize = 0; + settings.scale_columns = false; + + std::vector new_slacks; + dualize_info_t dualize_info; + lp_problem_t original_lp(user_problem.handle_ptr, 1, 1, 1); + convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info); + + presolve_info_t presolve_info; + lp_problem_t presolved_lp(user_problem.handle_ptr, 1, 1, 1); + ASSERT_EQ(presolve(original_lp, settings, presolved_lp, presolve_info), 0); + + EXPECT_EQ(presolved_lp.num_cols, 7); + EXPECT_EQ(presolved_lp.cone_var_start, 4); + EXPECT_EQ(presolved_lp.second_order_cone_dims, std::vector({3})); + ASSERT_EQ(presolve_info.free_variable_pairs.size(), 4); + EXPECT_EQ(presolve_info.free_variable_pairs[0], 0); + EXPECT_EQ(presolve_info.free_variable_pairs[1], 2); + EXPECT_EQ(presolve_info.free_variable_pairs[2], 1); + EXPECT_EQ(presolve_info.free_variable_pairs[3], 3); +} + +TEST(barrier, uncrush_solution_removes_non_tail_free_variable_partner) +{ + using namespace cuopt::linear_programming::dual_simplex; + + presolve_info_t presolve_info; + presolve_info.free_variable_pairs = {0, 1}; + + simplex_solver_settings_t settings; + std::vector crushed_x{5.0, 2.0, 9.0, 8.0}; + std::vector crushed_y{}; + std::vector crushed_z{7.0, 11.0, 13.0, 17.0}; + std::vector uncrushed_x(3); + std::vector uncrushed_y(0); + std::vector uncrushed_z(3); + + uncrush_solution(presolve_info, + settings, + crushed_x, + crushed_y, + crushed_z, + uncrushed_x, + uncrushed_y, + uncrushed_z); + + EXPECT_EQ(uncrushed_x, std::vector({3.0, 9.0, 8.0})); + EXPECT_EQ(uncrushed_z, std::vector({7.0, 13.0, 17.0})); +} + +TEST(barrier, 
rejects_middle_cone_input_before_barrier) +{ + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 3; + constexpr int n = 5; + constexpr int nz = 3; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {1.0, 0.0, 0.0, 0.0, 1.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start = {0, 1, 1, 2, 2, 3}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 1; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 2; + user_problem.A.x[2] = 1.0; + + user_problem.rhs = {2.0, 1.0, 3.0}; + user_problem.row_sense = {'E', 'E', 'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + user_problem.num_range_rows = 0; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = true; + settings.dualize = 0; + lp_solution_t solution(m, n); + + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + EXPECT_EQ(status, lp_status_t::NUMERICAL_ISSUES); +} + +TEST(barrier, socp_min_x0_subject_to_norm_constraint) +{ + // minimize x_0 + // subject to x_1 = 1 + // (x_0, x_1, x_2) in Q^3 + // + // Optimal: x* = (1, 1, 0), obj* = 1 + + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 3; + constexpr int nz = 1; + + user_problem.num_rows = m; + user_problem.num_cols = n; + + user_problem.objective = {1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start 
= {0, 0, 1, 1}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + + user_problem.rhs = {1.0}; + user_problem.row_sense = {'E'}; + + user_problem.lower = {0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "socp_norm_cone"; + + user_problem.cone_var_start = 0; + user_problem.second_order_cone_dims = {3}; + + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = false; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[2]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_soc_block) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3. + // + // minimize l + // subject to l - t = 0 + // u = 1 + // (t, u, v) in Q^3 + // + // Optimal: l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1. 
+ raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 2; + constexpr int n = 4; + constexpr int nz = 4; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {1.0, 0.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 1, 2, 3, 3}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = -1.0; + user_problem.A.i[2] = 1; + user_problem.A.x[2] = 1.0; + + user_problem.rhs = {0.0, 1.0}; + user_problem.row_sense = {'E', 'E'}; + + user_problem.lower = {0.0, 0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "mixed_linear_and_soc_block"; + + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = false; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_soc_tail_coupling) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3. + // + // minimize t + // subject to l - u = 0 + // l + u = 2 + // (t, u, v) in Q^3 + // + // Optimal: l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1. 
+ raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 2; + constexpr int n = 4; + constexpr int nz = 4; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 2, 2, 4, 4}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 1; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 0; + user_problem.A.x[2] = -1.0; + user_problem.A.i[3] = 1; + user_problem.A.x[3] = 1.0; + + user_problem.rhs = {0.0, 2.0}; + user_problem.row_sense = {'E', 'E'}; + user_problem.lower = {0.0, 0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "mixed_linear_and_soc_tail_coupling"; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = false; + settings.dualize = 0; + settings.scale_columns = true; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_soc_tail_coupling_with_inequality) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3. + // + // minimize t + // subject to l - u = 0 + // l + u >= 2 + // (t, u, v) in Q^3 + // + // Optimal: l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1. 
+ raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 2; + constexpr int n = 4; + constexpr int nz = 4; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 2, 2, 4, 4}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 1; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 0; + user_problem.A.x[2] = -1.0; + user_problem.A.i[3] = 1; + user_problem.A.x[3] = 1.0; + + user_problem.rhs = {0.0, 2.0}; + user_problem.row_sense = {'E', 'G'}; + user_problem.lower = {0.0, 0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "mixed_linear_and_soc_tail_coupling_with_inequality"; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = false; + settings.dualize = 0; + settings.scale_columns = true; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_two_soc_blocks) +{ + // Variables ordered as [l1, l2 | t1, u1, v1 | t2, u2, v2], + // where (t1, u1, v1), (t2, u2, v2) \in Q^3. 
+ // + // minimize t1 + t2 + // subject to l1 - u1 = 0 + // l2 - u2 = 0 + // l1 + l2 = 3 + // l1 - l2 = 1 + // + // Optimal: l1* = 2, l2* = 1, t1* = 2, u1* = 2, v1* = 0, + // t2* = 1, u2* = 1, v2* = 0, obj* = 3. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 4; + constexpr int n = 8; + constexpr int nz = 8; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l1, l2, t1, u1, v1, t2, u2, v2 + user_problem.A.col_start = {0, 3, 6, 6, 7, 7, 7, 8, 8}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 2; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 3; + user_problem.A.x[2] = 1.0; + user_problem.A.i[3] = 1; + user_problem.A.x[3] = 1.0; + user_problem.A.i[4] = 2; + user_problem.A.x[4] = 1.0; + user_problem.A.i[5] = 3; + user_problem.A.x[5] = -1.0; + user_problem.A.i[6] = 0; + user_problem.A.x[6] = -1.0; + user_problem.A.i[7] = 1; + user_problem.A.x[7] = -1.0; + + user_problem.rhs = {0.0, 0.0, 3.0, 1.0}; + user_problem.row_sense = {'E', 'E', 'E', 'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + + user_problem.num_range_rows = 0; + user_problem.problem_name = "mixed_linear_and_two_soc_blocks"; + user_problem.cone_var_start = 2; + user_problem.second_order_cone_dims = {3, 3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = false; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 3.0, 1e-4); + 
EXPECT_NEAR(solution.x[0], 2.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 2.0, 1e-4); + EXPECT_NEAR(solution.x[3], 2.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[4]), 0.0, 1e-4); + EXPECT_NEAR(solution.x[5], 1.0, 1e-4); + EXPECT_NEAR(solution.x[6], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[7]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_two_soc_blocks_with_inequality) +{ + // Variables ordered as [l1, l2 | t1, u1, v1 | t2, u2, v2], + // where (t1, u1, v1), (t2, u2, v2) \in Q^3. + // + // minimize t1 + t2 + // subject to l1 - u1 = 0 + // l2 - u2 = 0 + // l1 + l2 >= 3 + // l1 - l2 = 1 + // + // Optimal: l1* = 2, l2* = 1, t1* = 2, u1* = 2, v1* = 0, + // t2* = 1, u2* = 1, v2* = 0, obj* = 3. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 4; + constexpr int n = 8; + constexpr int nz = 8; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l1, l2, t1, u1, v1, t2, u2, v2 + user_problem.A.col_start = {0, 3, 6, 6, 7, 7, 7, 8, 8}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 2; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 3; + user_problem.A.x[2] = 1.0; + user_problem.A.i[3] = 1; + user_problem.A.x[3] = 1.0; + user_problem.A.i[4] = 2; + user_problem.A.x[4] = 1.0; + user_problem.A.i[5] = 3; + user_problem.A.x[5] = -1.0; + user_problem.A.i[6] = 0; + user_problem.A.x[6] = -1.0; + user_problem.A.i[7] = 1; + user_problem.A.x[7] = -1.0; + + user_problem.rhs = {0.0, 0.0, 3.0, 1.0}; + user_problem.row_sense = {'E', 'E', 'G', 'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + + user_problem.num_range_rows = 0; + user_problem.problem_name = 
"mixed_linear_and_two_soc_blocks_with_inequality"; + user_problem.cone_var_start = 2; + user_problem.second_order_cone_dims = {3, 3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.barrier_presolve = false; + settings.dualize = 0; + settings.scale_columns = true; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 3.0, 1e-4); + EXPECT_NEAR(solution.x[0], 2.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 2.0, 1e-4); + EXPECT_NEAR(solution.x[3], 2.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[4]), 0.0, 1e-4); + EXPECT_NEAR(solution.x[5], 1.0, 1e-4); + EXPECT_NEAR(solution.x[6], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[7]), 0.0, 1e-4); +} + +TEST(barrier, free_linear_prefix_is_uncrushed_correctly_with_soc_block) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3 and l is free. + // + // minimize t + // subject to l - u = 0 + // u = 1 + // (t, u, v) in Q^3 + // + // Presolve splits the free linear variable into a partner column before the + // cone block, so the returned user-space solution must uncrush back to + // l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1. 
+ raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 2; + constexpr int n = 4; + constexpr int nz = 3; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 1, 1, 3, 3}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = -1.0; + user_problem.A.i[2] = 1; + user_problem.A.x[2] = 1.0; + + user_problem.rhs = {0.0, 1.0}; + user_problem.row_sense = {'E', 'E'}; + user_problem.lower = {-inf, 0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "free_linear_prefix_is_uncrushed_correctly_with_soc_block"; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + +TEST(barrier, qp_with_soc_block) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3. + // + // minimize 0.5 l^2 + t + // subject to l + u = 2 + // (t, u, v) in Q^3 + // + // Since t >= |u| and u = 2 - l with l >= 0, the objective becomes + // 0.5 l^2 + |2 - l|, which is minimized at l* = 1, u* = 1, t* = 1, v* = 0. 
+ raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 4; + constexpr int nz = 2; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 1, 1, 2, 2}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = 1.0; + + user_problem.rhs = {2.0}; + user_problem.row_sense = {'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + + user_problem.Q_offsets = {0, 1, 1, 1, 1}; + user_problem.Q_indices = {0}; + user_problem.Q_values = {1.0}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "qp_with_soc_block"; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.5, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + } // namespace cuopt::linear_programming::dual_simplex::test diff --git a/datasets/qcqp/QC_Test_1.mps b/datasets/qcqp/QC_Test_1.mps new file mode 100644 index 0000000000..e791fdadd6 --- /dev/null +++ b/datasets/qcqp/QC_Test_1.mps @@ -0,0 +1,30 @@ +NAME QCTest +ROWS + N OBJ + L LIN0 + L QC0 + L QC1 +COLUMNS + VAR1 OBJ 0 + VAR1 LIN0 2 + VAR1 QC0 1 + VAR1 QC1 3 + VAR2 OBJ 0 + VAR2 LIN0 1 + VAR2 QC0 1 + VAR2 
QC1 1 +RHS + RHS1 LIN0 15 + RHS1 QC0 5 + RHS1 QC1 10 +QCMATRIX QC0 + VAR1 VAR1 10 + VAR1 VAR2 2 + VAR2 VAR1 2 + VAR2 VAR2 2 +QCMATRIX QC1 + VAR1 VAR1 4 + VAR1 VAR2 1 + VAR2 VAR1 1 + VAR2 VAR2 6 +ENDATA diff --git a/datasets/qcqp/p0033_qc1.mps b/datasets/qcqp/p0033_qc1.mps new file mode 100644 index 0000000000..a06cec03c5 --- /dev/null +++ b/datasets/qcqp/p0033_qc1.mps @@ -0,0 +1,214 @@ +NAME p0033_qc1 +ROWS + N R100 + L R118 + L R119 + L R120 + L R121 + L R122 + L R123 + L R124 + L R125 + L R126 + L R127 + L R128 + L ZBESTROW + L QC1 + L QC2 + L QC3 + L QC4 +COLUMNS + C157 R100 171 + C157 R122 -300 + C157 R123 -300 + C158 R100 171 + C158 R126 -300 + C158 R127 -300 + C159 R100 171 + C159 R119 300 + C159 R120 -300 + C159 R121 -300 + C159 QC1 1 + C160 R100 171 + C160 R119 300 + C160 R120 -300 + C160 R121 -300 + C161 R100 163 + C161 R119 285 + C161 R120 -285 + C161 R124 -285 + C161 R125 -285 + C162 R100 162 + C162 R119 285 + C162 R120 -285 + C162 R122 -285 + C162 R123 -285 + C163 R100 163 + C163 R128 -285 + C164 R100 69 + C164 R119 265 + C164 R120 -265 + C164 R124 -265 + C164 R125 -265 + C165 R100 69 + C165 R119 265 + C165 R120 -265 + C165 R122 -265 + C165 R123 -265 + C166 R100 183 + C166 R118 -230 + C167 R100 183 + C167 R124 -230 + C167 R125 -230 + C168 R100 183 + C168 R119 230 + C168 R120 -230 + C168 R125 -230 + C169 R100 183 + C169 R119 230 + C169 R120 -230 + C169 R123 -230 + C170 R100 49 + C170 R119 190 + C170 R120 -190 + C170 R122 -190 + C170 R123 -190 + C171 R100 183 + C172 R100 258 + C172 R118 -200 + C173 R100 517 + C173 R118 -400 + C174 R100 250 + C174 R126 -200 + C174 R127 -200 + C175 R100 500 + C175 R126 -400 + C175 R127 -400 + C176 R100 250 + C176 R127 -200 + C177 R100 500 + C177 R127 -400 + C178 R100 159 + C178 R119 200 + C178 R120 -200 + C178 R124 -200 + C178 R125 -200 + C179 R100 318 + C179 R119 400 + C179 R120 -400 + C179 R124 -400 + C179 R125 -400 + C180 R100 159 + C180 R119 200 + C180 R120 -200 + C180 R125 -200 + C181 R100 318 + C181 R119 400 + C181 
R120 -400 + C181 R125 -400 + C182 R100 159 + C182 R119 200 + C182 R120 -200 + C182 R122 -200 + C182 R123 -200 + C183 R100 318 + C183 R119 400 + C183 R120 -400 + C183 R122 -400 + C183 R123 -400 + C184 R100 159 + C184 R119 200 + C184 R120 -200 + C184 R123 -200 + C185 R100 318 + C185 R119 400 + C185 R120 -400 + C185 R123 -400 + C186 R100 114 + C186 R119 200 + C186 R120 -200 + C186 R121 -200 + C187 R100 228 + C187 R119 400 + C187 R120 -400 + C187 R121 -400 + C188 R100 159 + C188 R128 -200 + C189 R100 318 + C189 R128 -400 +RHS + rhs R118 -5 + rhs R119 2700 + rhs R120 -2600 + rhs R121 -100 + rhs R122 -900 + rhs R123 -1656 + rhs R124 -335 + rhs R125 -1026 + rhs R126 -5 + rhs R127 -500 + rhs R128 -270 + rhs QC1 1 + rhs QC2 2 + rhs QC3 1 + rhs QC4 1 +BOUNDS + UP bnd C157 1 + UP bnd C158 1 + UP bnd C159 1 + UP bnd C160 1 + UP bnd C161 1 + UP bnd C162 1 + UP bnd C163 1 + UP bnd C164 1 + UP bnd C165 1 + UP bnd C166 1 + UP bnd C167 1 + UP bnd C168 1 + UP bnd C169 1 + UP bnd C170 1 + UP bnd C171 1 + UP bnd C172 1 + UP bnd C173 1 + UP bnd C174 1 + UP bnd C175 1 + UP bnd C176 1 + UP bnd C177 1 + UP bnd C178 1 + UP bnd C179 1 + UP bnd C180 1 + UP bnd C181 1 + UP bnd C182 1 + UP bnd C183 1 + UP bnd C184 1 + UP bnd C185 1 + UP bnd C186 1 + UP bnd C187 1 + UP bnd C188 1 + UP bnd C189 1 +QMATRIX + C158 C158 1 + C158 C189 0.5 + C189 C158 0.5 + C189 C189 1 +QCMATRIX QC1 + C157 C157 1 + C157 C158 0.5 + C158 C157 0.5 + C158 C158 1 + C159 C159 1 + C160 C160 1 +QCMATRIX QC2 + C161 C161 2 + C162 C162 2 + C163 C163 1 +QCMATRIX QC3 + C164 C164 1 + C165 C165 1 +QCMATRIX QC4 + C166 C166 1 + C167 C167 1 + C168 C168 1 + C169 C169 1 + C171 C171 1 +ENDATA \ No newline at end of file diff --git a/datasets/qcqp/socp1.mps b/datasets/qcqp/socp1.mps new file mode 100644 index 0000000000..3e479054d0 --- /dev/null +++ b/datasets/qcqp/socp1.mps @@ -0,0 +1,23 @@ +NAME +ROWS + N OBJ + L c2 + E c1 +COLUMNS + y OBJ -1 + z OBJ -1 + x c1 1 +RHS + rhs c2 0 + rhs c1 1 +RANGES +BOUNDS + FR bounds y + FR bounds z + LO 
bounds x 0 + PL bounds x +QCMATRIX c2 + y y 1 + z z 1 + x x -1 +ENDATA diff --git a/datasets/qcqp/socp2.mps b/datasets/qcqp/socp2.mps new file mode 100644 index 0000000000..6542d1d4f8 --- /dev/null +++ b/datasets/qcqp/socp2.mps @@ -0,0 +1,26 @@ +NAME +ROWS + N OBJ + L c2 + G c1 + E c3 +COLUMNS + x OBJ 1 + y c1 1 + tau c3 1 +RHS + rhs c2 0 + rhs c1 0.7071067811865476 + rhs c3 1 +RANGES +BOUNDS + FR bounds x + LO bounds y 0 + PL bounds y + LO bounds tau 0 + PL bounds tau +QCMATRIX c2 + x x 1 + y y 1 + tau tau -1 +ENDATA diff --git a/datasets/qcqp/socp3.mps b/datasets/qcqp/socp3.mps new file mode 100644 index 0000000000..ae92220042 --- /dev/null +++ b/datasets/qcqp/socp3.mps @@ -0,0 +1,22 @@ +NAME +ROWS + N OBJ + L c2 + L c3 + G c1 +COLUMNS + y c1 1 + x c2 1 +RHS + rhs c2 1 + rhs c3 0 + rhs c1 2 +RANGES +BOUNDS + FR bounds y + LO bounds x 0 + PL bounds x +QCMATRIX c3 + y y 1 + x x -1 +ENDATA diff --git a/datasets/qcqp/socp4.mps b/datasets/qcqp/socp4.mps new file mode 100644 index 0000000000..584725963f --- /dev/null +++ b/datasets/qcqp/socp4.mps @@ -0,0 +1,33 @@ +NAME +ROWS + N OBJ + L c2 + E c1a + E c1b + E c1c +COLUMNS + x2 c1b 1 + x2 OBJ -2 + x3 c1c 1 + x3 OBJ -1 + x1 c1a 1 + x4 c1b -1 + x5 c1c -1 +RHS + rhs c2 0 + rhs c1a 1 + rhs c1b 0 + rhs c1c 0 +RANGES +BOUNDS + FR bounds x2 + FR bounds x3 + LO bounds x1 0 + PL bounds x1 + FR bounds x4 + FR bounds x5 +QCMATRIX c2 + x1 x1 -1 + x4 x4 1 + x5 x5 1 +ENDATA