diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..188fb31 --- /dev/null +++ b/.clang-format @@ -0,0 +1,46 @@ +BasedOnStyle: LLVM + +IndentWidth: 4 +TabWidth: 4 +UseTab: Never + +ColumnLimit: 120 + +BreakBeforeBraces: Allman + +PointerAlignment: Right +DerivePointerAlignment: false + +BinPackParameters: false +BinPackArguments: false + +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AllowShortFunctionsOnASingleLine: false + +IndentCaseLabels: true +InsertNewlineAtEOF: true + +SortIncludes: true + +ReflowComments: false +AlignOperands: false + +StatementMacros: + - ASSERT + - CHECK + - REQUIRE + - ENSURE + - EXPECT + - CUDA_CHECK + - CUBLAS_CHECK + - CUSPARSE_CHECK + +AttributeMacros: + - __global__ + - __device__ + - __host__ + - __shared__ + - __constant__ + - __managed__ + - __launch_bounds__ \ No newline at end of file diff --git a/.github/workflows/check-clang-format.yml b/.github/workflows/check-clang-format.yml new file mode 100644 index 0000000..b486bae --- /dev/null +++ b/.github/workflows/check-clang-format.yml @@ -0,0 +1,21 @@ +name: Format + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_dispatch: + +jobs: + build: + name: Clang Format Check + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: DoozyX/clang-format-lint-action@v0.20 + with: + source: '.' + extensions: 'h,c,cu' + clangFormatVersion: 20 \ No newline at end of file diff --git a/.github/workflows/spelling.yml b/.github/workflows/spelling.yml new file mode 100644 index 0000000..f003f40 --- /dev/null +++ b/.github/workflows/spelling.yml @@ -0,0 +1,24 @@ +name: Spelling + +permissions: + contents: read + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_dispatch: + +env: + CLICOLOR: 1 + +jobs: + spelling: + name: Spell Check with Typos + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v5 + - name: Spell Check Repo + uses: crate-ci/typos@v1.42.3 \ No newline at end of file diff --git a/include/cupdlpx.h b/include/cupdlpx.h index d18c242..5ad5e11 100644 --- a/include/cupdlpx.h +++ b/include/cupdlpx.h @@ -24,22 +24,19 @@ extern "C" #endif // create an lp_problem_t from a matrix descriptor - lp_problem_t *create_lp_problem( - const double *objective_c, - const matrix_desc_t *A_desc, - const double *con_lb, - const double *con_ub, - const double *var_lb, - const double *var_ub, - const double *objective_constant); + lp_problem_t *create_lp_problem(const double *objective_c, + const matrix_desc_t *A_desc, + const double *con_lb, + const double *con_ub, + const double *var_lb, + const double *var_ub, + const double *objective_constant); // Set up initial primal and dual solution for an lp_problem_t void set_start_values(lp_problem_t *prob, const double *primal, const double *dual); // solve the LP problem using PDHG - cupdlpx_result_t *solve_lp_problem( - lp_problem_t *prob, - const pdhg_parameters_t *params); + cupdlpx_result_t *solve_lp_problem(lp_problem_t *prob, const pdhg_parameters_t *params); // parameter void set_default_parameters(pdhg_parameters_t *params); @@ -50,4 +47,4 @@ extern "C" #ifdef __cplusplus } // extern "C" -#endif \ No newline at end of file +#endif diff --git a/include/cupdlpx_types.h b/include/cupdlpx_types.h index b4efe1a..d874206 100644 --- a/include/cupdlpx_types.h +++ b/include/cupdlpx_types.h @@ -25,171 +25,171 @@ extern "C" { #endif - typedef enum - { - TERMINATION_REASON_UNSPECIFIED, - TERMINATION_REASON_OPTIMAL, - TERMINATION_REASON_PRIMAL_INFEASIBLE, - TERMINATION_REASON_DUAL_INFEASIBLE, - TERMINATION_REASON_INFEASIBLE_OR_UNBOUNDED, - TERMINATION_REASON_TIME_LIMIT, - TERMINATION_REASON_ITERATION_LIMIT, - TERMINATION_REASON_FEAS_POLISH_SUCCESS - } termination_reason_t; - - typedef enum + typedef enum + { + TERMINATION_REASON_UNSPECIFIED, + TERMINATION_REASON_OPTIMAL, + TERMINATION_REASON_PRIMAL_INFEASIBLE, + TERMINATION_REASON_DUAL_INFEASIBLE, + TERMINATION_REASON_INFEASIBLE_OR_UNBOUNDED, + TERMINATION_REASON_TIME_LIMIT, + TERMINATION_REASON_ITERATION_LIMIT, + TERMINATION_REASON_FEAS_POLISH_SUCCESS + } termination_reason_t; + + typedef enum { NORM_TYPE_L2 = 0, NORM_TYPE_L_INF = 1 } norm_type_t; - typedef struct - { - int num_variables; - int num_constraints; - double *variable_lower_bound; - double *variable_upper_bound; - double *objective_vector; - double objective_constant; - - int *constraint_matrix_row_pointers; - int *constraint_matrix_col_indices; - double *constraint_matrix_values; - int constraint_matrix_num_nonzeros; - - double *constraint_lower_bound; - double *constraint_upper_bound; - - double *primal_start; - double *dual_start; - } lp_problem_t; - - typedef struct - { - double artificial_restart_threshold; - double sufficient_reduction_for_restart; - double necessary_reduction_for_restart; - double k_p; - double k_i; - double k_d; - double i_smooth; - } restart_parameters_t; - - typedef struct - { - double eps_optimal_relative; - double eps_feasible_relative; - double eps_feas_polish_relative; - double time_sec_limit; - int iteration_limit; - } termination_criteria_t; - - typedef struct - { - int l_inf_ruiz_iterations; - bool has_pock_chambolle_alpha; - double pock_chambolle_alpha; - bool bound_objective_rescaling; - bool verbose; - int termination_evaluation_frequency; - int sv_max_iter; - double sv_tol; - termination_criteria_t termination_criteria; - restart_parameters_t restart_params; - double reflection_coefficient; - bool feasibility_polishing; - norm_type_t optimality_norm; - bool presolve; - double matrix_zero_tol; - } pdhg_parameters_t; - - typedef struct - { - int num_variables; - int num_constraints; - int num_nonzeros; - - int num_reduced_variables; - int num_reduced_constraints; - int num_reduced_nonzeros; - - double *primal_solution; - double *dual_solution; - double *reduced_cost; - - int total_count; - double rescaling_time_sec; - double cumulative_time_sec; - double presolve_time; - int presolve_status; - // PresolveStats presolve_stats; - - double absolute_primal_residual; - double relative_primal_residual; - double absolute_dual_residual; - double relative_dual_residual; - double primal_objective_value; - double dual_objective_value; - double objective_gap; - double relative_objective_gap; - double max_primal_ray_infeasibility; - double max_dual_ray_infeasibility; - double primal_ray_linear_objective; - double dual_ray_objective; - termination_reason_t termination_reason; - double feasibility_polishing_time; - int feasibility_iteration; - } cupdlpx_result_t; - - // matrix formats - typedef enum - { - matrix_dense = 0, - matrix_csr = 1, - matrix_csc = 2, - matrix_coo = 3 - } matrix_format_t; - - // matrix descriptor - typedef struct - { - int m; // num_constraints - int n; // num_variables - matrix_format_t fmt; - - union MatrixData - { - struct MatrixDense - { // Dense (row-major) - const double *A; // m*n - } dense; - - struct MatrixCSR - { // CSR - int nnz; - const int *row_ptr; - const int *col_ind; - const double *vals; - } csr; - - struct MatrixCSC - { // CSC - int nnz; - const int *col_ptr; - const int *row_ind; - const double *vals; - } csc; - - struct MatrixCOO - { // COO - int nnz; - const int *row_ind; - const int *col_ind; - const double *vals; - } coo; - } data; - } matrix_desc_t; + typedef struct + { + int num_variables; + int num_constraints; + double *variable_lower_bound; + double *variable_upper_bound; + double *objective_vector; + double objective_constant; + + int *constraint_matrix_row_pointers; + int *constraint_matrix_col_indices; + double *constraint_matrix_values; + int constraint_matrix_num_nonzeros; + + double *constraint_lower_bound; + double *constraint_upper_bound; + + double *primal_start; + double *dual_start; + } lp_problem_t; + + typedef struct + { + double artificial_restart_threshold; + double sufficient_reduction_for_restart; + double necessary_reduction_for_restart; + double k_p; + double k_i; + double k_d; + double i_smooth; + } restart_parameters_t; + + typedef struct + { + double eps_optimal_relative; + double eps_feasible_relative; + double eps_feas_polish_relative; + double time_sec_limit; + int iteration_limit; + } termination_criteria_t; + + typedef struct + { + int l_inf_ruiz_iterations; + bool has_pock_chambolle_alpha; + double pock_chambolle_alpha; + bool bound_objective_rescaling; + bool verbose; + int termination_evaluation_frequency; + int sv_max_iter; + double sv_tol; + termination_criteria_t termination_criteria; + restart_parameters_t restart_params; + double reflection_coefficient; + bool feasibility_polishing; + norm_type_t optimality_norm; + bool presolve; + double matrix_zero_tol; + } pdhg_parameters_t; + + typedef struct + { + int num_variables; + int num_constraints; + int num_nonzeros; + + int num_reduced_variables; + int num_reduced_constraints; + int num_reduced_nonzeros; + + double *primal_solution; + double *dual_solution; + double *reduced_cost; + + int total_count; + double rescaling_time_sec; + double cumulative_time_sec; + double presolve_time; + int presolve_status; + // PresolveStats presolve_stats; + + double absolute_primal_residual; + double relative_primal_residual; + double absolute_dual_residual; + double relative_dual_residual; + double primal_objective_value; + double dual_objective_value; + double objective_gap; + double relative_objective_gap; + double max_primal_ray_infeasibility; + double max_dual_ray_infeasibility; + double primal_ray_linear_objective; + double dual_ray_objective; + termination_reason_t termination_reason; + double feasibility_polishing_time; + int feasibility_iteration; + } cupdlpx_result_t; + + // matrix formats + typedef enum + { + matrix_dense = 0, + matrix_csr = 1, + matrix_csc = 2, + matrix_coo = 3 + } matrix_format_t; + + // matrix descriptor + typedef struct + { + int m; // num_constraints + int n; // num_variables + matrix_format_t fmt; + + union MatrixData + { + struct MatrixDense + { // Dense (row-major) + const double *A; // m*n + } dense; + + struct MatrixCSR + { // CSR + int nnz; + const int *row_ptr; + const int *col_ind; + const double *vals; + } csr; + + struct MatrixCSC + { // CSC + int nnz; + const int *col_ptr; + const int *row_ind; + const double *vals; + } csc; + + struct MatrixCOO + { // COO + int nnz; + const int *row_ind; + const int *col_ind; + const double *vals; + } coo; + } data; + } matrix_desc_t; #ifdef __cplusplus } // extern "C" -#endif \ No newline at end of file +#endif diff --git a/include/mps_parser.h b/include/mps_parser.h index 84cbeef..a1eed4f 100644 --- a/include/mps_parser.h +++ b/include/mps_parser.h @@ -18,4 +18,4 @@ limitations under the License. #include "cupdlpx_types.h" -lp_problem_t *read_mps_file(const char *filename); \ No newline at end of file +lp_problem_t *read_mps_file(const char *filename); diff --git a/internal/internal_types.h b/internal/internal_types.h index 9eec9f9..9894c93 100644 --- a/internal/internal_types.h +++ b/internal/internal_types.h @@ -23,124 +23,124 @@ limitations under the License. typedef struct { - int num_rows; - int num_cols; - int num_nonzeros; - int *row_ptr; - int *col_ind; - int *row_ind; - double *val; - int *transpose_map; + int num_rows; + int num_cols; + int num_nonzeros; + int *row_ptr; + int *col_ind; + int *row_ind; + double *val; + int *transpose_map; } cu_sparse_matrix_csr_t; typedef struct { - int num_variables; - int num_constraints; - double *variable_lower_bound; - double *variable_upper_bound; - double *objective_vector; - double objective_constant; - cu_sparse_matrix_csr_t *constraint_matrix; - cu_sparse_matrix_csr_t *constraint_matrix_t; - double *constraint_lower_bound; - double *constraint_upper_bound; - int num_blocks_primal; - int num_blocks_dual; - int num_blocks_primal_dual; - int num_blocks_nnz; - double objective_vector_norm; - double constraint_bound_norm; - double *constraint_lower_bound_finite_val; - double *constraint_upper_bound_finite_val; - double *variable_lower_bound_finite_val; - double *variable_upper_bound_finite_val; - - double *initial_primal_solution; - double *current_primal_solution; - double *pdhg_primal_solution; - double *reflected_primal_solution; - double *dual_product; - double *initial_dual_solution; - double *current_dual_solution; - double *pdhg_dual_solution; - double *reflected_dual_solution; - double *primal_product; - double step_size; - double *d_primal_step_size; - double *d_dual_step_size; - double primal_weight; - int total_count; - bool is_this_major_iteration; - double primal_weight_error_sum; - double primal_weight_last_error; - double best_primal_weight; - double best_primal_dual_residual_gap; - - double *constraint_rescaling; - double *variable_rescaling; - double constraint_bound_rescaling; - double objective_vector_rescaling; - double *primal_slack; - double *dual_slack; - double rescaling_time_sec; - clock_t start_time; - double cumulative_time_sec; - - double *primal_residual; - double absolute_primal_residual; - double relative_primal_residual; - double *dual_residual; - double absolute_dual_residual; - double relative_dual_residual; - double primal_objective_value; - double dual_objective_value; - double objective_gap; - double relative_objective_gap; - double max_primal_ray_infeasibility; - double max_dual_ray_infeasibility; - double primal_ray_linear_objective; - double dual_ray_objective; - termination_reason_t termination_reason; - - double *delta_primal_solution; - double *delta_dual_solution; - double fixed_point_error; - double initial_fixed_point_error; - double last_trial_fixed_point_error; - int inner_count; - int *d_inner_count; - - cusparseHandle_t sparse_handle; - cublasHandle_t blas_handle; - size_t spmv_buffer_size; - size_t primal_spmv_buffer_size; - size_t dual_spmv_buffer_size; - void *primal_spmv_buffer; - void *dual_spmv_buffer; - void *spmv_buffer; - - cusparseSpMatDescr_t matA; - cusparseSpMatDescr_t matAt; - cusparseDnVecDescr_t vec_primal_sol; - cusparseDnVecDescr_t vec_dual_sol; - cusparseDnVecDescr_t vec_primal_prod; - cusparseDnVecDescr_t vec_dual_prod; - - double *ones_primal_d; - double *ones_dual_d; - - double feasibility_polishing_time; - int feasibility_iteration; + int num_variables; + int num_constraints; + double *variable_lower_bound; + double *variable_upper_bound; + double *objective_vector; + double objective_constant; + cu_sparse_matrix_csr_t *constraint_matrix; + cu_sparse_matrix_csr_t *constraint_matrix_t; + double *constraint_lower_bound; + double *constraint_upper_bound; + int num_blocks_primal; + int num_blocks_dual; + int num_blocks_primal_dual; + int num_blocks_nnz; + double objective_vector_norm; + double constraint_bound_norm; + double *constraint_lower_bound_finite_val; + double *constraint_upper_bound_finite_val; + double *variable_lower_bound_finite_val; + double *variable_upper_bound_finite_val; + + double *initial_primal_solution; + double *current_primal_solution; + double *pdhg_primal_solution; + double *reflected_primal_solution; + double *dual_product; + double *initial_dual_solution; + double *current_dual_solution; + double *pdhg_dual_solution; + double *reflected_dual_solution; + double *primal_product; + double step_size; + double *d_primal_step_size; + double *d_dual_step_size; + double primal_weight; + int total_count; + bool is_this_major_iteration; + double primal_weight_error_sum; + double primal_weight_last_error; + double best_primal_weight; + double best_primal_dual_residual_gap; + + double *constraint_rescaling; + double *variable_rescaling; + double constraint_bound_rescaling; + double objective_vector_rescaling; + double *primal_slack; + double *dual_slack; + double rescaling_time_sec; + clock_t start_time; + double cumulative_time_sec; + + double *primal_residual; + double absolute_primal_residual; + double relative_primal_residual; + double *dual_residual; + double absolute_dual_residual; + double relative_dual_residual; + double primal_objective_value; + double dual_objective_value; + double objective_gap; + double relative_objective_gap; + double max_primal_ray_infeasibility; + double max_dual_ray_infeasibility; + double primal_ray_linear_objective; + double dual_ray_objective; + termination_reason_t termination_reason; + + double *delta_primal_solution; + double *delta_dual_solution; + double fixed_point_error; + double initial_fixed_point_error; + double last_trial_fixed_point_error; + int inner_count; + int *d_inner_count; + + cusparseHandle_t sparse_handle; + cublasHandle_t blas_handle; + size_t spmv_buffer_size; + size_t primal_spmv_buffer_size; + size_t dual_spmv_buffer_size; + void *primal_spmv_buffer; + void *dual_spmv_buffer; + void *spmv_buffer; + + cusparseSpMatDescr_t matA; + cusparseSpMatDescr_t matAt; + cusparseDnVecDescr_t vec_primal_sol; + cusparseDnVecDescr_t vec_dual_sol; + cusparseDnVecDescr_t vec_primal_prod; + cusparseDnVecDescr_t vec_dual_prod; + + double *ones_primal_d; + double *ones_dual_d; + + double feasibility_polishing_time; + int feasibility_iteration; cudaStream_t stream; } pdhg_solver_state_t; typedef struct { - double *con_rescale; - double *var_rescale; - double con_bound_rescale; - double obj_vec_rescale; - double rescaling_time_sec; + double *con_rescale; + double *var_rescale; + double con_bound_rescale; + double obj_vec_rescale; + double rescaling_time_sec; } rescale_info_t; diff --git a/internal/preconditioner.h b/internal/preconditioner.h index 71a5c2b..e19bc6c 100644 --- a/internal/preconditioner.h +++ b/internal/preconditioner.h @@ -23,9 +23,7 @@ extern "C" { #endif - rescale_info_t *rescale_problem( - const pdhg_parameters_t *params, - pdhg_solver_state_t *state); + rescale_info_t *rescale_problem(const pdhg_parameters_t *params, pdhg_solver_state_t *state); #ifdef __cplusplus } diff --git a/internal/presolve.h b/internal/presolve.h index a852cef..8937668 100644 --- a/internal/presolve.h +++ b/internal/presolve.h @@ -22,7 +22,8 @@ extern "C" cupdlpx_presolve_info_t *pslp_presolve(const lp_problem_t *original_prob, const pdhg_parameters_t *params); - cupdlpx_result_t *create_result_from_presolve(const cupdlpx_presolve_info_t *info, const lp_problem_t *original_prob); + cupdlpx_result_t *create_result_from_presolve(const cupdlpx_presolve_info_t *info, + const lp_problem_t *original_prob); const char *get_presolve_status_str(enum PresolveStatus_ status); @@ -36,4 +37,4 @@ extern "C" } #endif -#endif // PRESOLVE_H \ No newline at end of file +#endif // PRESOLVE_H diff --git a/internal/solver.h b/internal/solver.h index f7b6f49..b07aba8 100644 --- a/internal/solver.h +++ b/internal/solver.h @@ -23,9 +23,7 @@ extern "C" { #endif - cupdlpx_result_t *optimize( - const pdhg_parameters_t *params, - lp_problem_t *original_problem); + cupdlpx_result_t *optimize(const pdhg_parameters_t *params, lp_problem_t *original_problem); #ifdef __cplusplus } diff --git a/internal/utils.h b/internal/utils.h index 186eeeb..ce8d674 100644 --- a/internal/utils.h +++ b/internal/utils.h @@ -28,40 +28,37 @@ extern "C" { #endif -#define CUDA_CHECK(call) \ - do \ - { \ - cudaError_t err = call; \ - if (err != cudaSuccess) \ - { \ - fprintf(stderr, "CUDA Error at %s:%d: %s\n", __FILE__, \ - __LINE__, cudaGetErrorName(err)); \ - exit(EXIT_FAILURE); \ - } \ +#define CUDA_CHECK(call) \ + do \ + { \ + cudaError_t err = call; \ + if (err != cudaSuccess) \ + { \ + fprintf(stderr, "CUDA Error at %s:%d: %s\n", __FILE__, __LINE__, cudaGetErrorName(err)); \ + exit(EXIT_FAILURE); \ + } \ } while (0) -#define CUBLAS_CHECK(call) \ - do \ - { \ - cublasStatus_t status = call; \ - if (status != CUBLAS_STATUS_SUCCESS) \ - { \ - fprintf(stderr, "cuBLAS Error at %s:%d: %s\n", __FILE__, \ - __LINE__, cublasGetStatusName(status)); \ - exit(EXIT_FAILURE); \ - } \ +#define CUBLAS_CHECK(call) \ + do \ + { \ + cublasStatus_t status = call; \ + if (status != CUBLAS_STATUS_SUCCESS) \ + { \ + fprintf(stderr, "cuBLAS Error at %s:%d: %s\n", __FILE__, __LINE__, cublasGetStatusName(status)); \ + exit(EXIT_FAILURE); \ + } \ } while (0) -#define CUSPARSE_CHECK(call) \ - do \ - { \ - cusparseStatus_t status = call; \ - if (status != CUSPARSE_STATUS_SUCCESS) \ - { \ - fprintf(stderr, "cuSPARSE Error at %s:%d: %s\n", __FILE__, \ - __LINE__, cusparseGetErrorName(status)); \ - exit(EXIT_FAILURE); \ - } \ +#define CUSPARSE_CHECK(call) \ + do \ + { \ + cusparseStatus_t status = call; \ + if (status != CUSPARSE_STATUS_SUCCESS) \ + { \ + fprintf(stderr, "cuSPARSE Error at %s:%d: %s\n", __FILE__, __LINE__, cusparseGetErrorName(status)); \ + exit(EXIT_FAILURE); \ + } \ } while (0) #define THREADS_PER_BLOCK 256 @@ -75,27 +72,20 @@ extern "C" void *safe_realloc(void *ptr, size_t new_size); - double estimate_maximum_singular_value( - cusparseHandle_t sparse_handle, - cublasHandle_t blas_handle, - const cu_sparse_matrix_csr_t *A, - const cu_sparse_matrix_csr_t *AT, - int max_iterations, - double tolerance); + double estimate_maximum_singular_value(cusparseHandle_t sparse_handle, + cublasHandle_t blas_handle, + const cu_sparse_matrix_csr_t *A, + const cu_sparse_matrix_csr_t *AT, + int max_iterations, + double tolerance); - void compute_interaction_and_movement( - pdhg_solver_state_t *solver_state, - double *interaction, - double *movement); + void compute_interaction_and_movement(pdhg_solver_state_t *solver_state, double *interaction, double *movement); - bool should_do_adaptive_restart( - pdhg_solver_state_t *solver_state, - const restart_parameters_t *restart_params, - int termination_evaluation_frequency); + bool should_do_adaptive_restart(pdhg_solver_state_t *solver_state, + const restart_parameters_t *restart_params, + int termination_evaluation_frequency); - void check_termination_criteria( - pdhg_solver_state_t *solver_state, - const termination_criteria_t *criteria); + void check_termination_criteria(pdhg_solver_state_t *solver_state, const termination_criteria_t *criteria); void print_initial_info(const pdhg_parameters_t *params, lp_problem_t *problem); @@ -113,30 +103,32 @@ extern "C" void fill_or_copy(double **dest, int n, const double *src, double fill_value); - int dense_to_csr(const matrix_desc_t *desc, - int **row_ptr, int **col_ind, double **vals, int *nnz_out); + int dense_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, double **vals, int *nnz_out); - int csc_to_csr(const matrix_desc_t *desc, - int **row_ptr, int **col_ind, double **vals); + int csc_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, double **vals); - int coo_to_csr(const matrix_desc_t *desc, - int **row_ptr, int **col_ind, double **vals); + int coo_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, double **vals); - void check_feas_polishing_termination_criteria( - pdhg_solver_state_t *solver_state, - const pdhg_solver_state_t *ori_solver_state, - const termination_criteria_t *criteria, - bool is_primal_polish); + void check_feas_polishing_termination_criteria(pdhg_solver_state_t *solver_state, + const pdhg_solver_state_t *ori_solver_state, + const termination_criteria_t *criteria, + bool is_primal_polish); void print_initial_feas_polish_info(bool is_primal_polish, const pdhg_parameters_t *params); void display_feas_polish_iteration_stats(const pdhg_solver_state_t *state, bool verbose, bool is_primal_polish); - void pdhg_feas_polish_final_log(const pdhg_solver_state_t *primal_state, const pdhg_solver_state_t *dual_state, bool verbose); + void pdhg_feas_polish_final_log(const pdhg_solver_state_t *primal_state, + const pdhg_solver_state_t *dual_state, + bool verbose); - void compute_primal_feas_polish_residual(pdhg_solver_state_t *state, const pdhg_solver_state_t *ori_state, norm_type_t optimality_norm); + void compute_primal_feas_polish_residual(pdhg_solver_state_t *state, + const pdhg_solver_state_t *ori_state, + norm_type_t optimality_norm); - void compute_dual_feas_polish_residual(pdhg_solver_state_t *state, const pdhg_solver_state_t *ori_state, norm_type_t optimality_norm); + void compute_dual_feas_polish_residual(pdhg_solver_state_t *state, + const pdhg_solver_state_t *ori_state, + norm_type_t optimality_norm); void set_default_parameters(pdhg_parameters_t *params); diff --git a/python_bindings/_core_bindings.cpp b/python_bindings/_core_bindings.cpp index c990fe2..0329d96 100644 --- a/python_bindings/_core_bindings.cpp +++ b/python_bindings/_core_bindings.cpp @@ -89,8 +89,8 @@ static const double *get_arr_ptr_f64_or_null(py::object obj, const char *name, M } // get int32 pointer to contiguous numpy array -static const int32_t *get_index_ptr_i32(py::object obj, const char *name, - MatrixKeepalive &keep, std::vector &tmp_vec) +static const int32_t * +get_index_ptr_i32(py::object obj, const char *name, MatrixKeepalive &keep, std::vector &tmp_vec) { // nullptr if obj is None if (!obj || obj.is_none()) @@ -126,8 +126,9 @@ static const int32_t *get_index_ptr_i32(py::object obj, const char *name, int64_t v = p[i]; if (v < 0 || v > I32_MAX) { - throw std::overflow_error(std::string(name) + " has value out of int32 range; " - "backend currently supports only 32-bit indices."); + throw std::overflow_error(std::string(name) + + " has value out of int32 range; " + "backend currently supports only 32-bit indices."); } tmp_vec[static_cast(i)] = static_cast(v); } @@ -138,16 +139,21 @@ static const int32_t *get_index_ptr_i32(py::object obj, const char *name, } // helper function to convert norm string to enum -static norm_type_t parse_norm_string(const std::string& s) +static norm_type_t parse_norm_string(const std::string &s) { std::string lower = s; std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); - - if (lower == "l2") { + + if (lower == "l2") + { return NORM_TYPE_L2; - } else if (lower == "linf") { + } + else if (lower == "linf") + { return NORM_TYPE_L_INF; - } else { + } + else + { throw std::invalid_argument("Unknown norm type: " + s + ". Use 'l2' or 'linf'."); } } @@ -170,7 +176,8 @@ static void ensure_len_or_null(py::object obj, const char *name, int expect_len) // check length if ((int)arr.size() != expect_len) { - throw std::invalid_argument(std::string(name) + " length mismatch: expect " + std::to_string(expect_len) + ", got " + std::to_string((int)arr.size())); + throw std::invalid_argument(std::string(name) + " length mismatch: expect " + std::to_string(expect_len) + + ", got " + std::to_string((int)arr.size())); } } @@ -179,24 +186,24 @@ static const char *status_to_str(termination_reason_t r) { switch (r) { - case TERMINATION_REASON_OPTIMAL: - return "OPTIMAL"; - case TERMINATION_REASON_PRIMAL_INFEASIBLE: - return "PRIMAL_INFEASIBLE"; - case TERMINATION_REASON_DUAL_INFEASIBLE: - return "DUAL_INFEASIBLE"; - case TERMINATION_REASON_INFEASIBLE_OR_UNBOUNDED: - return "INFEASIBLE_OR_UNBOUNDED"; - case TERMINATION_REASON_TIME_LIMIT: - return "TIME_LIMIT"; - case TERMINATION_REASON_ITERATION_LIMIT: - return "ITERATION_LIMIT"; - case TERMINATION_REASON_FEAS_POLISH_SUCCESS: - return "FEAS_POLISH_SUCCESS"; - case TERMINATION_REASON_UNSPECIFIED: - return "UNSPECIFIED"; - default: - return "UNKNOWN"; + case TERMINATION_REASON_OPTIMAL: + return "OPTIMAL"; + case TERMINATION_REASON_PRIMAL_INFEASIBLE: + return "PRIMAL_INFEASIBLE"; + case TERMINATION_REASON_DUAL_INFEASIBLE: + return "DUAL_INFEASIBLE"; + case TERMINATION_REASON_INFEASIBLE_OR_UNBOUNDED: + return "INFEASIBLE_OR_UNBOUNDED"; + case TERMINATION_REASON_TIME_LIMIT: + return "TIME_LIMIT"; + case TERMINATION_REASON_ITERATION_LIMIT: + return "ITERATION_LIMIT"; + case TERMINATION_REASON_FEAS_POLISH_SUCCESS: + return "FEAS_POLISH_SUCCESS"; + case TERMINATION_REASON_UNSPECIFIED: + return "UNSPECIFIED"; + default: + return "UNKNOWN"; } } @@ -205,21 +212,21 @@ static int status_to_code(termination_reason_t r) { switch (r) { - case TERMINATION_REASON_OPTIMAL: - return 0; - case TERMINATION_REASON_PRIMAL_INFEASIBLE: - return 1; - case TERMINATION_REASON_DUAL_INFEASIBLE: - return 2; - case TERMINATION_REASON_TIME_LIMIT: - return 3; - case TERMINATION_REASON_ITERATION_LIMIT: - return 4; - case TERMINATION_REASON_INFEASIBLE_OR_UNBOUNDED: - return 5; - case TERMINATION_REASON_UNSPECIFIED: - default: - return -1; + case TERMINATION_REASON_OPTIMAL: + return 0; + case TERMINATION_REASON_PRIMAL_INFEASIBLE: + return 1; + case TERMINATION_REASON_DUAL_INFEASIBLE: + return 2; + case TERMINATION_REASON_TIME_LIMIT: + return 3; + case TERMINATION_REASON_ITERATION_LIMIT: + return 4; + case TERMINATION_REASON_INFEASIBLE_OR_UNBOUNDED: + return 5; + case TERMINATION_REASON_UNSPECIFIED: + default: + return -1; } } @@ -283,20 +290,32 @@ static void parse_params_from_python(py::object params_obj, pdhg_parameters_t *p py::dict d = params_obj.cast(); auto getf = [&](const char *k, double &tgt) - { if (d.contains(k)) tgt = py::cast(d[k]); }; + { + if (d.contains(k)) + tgt = py::cast(d[k]); + }; auto geti = [&](const char *k, int &tgt) - { if (d.contains(k)) tgt = py::cast(d[k]); }; + { + if (d.contains(k)) + tgt = py::cast(d[k]); + }; auto getb = [&](const char *k, bool &tgt) - { if (d.contains(k)) tgt = py::cast(d[k]); }; + { + if (d.contains(k)) + tgt = py::cast(d[k]); + }; auto get_norm = [&](const char *k, norm_type_t &tgt) - { - if (d.contains(k)) { + { + if (d.contains(k)) + { py::object val = d[k]; - if (py::isinstance(val)) { + if (py::isinstance(val)) + { std::string sval = py::cast(val); tgt = parse_norm_string(sval); } - else { + else + { throw std::invalid_argument("optimality_norm must be a string ('l2'/'linf')"); } } @@ -436,17 +455,16 @@ static PyMatrixView get_matrix_from_python(py::object A) } // solve function -static py::dict solve_once( - py::object A, - py::object objective_vector, // c - py::object objective_constant, // c0 (optional → 0) - py::object variable_lower_bound, // lb (optional → 0) - py::object variable_upper_bound, // ub (optional → inf) - py::object constraint_lower_bound, // l (optional → -inf) - py::object constraint_upper_bound, // u (optional → inf) - py::object params = py::none(), // PDHG parameters (optional → default) - py::object primal_start = py::none(), // warm start primal solution (optional) - py::object dual_start = py::none() // warm start dual solution (optional) +static py::dict solve_once(py::object A, + py::object objective_vector, // c + py::object objective_constant, // c0 (optional → 0) + py::object variable_lower_bound, // lb (optional → 0) + py::object variable_upper_bound, // ub (optional → inf) + py::object constraint_lower_bound, // l (optional → -inf) + py::object constraint_upper_bound, // u (optional → inf) + py::object params = py::none(), // PDHG parameters (optional → default) + py::object primal_start = py::none(), // warm start primal solution (optional) + py::object dual_start = py::none() // warm start dual solution (optional) ) { // parse matrix @@ -564,10 +582,10 @@ PYBIND11_MODULE(_cupdlpx_core, m) { m.doc() = "cupdlpx core bindings (auto-detect dense/CSR/CSC/COO; initialize default params here)"; - m.def("get_default_params", &get_default_params_py, - "Return default PDHG parameters as a dict"); + m.def("get_default_params", &get_default_params_py, "Return default PDHG parameters as a dict"); - m.def("solve_once", &solve_once, + m.def("solve_once", + &solve_once, py::arg("A"), py::arg("objective_vector"), py::arg("objective_constant") = py::none(), @@ -578,4 +596,4 @@ PYBIND11_MODULE(_cupdlpx_core, m) py::arg("params") = py::none(), py::arg("primal_start") = py::none(), py::arg("dual_start") = py::none()); -} \ No newline at end of file +} diff --git a/src/cli.c b/src/cli.c index e8b7058..5bfa4a6 100644 --- a/src/cli.c +++ b/src/cli.c @@ -27,11 +27,9 @@ limitations under the License. #include #include -char *get_output_path(const char *output_dir, const char *instance_name, - const char *suffix) +char *get_output_path(const char *output_dir, const char *instance_name, const char *suffix) { - size_t path_len = - strlen(output_dir) + strlen(instance_name) + strlen(suffix) + 2; + size_t path_len = strlen(output_dir) + strlen(instance_name) + strlen(suffix) + 2; char *full_path = safe_malloc(path_len * sizeof(char)); snprintf(full_path, path_len, "%s/%s%s", output_dir, instance_name, suffix); return full_path; @@ -58,8 +56,7 @@ char *extract_instance_name(const char *filename) return instance_name; } -void save_solution(const double *data, int size, const char *output_dir, - const char *instance_name, const char *suffix) +void save_solution(const double *data, int size, const char *output_dir, const char *instance_name, const char *suffix) { char *file_path = get_output_path(output_dir, instance_name, suffix); if (file_path == NULL || data == NULL) @@ -84,8 +81,7 @@ void save_solution(const double *data, int size, const char *output_dir, free(file_path); } -void save_solver_summary(const cupdlpx_result_t *result, const char *output_dir, - const char *instance_name) +void save_solver_summary(const cupdlpx_result_t *result, const char *output_dir, const char *instance_name) { char *file_path = get_output_path(output_dir, instance_name, "_summary.txt"); if (file_path == NULL) @@ -100,8 +96,7 @@ void save_solver_summary(const cupdlpx_result_t *result, const char *output_dir, free(file_path); return; } - fprintf(outfile, "Termination Reason: %s\n", - termination_reason_to_string(result->termination_reason)); + fprintf(outfile, "Termination Reason: %s\n", termination_reason_to_string(result->termination_reason)); if (result->presolve_time > 0.0) { fprintf(outfile, "Presolve Status: %s\n", get_presolve_status_str(result->presolve_status)); @@ -116,7 +111,7 @@ void save_solver_summary(const cupdlpx_result_t *result, const char *output_dir, // fprintf(outfile, "NNZ Removed Primal Propagation: %d\n", result->presolve_stats.nnz_removed_primal_propagation); // fprintf(outfile, "NNZ Removed Parallel Rows: %d\n", result->presolve_stats.nnz_removed_parallel_rows); // fprintf(outfile, "NNZ Removed Parallel Cols: %d\n", result->presolve_stats.nnz_removed_parallel_cols); - + // fprintf(outfile, "Presolve Time Init (sec): %e\n", result->presolve_stats.time_init); // fprintf(outfile, "Presolve Time Run (sec): %e\n", result->presolve_stats.time_presolve); // fprintf(outfile, "Presolve Time Fast (sec): %e\n", result->presolve_stats.time_fast_reductions); @@ -130,16 +125,12 @@ void save_solver_summary(const cupdlpx_result_t *result, const char *output_dir, fprintf(outfile, "Precondition time (sec): %e\n", result->rescaling_time_sec); fprintf(outfile, "Runtime (sec): %e\n", result->cumulative_time_sec); fprintf(outfile, "Iterations Count: %d\n", result->total_count); - fprintf(outfile, "Primal Objective Value: %e\n", - result->primal_objective_value); + fprintf(outfile, "Primal Objective Value: %e\n", result->primal_objective_value); fprintf(outfile, "Dual Objective Value: %e\n", result->dual_objective_value); - fprintf(outfile, "Relative Primal Residual: %e\n", - result->relative_primal_residual); - fprintf(outfile, "Relative Dual Residual: %e\n", - result->relative_dual_residual); + fprintf(outfile, "Relative Primal Residual: %e\n", result->relative_primal_residual); + fprintf(outfile, "Relative Dual Residual: %e\n", result->relative_dual_residual); fprintf(outfile, "Absolute Objective Gap: %e\n", result->objective_gap); - fprintf(outfile, "Relative Objective Gap: %e\n", - result->relative_objective_gap); + fprintf(outfile, "Relative Objective Gap: %e\n", result->relative_objective_gap); fprintf(outfile, "Rows: %d\n", result->num_constraints); fprintf(outfile, "Columns: %d\n", result->num_variables); fprintf(outfile, "Nonzeros: %d\n", result->num_nonzeros); @@ -157,55 +148,67 @@ void print_usage(const char *prog_name) fprintf(stderr, "Usage: %s [OPTIONS] \n\n", prog_name); fprintf(stderr, "Arguments:\n"); - fprintf(stderr, " Path to the input problem in MPS " - "format (.mps or .mps.gz).\n"); - fprintf(stderr, " Directory where output files " - "will be saved. It will contain:\n"); - fprintf(stderr, " - _summary.txt\n"); fprintf(stderr, - " - _primal_solution.txt\n"); + " Path to the input problem in MPS " + "format (.mps or .mps.gz).\n"); fprintf(stderr, - " - _dual_solution.txt\n\n"); + " Directory where output files " + "will be saved. It will contain:\n"); + fprintf(stderr, " - _summary.txt\n"); + fprintf(stderr, " - _primal_solution.txt\n"); + fprintf(stderr, " - _dual_solution.txt\n\n"); fprintf(stderr, "Options:\n"); + fprintf(stderr, " -h, --help Display this help message.\n"); + fprintf(stderr, + " -v, --verbose " + "Enable verbose logging (default: false).\n"); + fprintf(stderr, + " --time_limit " + "Time limit in seconds (default: 3600.0).\n"); + fprintf(stderr, " --iter_limit Iteration limit (default: %d).\n", INT32_MAX); + fprintf(stderr, + " --eps_opt " + "Relative optimality tolerance (default: 1e-4).\n"); + fprintf(stderr, + " --eps_feas " + "Relative feasibility tolerance (default: 1e-4).\n"); + fprintf(stderr, + " --l_inf_ruiz_iter " + "Iterations for L-inf Ruiz rescaling (default: 10).\n"); + fprintf(stderr, + " --no_pock_chambolle " + "Disable Pock-Chambolle rescaling (default: enabled).\n"); + fprintf(stderr, + " --pock_chambolle_alpha " + "Value for Pock-Chambolle alpha (default: 1.0).\n"); fprintf(stderr, - " -h, --help Display this help message.\n"); - fprintf(stderr, " -v, --verbose " - "Enable verbose logging (default: false).\n"); - fprintf(stderr, " --time_limit " - "Time limit in seconds (default: 3600.0).\n"); - fprintf( - stderr, - " --iter_limit Iteration limit (default: %d).\n", - INT32_MAX); - fprintf(stderr, " --eps_opt " - "Relative optimality tolerance (default: 1e-4).\n"); - fprintf(stderr, " --eps_feas " - "Relative feasibility tolerance (default: 1e-4).\n"); - fprintf(stderr, " --l_inf_ruiz_iter " - "Iterations for L-inf Ruiz rescaling (default: 10).\n"); - fprintf(stderr, " --no_pock_chambolle " - "Disable Pock-Chambolle rescaling (default: enabled).\n"); - fprintf(stderr, " --pock_chambolle_alpha " - "Value for Pock-Chambolle alpha (default: 1.0).\n"); - fprintf(stderr, " --no_bound_obj_rescaling " - "Disable bound objective rescaling (default: enabled).\n"); - fprintf(stderr, " --eval_freq " - "Termination evaluation frequency (default: 200).\n"); - fprintf(stderr, " --sv_max_iter " - "Max iterations for singular value estimation (default: 5000).\n"); - fprintf(stderr, " --sv_tol " - "Tolerance for singular value estimation (default: 1e-4).\n"); - fprintf(stderr, " -f --feasibility_polishing " - "Enable feasibility use feasibility polishing (default: false).\n"); - fprintf(stderr, " --eps_feas_polish Relative feasibility " - "polish tolerance (default: 1e-6).\n"); - fprintf(stderr, " --opt_norm " - "Norm for optimality criteria: l2 or linf (default: l2).\n"); - fprintf(stderr, " --no_presolve " - "Disable presolve (default: enabled).\n"); - fprintf(stderr, " --matrix_zero_tol . " - "Zero tolerance in constraint matrix.\n"); + " --no_bound_obj_rescaling " + "Disable bound objective rescaling (default: enabled).\n"); + fprintf(stderr, + " --eval_freq " + "Termination evaluation frequency (default: 200).\n"); + fprintf(stderr, + " --sv_max_iter " + "Max iterations for singular value estimation (default: 5000).\n"); + fprintf(stderr, + " --sv_tol " + "Tolerance for singular value estimation (default: 1e-4).\n"); + fprintf(stderr, + " -f --feasibility_polishing " + "Enable feasibility use feasibility polishing (default: false).\n"); + fprintf(stderr, + " --eps_feas_polish Relative feasibility " + "polish tolerance (default: 1e-6).\n"); + fprintf(stderr, + " --opt_norm " + "Norm for optimality criteria: l2 or linf (default: l2).\n"); + fprintf(stderr, + " --no_presolve " + "Disable presolve (default: enabled).\n"); + fprintf(stderr, + " --matrix_zero_tol . " + "Zero tolerance in constraint matrix.\n"); } int main(int argc, char *argv[]) @@ -213,106 +216,108 @@ int main(int argc, char *argv[]) pdhg_parameters_t params; set_default_parameters(¶ms); - static struct option long_options[] = { - {"help", no_argument, 0, 'h'}, - {"verbose", no_argument, 0, 'v'}, - {"time_limit", required_argument, 0, 1001}, - {"iter_limit", required_argument, 0, 1002}, - {"eps_opt", required_argument, 0, 1003}, - {"eps_feas", required_argument, 0, 1004}, - {"eps_feas_polish", required_argument, 0, 1006}, - {"feasibility_polishing", no_argument, 0, 'f'}, - {"l_inf_ruiz_iter", required_argument, 0, 1007}, - {"pock_chambolle_alpha", required_argument, 0, 1008}, - {"no_pock_chambolle", no_argument, 0, 1009}, - {"no_bound_obj_rescaling", no_argument, 0, 1010}, - {"sv_max_iter", required_argument, 0, 1011}, - {"sv_tol", required_argument, 0, 1012}, - {"eval_freq", required_argument, 0, 1013}, - {"opt_norm", required_argument, 0, 1014}, - {"no_presolve", no_argument, 0, 1015}, - {"matrix_zero_tol", required_argument, 0, 1016}, - {0, 0, 0, 0}}; + static struct option long_options[] = {{"help", no_argument, 0, 'h'}, + {"verbose", no_argument, 0, 'v'}, + {"time_limit", required_argument, 0, 1001}, + {"iter_limit", required_argument, 0, 1002}, + {"eps_opt", required_argument, 0, 1003}, + {"eps_feas", required_argument, 0, 1004}, + {"eps_feas_polish", required_argument, 0, 1006}, + {"feasibility_polishing", no_argument, 0, 'f'}, + {"l_inf_ruiz_iter", required_argument, 0, 1007}, + {"pock_chambolle_alpha", required_argument, 0, 1008}, + {"no_pock_chambolle", no_argument, 0, 1009}, + {"no_bound_obj_rescaling", no_argument, 0, 1010}, + {"sv_max_iter", required_argument, 0, 1011}, + {"sv_tol", required_argument, 0, 1012}, + {"eval_freq", required_argument, 0, 1013}, + {"opt_norm", required_argument, 0, 1014}, + {"no_presolve", no_argument, 0, 1015}, + {"matrix_zero_tol", required_argument, 0, 1016}, + {0, 0, 0, 0}}; int opt; while ((opt = getopt_long(argc, argv, "hvfp", long_options, NULL)) != -1) { switch (opt) { - case 'h': - print_usage(argv[0]); - return 0; - case 'v': - params.verbose = true; - break; - case 1001: // --time_limit - params.termination_criteria.time_sec_limit = atof(optarg); - break; - case 1002: // --iter_limit - params.termination_criteria.iteration_limit = atoi(optarg); - break; - case 1003: // --eps_optimal - params.termination_criteria.eps_optimal_relative = atof(optarg); - break; - case 1004: // --eps_feas - params.termination_criteria.eps_feasible_relative = atof(optarg); - break; - case 1006: // --eps_feas_polish_relative - params.termination_criteria.eps_feas_polish_relative = atof(optarg); - break; - case 'f': // --feasibility_polishing - params.feasibility_polishing = true; - break; - case 1007: // --l_inf_ruiz_iter - params.l_inf_ruiz_iterations = atoi(optarg); - break; - case 1008: // --pock_chambolle_alpha - params.pock_chambolle_alpha = atof(optarg); - break; - case 1009: // --no_pock_chambolle - params.has_pock_chambolle_alpha = false; - break; - case 1010: // --no_bound_obj_rescaling - params.bound_objective_rescaling = false; - break; - case 1011: // --sv_max_iter - params.sv_max_iter = atoi(optarg); - break; - case 1012: // --sv_tol - params.sv_tol = atof(optarg); - break; - case 1013: // --eval_freq - params.termination_evaluation_frequency = atoi(optarg); - break; - case 1014: // --opt_norm + case 'h': + print_usage(argv[0]); + return 0; + case 'v': + params.verbose = true; + break; + case 1001: // --time_limit + params.termination_criteria.time_sec_limit = atof(optarg); + break; + case 1002: // --iter_limit + params.termination_criteria.iteration_limit = atoi(optarg); + break; + case 1003: // --eps_optimal + params.termination_criteria.eps_optimal_relative = atof(optarg); + break; + case 1004: // --eps_feas + params.termination_criteria.eps_feasible_relative = atof(optarg); + break; + case 1006: // --eps_feas_polish_relative + params.termination_criteria.eps_feas_polish_relative = atof(optarg); + break; + case 'f': // --feasibility_polishing + params.feasibility_polishing = true; + break; + case 1007: // --l_inf_ruiz_iter + params.l_inf_ruiz_iterations = atoi(optarg); + break; + case 1008: // --pock_chambolle_alpha + params.pock_chambolle_alpha = atof(optarg); + break; + case 1009: // --no_pock_chambolle + params.has_pock_chambolle_alpha = false; + break; + case 1010: // --no_bound_obj_rescaling + params.bound_objective_rescaling = false; + break; + case 1011: // --sv_max_iter + params.sv_max_iter = atoi(optarg); + break; + case 1012: // --sv_tol + params.sv_tol = atof(optarg); + break; + case 1013: // --eval_freq + params.termination_evaluation_frequency = atoi(optarg); + break; + case 1014: // --opt_norm { const char *norm_str = optarg; - if (strcmp(norm_str, "l2") == 0) { + if (strcmp(norm_str, "l2") == 0) + { params.optimality_norm = NORM_TYPE_L2; - } else if (strcmp(norm_str, "linf") == 0) { + } + else if (strcmp(norm_str, "linf") == 0) + { params.optimality_norm = NORM_TYPE_L_INF; - } else { + } + else + { fprintf(stderr, "Error: opt_norm must be 'l2' or 'linf'\n"); return 1; } } break; - case 1015: // --no_presolve - params.presolve = false; - break; - case 1016: // --matrix_zero_tol - params.matrix_zero_tol = atof(optarg); - break; - case '?': // Unknown option - return 1; + case 1015: // --no_presolve + params.presolve = false; + break; + case 1016: // --matrix_zero_tol + params.matrix_zero_tol = atof(optarg); + break; + case '?': // Unknown option + return 1; } } if (argc - optind != 2) { - fprintf( - stderr, - "Error: You must specify an input file and an output directory.\n\n"); + fprintf(stderr, "Error: You must specify an input file and an output directory.\n\n"); print_usage(argv[0]); return 1; } @@ -344,10 +349,9 @@ int main(int argc, char *argv[]) else { save_solver_summary(result, output_dir, instance_name); - save_solution(result->primal_solution, problem->num_variables, output_dir, - instance_name, "_primal_solution.txt"); - save_solution(result->dual_solution, problem->num_constraints, output_dir, - instance_name, "_dual_solution.txt"); + save_solution( + result->primal_solution, problem->num_variables, output_dir, instance_name, "_primal_solution.txt"); + save_solution(result->dual_solution, problem->num_constraints, output_dir, instance_name, "_dual_solution.txt"); cupdlpx_result_free(result); } @@ -355,4 +359,4 @@ int main(int argc, char *argv[]) free(instance_name); return 0; -} \ No newline at end of file +} diff --git a/src/cupdlpx.c b/src/cupdlpx.c index f6b6c9c..035e520 100644 --- a/src/cupdlpx.c +++ b/src/cupdlpx.c @@ -25,8 +25,10 @@ limitations under the License. // create an lp_problem_t from a matrix lp_problem_t *create_lp_problem(const double *objective_c, const matrix_desc_t *A_desc, - const double *con_lb, const double *con_ub, - const double *var_lb, const double *var_ub, + const double *con_lb, + const double *con_ub, + const double *var_lb, + const double *var_ub, const double *objective_constant) { lp_problem_t *prob = (lp_problem_t *)safe_malloc(sizeof(lp_problem_t)); @@ -39,82 +41,75 @@ lp_problem_t *create_lp_problem(const double *objective_c, // handle matrix by format switch (A_desc->fmt) { - case matrix_dense: - dense_to_csr(A_desc, &prob->constraint_matrix_row_pointers, - &prob->constraint_matrix_col_indices, - &prob->constraint_matrix_values, - &prob->constraint_matrix_num_nonzeros); - break; - - case matrix_csc: - { - int *row_ptr = NULL, *col_ind = NULL; - double *vals = NULL; - if (csc_to_csr(A_desc, &row_ptr, &col_ind, &vals) != 0) + case matrix_dense: + dense_to_csr(A_desc, + &prob->constraint_matrix_row_pointers, + &prob->constraint_matrix_col_indices, + &prob->constraint_matrix_values, + &prob->constraint_matrix_num_nonzeros); + break; + + case matrix_csc: { - fprintf(stderr, "[interface] CSC->CSR failed.\n"); - free(prob); - return NULL; + int *row_ptr = NULL, *col_ind = NULL; + double *vals = NULL; + if (csc_to_csr(A_desc, &row_ptr, &col_ind, &vals) != 0) + { + fprintf(stderr, "[interface] CSC->CSR failed.\n"); + free(prob); + return NULL; + } + prob->constraint_matrix_num_nonzeros = A_desc->data.csc.nnz; + prob->constraint_matrix_row_pointers = row_ptr; + prob->constraint_matrix_col_indices = col_ind; + prob->constraint_matrix_values = vals; + break; } - prob->constraint_matrix_num_nonzeros = A_desc->data.csc.nnz; - prob->constraint_matrix_row_pointers = row_ptr; - prob->constraint_matrix_col_indices = col_ind; - prob->constraint_matrix_values = vals; - break; - } - case matrix_coo: - { - int *row_ptr = NULL, *col_ind = NULL; - double *vals = NULL; - if (coo_to_csr(A_desc, &row_ptr, &col_ind, &vals) != 0) + case matrix_coo: { - fprintf(stderr, "[interface] COO->CSR failed.\n"); - free(prob); - return NULL; + int *row_ptr = NULL, *col_ind = NULL; + double *vals = NULL; + if (coo_to_csr(A_desc, &row_ptr, &col_ind, &vals) != 0) + { + fprintf(stderr, "[interface] COO->CSR failed.\n"); + free(prob); + return NULL; + } + prob->constraint_matrix_num_nonzeros = A_desc->data.coo.nnz; + prob->constraint_matrix_row_pointers = row_ptr; + prob->constraint_matrix_col_indices = col_ind; + prob->constraint_matrix_values = vals; + break; } - prob->constraint_matrix_num_nonzeros = A_desc->data.coo.nnz; - prob->constraint_matrix_row_pointers = row_ptr; - prob->constraint_matrix_col_indices = col_ind; - prob->constraint_matrix_values = vals; - break; - } - case matrix_csr: - prob->constraint_matrix_num_nonzeros = A_desc->data.csr.nnz; - prob->constraint_matrix_row_pointers = - (int *)safe_malloc((size_t)(A_desc->m + 1) * sizeof(int)); - prob->constraint_matrix_col_indices = - (int *)safe_malloc((size_t)A_desc->data.csr.nnz * sizeof(int)); - prob->constraint_matrix_values = - (double *)safe_malloc((size_t)A_desc->data.csr.nnz * sizeof(double)); - memcpy(prob->constraint_matrix_row_pointers, A_desc->data.csr.row_ptr, - (size_t)(A_desc->m + 1) * sizeof(int)); - memcpy(prob->constraint_matrix_col_indices, A_desc->data.csr.col_ind, - (size_t)A_desc->data.csr.nnz * sizeof(int)); - memcpy(prob->constraint_matrix_values, A_desc->data.csr.vals, - (size_t)A_desc->data.csr.nnz * sizeof(double)); - break; - - default: - fprintf( - stderr, - "[interface] make_problem_from_matrix: unsupported matrix format %d.\n", - A_desc->fmt); - free(prob); - return NULL; + case matrix_csr: + prob->constraint_matrix_num_nonzeros = A_desc->data.csr.nnz; + prob->constraint_matrix_row_pointers = (int *)safe_malloc((size_t)(A_desc->m + 1) * sizeof(int)); + prob->constraint_matrix_col_indices = (int *)safe_malloc((size_t)A_desc->data.csr.nnz * sizeof(int)); + prob->constraint_matrix_values = (double *)safe_malloc((size_t)A_desc->data.csr.nnz * sizeof(double)); + memcpy( + prob->constraint_matrix_row_pointers, A_desc->data.csr.row_ptr, (size_t)(A_desc->m + 1) * sizeof(int)); + memcpy(prob->constraint_matrix_col_indices, + A_desc->data.csr.col_ind, + (size_t)A_desc->data.csr.nnz * sizeof(int)); + memcpy( + prob->constraint_matrix_values, A_desc->data.csr.vals, (size_t)A_desc->data.csr.nnz * sizeof(double)); + break; + + default: + fprintf(stderr, "[interface] make_problem_from_matrix: unsupported matrix format %d.\n", A_desc->fmt); + free(prob); + return NULL; } // default fill values prob->objective_constant = objective_constant ? *objective_constant : 0.0; fill_or_copy(&prob->objective_vector, prob->num_variables, objective_c, 0.0); fill_or_copy(&prob->variable_lower_bound, prob->num_variables, var_lb, -INFINITY); - fill_or_copy(&prob->variable_upper_bound, prob->num_variables, var_ub, - INFINITY); - fill_or_copy(&prob->constraint_lower_bound, prob->num_constraints, con_lb, - -INFINITY); - fill_or_copy(&prob->constraint_upper_bound, prob->num_constraints, con_ub, - INFINITY); + fill_or_copy(&prob->variable_upper_bound, prob->num_variables, var_ub, INFINITY); + fill_or_copy(&prob->constraint_lower_bound, prob->num_constraints, con_lb, -INFINITY); + fill_or_copy(&prob->constraint_upper_bound, prob->num_constraints, con_ub, INFINITY); return prob; } @@ -149,8 +144,7 @@ void lp_problem_free(lp_problem_t *prob) free(prob); } -void set_start_values(lp_problem_t *prob, const double *primal, - const double *dual) +void set_start_values(lp_problem_t *prob, const double *primal, const double *dual) { if (!prob) return; @@ -182,8 +176,7 @@ void set_start_values(lp_problem_t *prob, const double *primal, } } -cupdlpx_result_t *solve_lp_problem(lp_problem_t *prob, - const pdhg_parameters_t *params) +cupdlpx_result_t *solve_lp_problem(lp_problem_t *prob, const pdhg_parameters_t *params) { // argument checks if (!prob) diff --git a/src/mps_parser.c b/src/mps_parser.c index 756d741..c37687e 100644 --- a/src/mps_parser.c +++ b/src/mps_parser.c @@ -75,8 +75,7 @@ static void namemap_resize(NameMap *map) { NameNode *next = current->next; - unsigned long h = - hash_string(current->name) % (unsigned long)new_num_buckets; + unsigned long h = hash_string(current->name) % (unsigned long)new_num_buckets; current->next = map->buckets[h]; map->buckets[h] = current; @@ -174,8 +173,7 @@ static FastLineReader *fast_reader_open(const char *filename) reader->buffer = safe_malloc(READER_BUFFER_SIZE); - if (strlen(filename) > 3 && - strcmp(filename + strlen(filename) - 3, ".gz") == 0) + if (strlen(filename) > 3 && strcmp(filename + strlen(filename) - 3, ".gz") == 0) { reader->is_gz = true; reader->handle.gz_f = gzopen(filename, "rb"); @@ -222,8 +220,7 @@ static void fast_reader_close(FastLineReader *reader) free(reader); } -static char *fast_reader_gets(FastLineReader *reader, char *line_buf, - int line_buf_size) +static char *fast_reader_gets(FastLineReader *reader, char *line_buf, int line_buf_size) { int len = 0; @@ -234,8 +231,7 @@ static char *fast_reader_gets(FastLineReader *reader, char *line_buf, { if (reader->is_gz) { - int bytes_read = - gzread(reader->handle.gz_f, reader->buffer, READER_BUFFER_SIZE); + int bytes_read = gzread(reader->handle.gz_f, reader->buffer, READER_BUFFER_SIZE); if (bytes_read <= 0) { @@ -245,8 +241,7 @@ static char *fast_reader_gets(FastLineReader *reader, char *line_buf, } else { - size_t bytes_read = - fread(reader->buffer, 1, READER_BUFFER_SIZE, reader->handle.f); + size_t bytes_read = fread(reader->buffer, 1, READER_BUFFER_SIZE, reader->handle.f); if (bytes_read <= 0) { return (len > 0) ? line_buf : NULL; @@ -256,8 +251,7 @@ static char *fast_reader_gets(FastLineReader *reader, char *line_buf, reader->current_pos = reader->buffer; } - char *newline_pos = (char *)memchr(reader->current_pos, '\n', - reader->end_pos - reader->current_pos); + char *newline_pos = (char *)memchr(reader->current_pos, '\n', reader->end_pos - reader->current_pos); int bytes_to_copy; bool line_complete = (newline_pos != NULL); @@ -342,12 +336,9 @@ static int add_coo_entry(CooMatrix *coo, int row, int col, double value) if (coo->nnz >= coo->capacity) { size_t new_capacity = (coo->capacity == 0) ? 1024 : coo->capacity * 2; - coo->row_indices = - (int *)safe_realloc(coo->row_indices, new_capacity * sizeof(int)); - coo->col_indices = - (int *)safe_realloc(coo->col_indices, new_capacity * sizeof(int)); - coo->values = - (double *)safe_realloc(coo->values, new_capacity * sizeof(double)); + coo->row_indices = (int *)safe_realloc(coo->row_indices, new_capacity * sizeof(int)); + coo->col_indices = (int *)safe_realloc(coo->col_indices, new_capacity * sizeof(int)); + coo->values = (double *)safe_realloc(coo->values, new_capacity * sizeof(double)); coo->capacity = new_capacity; } coo->row_indices[coo->nnz] = row; @@ -371,12 +362,9 @@ static bool ensure_column_capacity(MpsParserState *state) return false; } - state->objective_coeffs = - (double *)safe_realloc(state->objective_coeffs, new_cap * sizeof(double)); - state->var_lower_bounds = - (double *)safe_realloc(state->var_lower_bounds, new_cap * sizeof(double)); - state->var_upper_bounds = - (double *)safe_realloc(state->var_upper_bounds, new_cap * sizeof(double)); + state->objective_coeffs = (double *)safe_realloc(state->objective_coeffs, new_cap * sizeof(double)); + state->var_lower_bounds = (double *)safe_realloc(state->var_lower_bounds, new_cap * sizeof(double)); + state->var_upper_bounds = (double *)safe_realloc(state->var_upper_bounds, new_cap * sizeof(double)); for (size_t i = state->col_capacity; i < new_cap; ++i) { @@ -391,18 +379,12 @@ static bool ensure_column_capacity(MpsParserState *state) static void free_parser_state(MpsParserState *state); static int finalize_rows(MpsParserState *state); -static int parse_rows_section(MpsParserState *state, char **tokens, - int n_tokens); -static int parse_columns_section(MpsParserState *state, char **tokens, - int n_tokens); -static int parse_rhs_section(MpsParserState *state, char **tokens, - int n_tokens); -static int parse_ranges_section(MpsParserState *state, char **tokens, - int n_tokens); -static int parse_bounds_section(MpsParserState *state, char **tokens, - int n_tokens); -static int mps_coo_to_csr(lp_problem_t *prob, CooMatrix *coo, - size_t num_constraints); +static int parse_rows_section(MpsParserState *state, char **tokens, int n_tokens); +static int parse_columns_section(MpsParserState *state, char **tokens, int n_tokens); +static int parse_rhs_section(MpsParserState *state, char **tokens, int n_tokens); +static int parse_ranges_section(MpsParserState *state, char **tokens, int n_tokens); +static int parse_bounds_section(MpsParserState *state, char **tokens, int n_tokens); +static int mps_coo_to_csr(lp_problem_t *prob, CooMatrix *coo, size_t num_constraints); typedef enum { @@ -472,8 +454,7 @@ lp_problem_t *read_mps_file(const char *filename) next_section = SEC_ENDATA; } - if (current_section == SEC_ROWS && next_section != SEC_ROWS && - !rows_finalized) + if (current_section == SEC_ROWS && next_section != SEC_ROWS && !rows_finalized) { if (finalize_rows(&state) != 0) state.error_flag = 1; @@ -488,36 +469,35 @@ lp_problem_t *read_mps_file(const char *filename) switch (current_section) { - case SEC_OBJSENSE: - if (n_tokens > 0 && (strcmp(tokens[0], "MAX") == 0 || - strcmp(tokens[0], "MAXIMIZE") == 0)) - { - state.is_maximize = true; - } - break; - case SEC_ROWS: - if (parse_rows_section(&state, tokens, n_tokens) != 0) - state.error_flag = 1; - break; - case SEC_COLUMNS: - if (parse_columns_section(&state, tokens, n_tokens) != 0) - state.error_flag = 1; - break; - case SEC_RHS: - if (parse_rhs_section(&state, tokens, n_tokens) != 0) - state.error_flag = 1; - break; - case SEC_RANGES: - if (parse_ranges_section(&state, tokens, n_tokens) != 0) - state.error_flag = 1; - break; - case SEC_BOUNDS: - if (parse_bounds_section(&state, tokens, n_tokens) != 0) - state.error_flag = 1; - break; - default: + case SEC_OBJSENSE: + if (n_tokens > 0 && (strcmp(tokens[0], "MAX") == 0 || strcmp(tokens[0], "MAXIMIZE") == 0)) + { + state.is_maximize = true; + } + break; + case SEC_ROWS: + if (parse_rows_section(&state, tokens, n_tokens) != 0) + state.error_flag = 1; + break; + case SEC_COLUMNS: + if (parse_columns_section(&state, tokens, n_tokens) != 0) + state.error_flag = 1; + break; + case SEC_RHS: + if (parse_rhs_section(&state, tokens, n_tokens) != 0) + state.error_flag = 1; + break; + case SEC_RANGES: + if (parse_ranges_section(&state, tokens, n_tokens) != 0) + state.error_flag = 1; + break; + case SEC_BOUNDS: + if (parse_bounds_section(&state, tokens, n_tokens) != 0) + state.error_flag = 1; + break; + default: - break; + break; } } @@ -535,8 +515,7 @@ lp_problem_t *read_mps_file(const char *filename) prob->num_variables = state.col_map.size; prob->num_constraints = state.row_map.size; prob->constraint_matrix_num_nonzeros = state.coo_matrix.nnz; - prob->objective_constant = - state.is_maximize ? -state.objective_constant : state.objective_constant; + prob->objective_constant = state.is_maximize ? -state.objective_constant : state.objective_constant; prob->objective_vector = state.objective_coeffs; prob->variable_lower_bound = state.var_lower_bounds; @@ -573,20 +552,16 @@ lp_problem_t *read_mps_file(const char *filename) return prob; } -static int parse_rows_section(MpsParserState *state, char **tokens, - int n_tokens) +static int parse_rows_section(MpsParserState *state, char **tokens, int n_tokens) { if (n_tokens < 2) return 0; if (state->num_buffered_rows >= state->buffered_rows_capacity) { - state->buffered_rows_capacity = (state->buffered_rows_capacity == 0) - ? 64 - : state->buffered_rows_capacity * 2; - state->buffered_rows = (BufferedRow *)safe_realloc( - state->buffered_rows, - state->buffered_rows_capacity * sizeof(BufferedRow)); + state->buffered_rows_capacity = (state->buffered_rows_capacity == 0) ? 64 : state->buffered_rows_capacity * 2; + state->buffered_rows = + (BufferedRow *)safe_realloc(state->buffered_rows, state->buffered_rows_capacity * sizeof(BufferedRow)); } BufferedRow *new_row = &state->buffered_rows[state->num_buffered_rows]; @@ -635,11 +610,9 @@ static int finalize_rows(MpsParserState *state) size_t current_size = state->row_map.size; if (current_size >= state->constraint_capacity) { - state->constraint_capacity = (state->constraint_capacity == 0) - ? 64 - : state->constraint_capacity * 2; - state->constraint_types = (char *)safe_realloc( - state->constraint_types, state->constraint_capacity * sizeof(char)); + state->constraint_capacity = (state->constraint_capacity == 0) ? 64 : state->constraint_capacity * 2; + state->constraint_types = + (char *)safe_realloc(state->constraint_types, state->constraint_capacity * sizeof(char)); } namemap_put(&state->row_map, state->buffered_rows[i].name); state->constraint_types[current_size] = type; @@ -648,10 +621,8 @@ static int finalize_rows(MpsParserState *state) size_t num_constraints = state->row_map.size; if (num_constraints > 0) { - state->constraint_lower_bounds = - safe_malloc(num_constraints * sizeof(double)); - state->constraint_upper_bounds = - safe_malloc(num_constraints * sizeof(double)); + state->constraint_lower_bounds = safe_malloc(num_constraints * sizeof(double)); + state->constraint_upper_bounds = safe_malloc(num_constraints * sizeof(double)); for (size_t i = 0; i < num_constraints; ++i) { @@ -676,8 +647,7 @@ static int finalize_rows(MpsParserState *state) return 0; } -static int parse_columns_section(MpsParserState *state, char **tokens, - int n_tokens) +static int parse_columns_section(MpsParserState *state, char **tokens, int n_tokens) { if (n_tokens < 2) return 0; @@ -704,8 +674,7 @@ static int parse_columns_section(MpsParserState *state, char **tokens, { if (!state->current_col_name) { - fprintf(stderr, - "ERROR: Column data found before any column name was defined.\n"); + fprintf(stderr, "ERROR: Column data found before any column name was defined.\n"); return -1; } col_name = state->current_col_name; @@ -724,8 +693,7 @@ static int parse_columns_section(MpsParserState *state, char **tokens, const char *row_name = tokens[i]; double value = atof(tokens[i + 1]); - if (state->objective_row_name && - strcmp(row_name, state->objective_row_name) == 0) + if (state->objective_row_name && strcmp(row_name, state->objective_row_name) == 0) { state->objective_coeffs[col_idx] += value; } @@ -744,8 +712,7 @@ static int parse_columns_section(MpsParserState *state, char **tokens, return 0; } -static int parse_rhs_section(MpsParserState *state, char **tokens, - int n_tokens) +static int parse_rhs_section(MpsParserState *state, char **tokens, int n_tokens) { for (int i = 1; i + 1 < n_tokens; i += 2) @@ -753,8 +720,7 @@ static int parse_rhs_section(MpsParserState *state, char **tokens, const char *row_name = tokens[i]; double value = atof(tokens[i + 1]); - if (state->objective_row_name && - strcmp(row_name, state->objective_row_name) == 0) + if (state->objective_row_name && strcmp(row_name, state->objective_row_name) == 0) { state->objective_constant = -value; } @@ -779,8 +745,7 @@ static int parse_rhs_section(MpsParserState *state, char **tokens, return 0; } -static int parse_ranges_section(MpsParserState *state, char **tokens, - int n_tokens) +static int parse_ranges_section(MpsParserState *state, char **tokens, int n_tokens) { for (int i = 1; i + 1 < n_tokens; i += 2) @@ -792,8 +757,8 @@ static int parse_ranges_section(MpsParserState *state, char **tokens, if (row_idx != -1) { char type = state->constraint_types[row_idx]; - double rhs = (type == 'L') ? state->constraint_upper_bounds[row_idx] - : state->constraint_lower_bounds[row_idx]; + double rhs = + (type == 'L') ? state->constraint_upper_bounds[row_idx] : state->constraint_lower_bounds[row_idx]; if (type == 'G') { @@ -819,8 +784,7 @@ static int parse_ranges_section(MpsParserState *state, char **tokens, return 0; } -static int parse_bounds_section(MpsParserState *state, char **tokens, - int n_tokens) +static int parse_bounds_section(MpsParserState *state, char **tokens, int n_tokens) { if (n_tokens < 3) return 0; @@ -868,12 +832,10 @@ static int parse_bounds_section(MpsParserState *state, char **tokens, return 0; } -static int mps_coo_to_csr(lp_problem_t *prob, CooMatrix *coo, - size_t num_constraints) +static int mps_coo_to_csr(lp_problem_t *prob, CooMatrix *coo, size_t num_constraints) { - prob->constraint_matrix_row_pointers = - safe_calloc(num_constraints + 1, sizeof(int)); + prob->constraint_matrix_row_pointers = safe_calloc(num_constraints + 1, sizeof(int)); prob->constraint_matrix_col_indices = safe_malloc(coo->nnz * sizeof(int)); prob->constraint_matrix_values = safe_malloc(coo->nnz * sizeof(double)); @@ -884,13 +846,11 @@ static int mps_coo_to_csr(lp_problem_t *prob, CooMatrix *coo, for (size_t i = 1; i <= num_constraints; ++i) { - prob->constraint_matrix_row_pointers[i] += - prob->constraint_matrix_row_pointers[i - 1]; + prob->constraint_matrix_row_pointers[i] += prob->constraint_matrix_row_pointers[i - 1]; } int *row_pos = safe_malloc((num_constraints + 1) * sizeof(int)); - memcpy(row_pos, prob->constraint_matrix_row_pointers, - (num_constraints + 1) * sizeof(int)); + memcpy(row_pos, prob->constraint_matrix_row_pointers, (num_constraints + 1) * sizeof(int)); for (size_t i = 0; i < coo->nnz; ++i) { @@ -936,4 +896,4 @@ static void free_parser_state(MpsParserState *state) free(state->constraint_upper_bounds); free(state->objective_row_name); free(state->current_col_name); -} \ No newline at end of file +} diff --git a/src/preconditioner.cu b/src/preconditioner.cu index 11fc23b..8883c8c 100644 --- a/src/preconditioner.cu +++ b/src/preconditioner.cu @@ -16,12 +16,12 @@ limitations under the License. #include "preconditioner.h" #include "utils.h" -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include #define SCALING_EPSILON 1e-12 @@ -59,80 +59,86 @@ __global__ void clamp_sqrt_and_accum_kernel(double *__restrict__ scaling_factors double *__restrict__ inverse_scaling_factors, double *__restrict__ cumulative_rescaling, int num_variables); -__global__ void compute_bound_contrib_kernel( - const double *__restrict__ constraint_lower_bound, - const double *__restrict__ constraint_upper_bound, - int num_constraints, - double *__restrict__ contrib); -__global__ void scale_bounds_kernel( - double *__restrict__ constraint_lower_bound, - double *__restrict__ constraint_upper_bound, - double *__restrict__ initial_dual_solution, - int num_constraints, - double constraint_scale, - double objective_scale); -__global__ void scale_objective_kernel( - double *__restrict__ objective_vector, - double *__restrict__ variable_lower_bound, - double *__restrict__ variable_upper_bound, - double *__restrict__ initial_primal_solution, - int num_variables, - double constraint_scale, - double objective_scale); +__global__ void compute_bound_contrib_kernel(const double *__restrict__ constraint_lower_bound, + const double *__restrict__ constraint_upper_bound, + int num_constraints, + double *__restrict__ contrib); +__global__ void scale_bounds_kernel(double *__restrict__ constraint_lower_bound, + double *__restrict__ constraint_upper_bound, + double *__restrict__ initial_dual_solution, + int num_constraints, + double constraint_scale, + double objective_scale); +__global__ void scale_objective_kernel(double *__restrict__ objective_vector, + double *__restrict__ variable_lower_bound, + double *__restrict__ variable_upper_bound, + double *__restrict__ initial_primal_solution, + int num_variables, + double constraint_scale, + double objective_scale); __global__ void fill_ones_kernel(double *__restrict__ x, int num_variables); -static void scale_problem(pdhg_solver_state_t *state, double *constraint_rescaling, double *variable_rescaling, double *inverse_constraint_rescaling, double *inverse_variable_rescaling); -static void ruiz_rescaling(pdhg_solver_state_t *state, int num_iters, rescale_info_t *rescale_info, - double *constraint_rescaling, double *variable_rescaling, double *inverse_constraint_rescaling, double *inverse_variable_rescaling); -static void pock_chambolle_rescaling(pdhg_solver_state_t *state, const double alpha, rescale_info_t *rescale_info, - double *constraint_rescaling, double *variable_rescaling, double *inverse_constraint_rescaling, double *inverse_variable_rescaling); +static void scale_problem(pdhg_solver_state_t *state, + double *constraint_rescaling, + double *variable_rescaling, + double *inverse_constraint_rescaling, + double *inverse_variable_rescaling); +static void ruiz_rescaling(pdhg_solver_state_t *state, + int num_iters, + rescale_info_t *rescale_info, + double *constraint_rescaling, + double *variable_rescaling, + double *inverse_constraint_rescaling, + double *inverse_variable_rescaling); +static void pock_chambolle_rescaling(pdhg_solver_state_t *state, + const double alpha, + rescale_info_t *rescale_info, + double *constraint_rescaling, + double *variable_rescaling, + double *inverse_constraint_rescaling, + double *inverse_variable_rescaling); static void bound_objective_rescaling(pdhg_solver_state_t *state, rescale_info_t *rescale_info); -static void scale_problem( - pdhg_solver_state_t *state, - double *constraint_rescaling, - double *variable_rescaling, - double *inverse_constraint_rescaling, - double *inverse_variable_rescaling) +static void scale_problem(pdhg_solver_state_t *state, + double *constraint_rescaling, + double *variable_rescaling, + double *inverse_constraint_rescaling, + double *inverse_variable_rescaling) { int num_variables = state->num_variables; int num_constraints = state->num_constraints; - scale_variables_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>( - state->objective_vector, - state->variable_lower_bound, - state->variable_upper_bound, - state->initial_primal_solution, - variable_rescaling, - inverse_variable_rescaling, - num_variables); - - scale_constraints_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>( - state->constraint_lower_bound, - state->constraint_upper_bound, - state->initial_dual_solution, - constraint_rescaling, - inverse_constraint_rescaling, - num_constraints); - - scale_csr_nnz_kernel<<num_blocks_nnz, THREADS_PER_BLOCK>>>( - state->constraint_matrix->row_ind, - state->constraint_matrix->col_ind, - state->constraint_matrix->val, - state->constraint_matrix_t->val, - state->constraint_matrix->transpose_map, - inverse_variable_rescaling, - inverse_constraint_rescaling, - state->constraint_matrix->num_nonzeros); + scale_variables_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>(state->objective_vector, + state->variable_lower_bound, + state->variable_upper_bound, + state->initial_primal_solution, + variable_rescaling, + inverse_variable_rescaling, + num_variables); + + scale_constraints_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>(state->constraint_lower_bound, + state->constraint_upper_bound, + state->initial_dual_solution, + constraint_rescaling, + inverse_constraint_rescaling, + num_constraints); + + scale_csr_nnz_kernel<<num_blocks_nnz, THREADS_PER_BLOCK>>>(state->constraint_matrix->row_ind, + state->constraint_matrix->col_ind, + state->constraint_matrix->val, + state->constraint_matrix_t->val, + state->constraint_matrix->transpose_map, + inverse_variable_rescaling, + inverse_constraint_rescaling, + state->constraint_matrix->num_nonzeros); } -static void ruiz_rescaling( - pdhg_solver_state_t *state, - int num_iterations, - rescale_info_t *rescale_info, - double *constraint_rescaling, - double *variable_rescaling, - double *inverse_constraint_rescaling, - double *inverse_variable_rescaling) +static void ruiz_rescaling(pdhg_solver_state_t *state, + int num_iterations, + rescale_info_t *rescale_info, + double *constraint_rescaling, + double *variable_rescaling, + double *inverse_constraint_rescaling, + double *inverse_variable_rescaling) { const int num_constraints = state->num_constraints; const int num_variables = state->num_variables; @@ -140,73 +146,49 @@ static void ruiz_rescaling( for (int iter = 0; iter < num_iterations; ++iter) { compute_csr_row_absmax_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>( - state->constraint_matrix->row_ptr, - state->constraint_matrix->val, - num_constraints, - constraint_rescaling); + state->constraint_matrix->row_ptr, state->constraint_matrix->val, num_constraints, constraint_rescaling); clamp_sqrt_and_accum_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>( - constraint_rescaling, - inverse_constraint_rescaling, - rescale_info->con_rescale, - num_constraints); + constraint_rescaling, inverse_constraint_rescaling, rescale_info->con_rescale, num_constraints); compute_csr_row_absmax_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>( - state->constraint_matrix_t->row_ptr, - state->constraint_matrix_t->val, - num_variables, - variable_rescaling); + state->constraint_matrix_t->row_ptr, state->constraint_matrix_t->val, num_variables, variable_rescaling); clamp_sqrt_and_accum_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>( - variable_rescaling, - inverse_variable_rescaling, - rescale_info->var_rescale, - num_variables); + variable_rescaling, inverse_variable_rescaling, rescale_info->var_rescale, num_variables); - scale_problem(state, constraint_rescaling, variable_rescaling, inverse_constraint_rescaling, inverse_variable_rescaling); + scale_problem( + state, constraint_rescaling, variable_rescaling, inverse_constraint_rescaling, inverse_variable_rescaling); } } -static void pock_chambolle_rescaling( - pdhg_solver_state_t *state, - const double alpha, - rescale_info_t *rescale_info, - double *constraint_rescaling, - double *variable_rescaling, - double *inverse_constraint_rescaling, - double *inverse_variable_rescaling) +static void pock_chambolle_rescaling(pdhg_solver_state_t *state, + const double alpha, + rescale_info_t *rescale_info, + double *constraint_rescaling, + double *variable_rescaling, + double *inverse_constraint_rescaling, + double *inverse_variable_rescaling) { const int num_constraints = state->num_constraints; const int num_variables = state->num_variables; compute_csr_row_powsum_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>( - state->constraint_matrix->row_ptr, - state->constraint_matrix->val, - num_constraints, - alpha, - constraint_rescaling); + state->constraint_matrix->row_ptr, state->constraint_matrix->val, num_constraints, alpha, constraint_rescaling); clamp_sqrt_and_accum_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>( - constraint_rescaling, - inverse_constraint_rescaling, - rescale_info->con_rescale, - num_constraints); - - compute_csr_row_powsum_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>( - state->constraint_matrix_t->row_ptr, - state->constraint_matrix_t->val, - num_variables, - 2.0 - alpha, - variable_rescaling); + constraint_rescaling, inverse_constraint_rescaling, rescale_info->con_rescale, num_constraints); + + compute_csr_row_powsum_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>(state->constraint_matrix_t->row_ptr, + state->constraint_matrix_t->val, + num_variables, + 2.0 - alpha, + variable_rescaling); clamp_sqrt_and_accum_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>( - variable_rescaling, - inverse_variable_rescaling, - rescale_info->var_rescale, - num_variables); + variable_rescaling, inverse_variable_rescaling, rescale_info->var_rescale, num_variables); - scale_problem(state, constraint_rescaling, variable_rescaling, inverse_constraint_rescaling, inverse_variable_rescaling); + scale_problem( + state, constraint_rescaling, variable_rescaling, inverse_constraint_rescaling, inverse_variable_rescaling); } -static void bound_objective_rescaling( - pdhg_solver_state_t *state, - rescale_info_t *rescale_info) +static void bound_objective_rescaling(pdhg_solver_state_t *state, rescale_info_t *rescale_info) { const int num_constraints = state->num_constraints; const int num_variables = state->num_variables; @@ -214,10 +196,7 @@ static void bound_objective_rescaling( double *contrib_d = nullptr; CUDA_CHECK(cudaMalloc(&contrib_d, num_constraints * sizeof(double))); compute_bound_contrib_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>( - state->constraint_lower_bound, - state->constraint_upper_bound, - num_constraints, - contrib_d); + state->constraint_lower_bound, state->constraint_upper_bound, num_constraints, contrib_d); double *bnd_norm_sq_d = nullptr; CUDA_CHECK(cudaMalloc(&bnd_norm_sq_d, sizeof(double))); @@ -235,40 +214,34 @@ static void bound_objective_rescaling( double bnd_norm = sqrt(bnd_norm_sq_h); double obj_norm = 0.0; - CUBLAS_CHECK(cublasDnrm2(state->blas_handle, - state->num_variables, - state->objective_vector, 1, - &obj_norm)); + CUBLAS_CHECK(cublasDnrm2(state->blas_handle, state->num_variables, state->objective_vector, 1, &obj_norm)); double constraint_scale = 1.0 / (bnd_norm + 1.0); double objective_scale = 1.0 / (obj_norm + 1.0); - scale_bounds_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>( - state->constraint_lower_bound, - state->constraint_upper_bound, - state->initial_dual_solution, - num_constraints, - constraint_scale, - objective_scale); - - scale_objective_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>( - state->objective_vector, - state->variable_lower_bound, - state->variable_upper_bound, - state->initial_primal_solution, - num_variables, - constraint_scale, - objective_scale); + scale_bounds_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>(state->constraint_lower_bound, + state->constraint_upper_bound, + state->initial_dual_solution, + num_constraints, + constraint_scale, + objective_scale); + + scale_objective_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>(state->objective_vector, + state->variable_lower_bound, + state->variable_upper_bound, + state->initial_primal_solution, + num_variables, + constraint_scale, + objective_scale); rescale_info->con_bound_rescale = constraint_scale; rescale_info->obj_vec_rescale = objective_scale; } -rescale_info_t *rescale_problem( - const pdhg_parameters_t *params, - pdhg_solver_state_t *state) +rescale_info_t *rescale_problem(const pdhg_parameters_t *params, pdhg_solver_state_t *state) { - if (params->verbose) { + if (params->verbose) + { printf("\nPreconditioning\n"); } @@ -279,12 +252,11 @@ rescale_info_t *rescale_problem( rescale_info_t *rescale_info = (rescale_info_t *)safe_calloc(1, sizeof(rescale_info_t)); CUDA_CHECK(cudaMalloc(&rescale_info->con_rescale, num_constraints * sizeof(double))); CUDA_CHECK(cudaMalloc(&rescale_info->var_rescale, num_variables * sizeof(double))); - fill_ones_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>( - rescale_info->con_rescale, num_constraints); - fill_ones_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>( - rescale_info->var_rescale, num_variables); + fill_ones_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>(rescale_info->con_rescale, num_constraints); + fill_ones_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>(rescale_info->var_rescale, num_variables); - double *constraint_rescaling = NULL, *variable_rescaling = NULL, *inverse_constraint_rescaling = NULL, *inverse_variable_rescaling = NULL; + double *constraint_rescaling = NULL, *variable_rescaling = NULL, *inverse_constraint_rescaling = NULL, + *inverse_variable_rescaling = NULL; CUDA_CHECK(cudaMalloc(&constraint_rescaling, num_constraints * sizeof(double))); CUDA_CHECK(cudaMalloc(&variable_rescaling, num_variables * sizeof(double))); CUDA_CHECK(cudaMalloc(&inverse_constraint_rescaling, num_constraints * sizeof(double))); @@ -292,24 +264,39 @@ rescale_info_t *rescale_problem( if (params->l_inf_ruiz_iterations > 0) { - if (params->verbose) { + if (params->verbose) + { printf(" Ruiz scaling (%d iterations)\n", params->l_inf_ruiz_iterations); } - ruiz_rescaling(state, params->l_inf_ruiz_iterations, rescale_info, constraint_rescaling, variable_rescaling, inverse_constraint_rescaling, inverse_variable_rescaling); + ruiz_rescaling(state, + params->l_inf_ruiz_iterations, + rescale_info, + constraint_rescaling, + variable_rescaling, + inverse_constraint_rescaling, + inverse_variable_rescaling); } if (params->has_pock_chambolle_alpha) { - if (params->verbose) { + if (params->verbose) + { printf(" Pock-Chambolle scaling (alpha=%.4f)\n", params->pock_chambolle_alpha); } - pock_chambolle_rescaling(state, params->pock_chambolle_alpha, rescale_info, constraint_rescaling, variable_rescaling, inverse_constraint_rescaling, inverse_variable_rescaling); + pock_chambolle_rescaling(state, + params->pock_chambolle_alpha, + rescale_info, + constraint_rescaling, + variable_rescaling, + inverse_constraint_rescaling, + inverse_variable_rescaling); } rescale_info->con_bound_rescale = 1.0; rescale_info->obj_vec_rescale = 1.0; if (params->bound_objective_rescaling) { - if (params->verbose) { + if (params->verbose) + { printf(" Bound-objective scaling\n"); } bound_objective_rescaling(state, rescale_info); @@ -370,9 +357,7 @@ __global__ void scale_csr_nnz_kernel(const int *__restrict__ constraint_row_ind, const double *__restrict__ inverse_constraint_rescaling, int nnz) { - for (int k = blockIdx.x * blockDim.x + threadIdx.x; - k < nnz; - k += gridDim.x * blockDim.x) + for (int k = blockIdx.x * blockDim.x + threadIdx.x; k < nnz; k += gridDim.x * blockDim.x) { int i = constraint_row_ind[k]; int j = constraint_col_ind[k]; @@ -450,14 +435,14 @@ __global__ void clamp_sqrt_and_accum_kernel(double *__restrict__ scaling_factors } } -__global__ void compute_bound_contrib_kernel( - const double *__restrict__ constraint_lower_bound, - const double *__restrict__ constraint_upper_bound, - int num_constraints, - double *__restrict__ contrib) +__global__ void compute_bound_contrib_kernel(const double *__restrict__ constraint_lower_bound, + const double *__restrict__ constraint_upper_bound, + int num_constraints, + double *__restrict__ contrib) { int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i >= num_constraints) return; + if (i >= num_constraints) + return; double Li = constraint_lower_bound[i]; double Ui = constraint_upper_bound[i]; @@ -475,13 +460,12 @@ __global__ void compute_bound_contrib_kernel( contrib[i] = acc; } -__global__ void scale_bounds_kernel( - double *__restrict__ constraint_lower_bound, - double *__restrict__ constraint_upper_bound, - double *__restrict__ initial_dual_solution, - int num_constraints, - double constraint_scale, - double objective_scale) +__global__ void scale_bounds_kernel(double *__restrict__ constraint_lower_bound, + double *__restrict__ constraint_upper_bound, + double *__restrict__ initial_dual_solution, + int num_constraints, + double constraint_scale, + double objective_scale) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i >= num_constraints) @@ -491,14 +475,13 @@ __global__ void scale_bounds_kernel( initial_dual_solution[i] *= objective_scale; } -__global__ void scale_objective_kernel( - double *__restrict__ objective_vector, - double *__restrict__ variable_lower_bound, - double *__restrict__ variable_upper_bound, - double *__restrict__ initial_primal_solution, - int num_variables, - double constraint_scale, - double objective_scale) +__global__ void scale_objective_kernel(double *__restrict__ objective_vector, + double *__restrict__ variable_lower_bound, + double *__restrict__ variable_upper_bound, + double *__restrict__ initial_primal_solution, + int num_variables, + double constraint_scale, + double objective_scale) { int j = blockIdx.x * blockDim.x + threadIdx.x; if (j >= num_variables) diff --git a/src/presolve.c b/src/presolve.c index f5933dc..c2f788f 100644 --- a/src/presolve.c +++ b/src/presolve.c @@ -17,16 +17,16 @@ const char *get_presolve_status_str(enum PresolveStatus_ status) { switch (status) { - case UNCHANGED: - return "UNCHANGED"; - case REDUCED: - return "REDUCED"; - case INFEASIBLE: - return "INFEASIBLE"; - case UNBNDORINFEAS: - return "INFEASIBLE_OR_UNBOUNDED"; - default: - return "UNKNOWN_STATUS"; + case UNCHANGED: + return "UNCHANGED"; + case REDUCED: + return "REDUCED"; + case INFEASIBLE: + return "INFEASIBLE"; + case UNBNDORINFEAS: + return "INFEASIBLE_OR_UNBOUNDED"; + default: + return "UNKNOWN_STATUS"; } } @@ -77,19 +77,18 @@ cupdlpx_presolve_info_t *pslp_presolve(const lp_problem_t *original_prob, const info->settings->verbose = false; // 2. Init Presolver - info->presolver = new_presolver( - original_prob->constraint_matrix_values, - original_prob->constraint_matrix_col_indices, - original_prob->constraint_matrix_row_pointers, - original_prob->num_constraints, - original_prob->num_variables, - original_prob->constraint_matrix_num_nonzeros, - original_prob->constraint_lower_bound, - original_prob->constraint_upper_bound, - original_prob->variable_lower_bound, - original_prob->variable_upper_bound, - original_prob->objective_vector, - info->settings); + info->presolver = new_presolver(original_prob->constraint_matrix_values, + original_prob->constraint_matrix_col_indices, + original_prob->constraint_matrix_row_pointers, + original_prob->num_constraints, + original_prob->num_variables, + original_prob->constraint_matrix_num_nonzeros, + original_prob->constraint_lower_bound, + original_prob->constraint_upper_bound, + original_prob->variable_lower_bound, + original_prob->variable_upper_bound, + original_prob->objective_vector, + info->settings); // 3. Run Presolve PresolveStatus status = run_presolver(info->presolver); @@ -101,10 +100,10 @@ cupdlpx_presolve_info_t *pslp_presolve(const lp_problem_t *original_prob, const printf(" %-15s : %s\n", "status", get_presolve_status_str(status)); printf(" %-15s : %.3g sec\n", "presolve time", info->presolve_time); printf(" %-15s : %d rows, %d columns, %d nonzeros\n", - "reduced problem", - info->presolver->reduced_prob->m, - info->presolver->reduced_prob->n, - info->presolver->reduced_prob->nnz); + "reduced problem", + info->presolver->reduced_prob->m, + info->presolver->reduced_prob->n, + info->presolver->reduced_prob->nnz); } if (status & INFEASIBLE || status & UNBNDORINFEAS || info->presolver->reduced_prob->n == 0) @@ -180,14 +179,9 @@ cupdlpx_result_t *create_result_from_presolve(const cupdlpx_presolve_info_t *inf return result; } -void pslp_postsolve(const cupdlpx_presolve_info_t *info, - cupdlpx_result_t *result, - const lp_problem_t *original_prob) +void pslp_postsolve(const cupdlpx_presolve_info_t *info, cupdlpx_result_t *result, const lp_problem_t *original_prob) { - postsolve(info->presolver, - result->primal_solution, - result->dual_solution, - result->reduced_cost); + postsolve(info->presolver, result->primal_solution, result->dual_solution, result->reduced_cost); result->num_reduced_variables = info->presolver->reduced_prob->n; result->num_reduced_constraints = info->presolver->reduced_prob->m; @@ -202,7 +196,7 @@ void pslp_postsolve(const cupdlpx_presolve_info_t *info, memcpy(result->dual_solution, info->presolver->sol->y, original_prob->num_constraints * sizeof(double)); memcpy(result->reduced_cost, info->presolver->sol->z, original_prob->num_variables * sizeof(double)); // TODO: this can be removed if PSLP implements it. - for (int i=0; i num_variables; i++) + for (int i = 0; i < original_prob->num_variables; i++) { if (!isfinite(original_prob->variable_lower_bound[i])) { @@ -241,4 +235,4 @@ void cupdlpx_presolve_info_free(cupdlpx_presolve_info_t *info) if (info->settings) free_settings(info->settings); free(info); -} \ No newline at end of file +} diff --git a/src/solver.cu b/src/solver.cu index 6cb9648..896f19d 100644 --- a/src/solver.cu +++ b/src/solver.cu @@ -29,9 +29,7 @@ limitations under the License. #include #include -__global__ void build_row_ind(const int *__restrict__ row_ptr, - int num_rows, - int *__restrict__ row_ind); +__global__ void build_row_ind(const int *__restrict__ row_ptr, int num_rows, int *__restrict__ row_ind); __global__ void build_transpose_map(const int *__restrict__ A_row_ind, const int *__restrict__ A_col_ind, const int *__restrict__ At_row_ptr, @@ -49,82 +47,101 @@ __global__ void rescale_solution_kernel(double *__restrict__ primal_solution, const double *__restrict__ constraint_rescaling, const double objective_vector_rescaling, const double constraint_bound_rescaling, - int n_vars, int n_cons); -__global__ void compute_delta_solution_kernel( - const double *__restrict__ initial_primal, - const double *__restrict__ pdhg_primal, double *__restrict__ delta_primal, - const double *__restrict__ initial_dual, - const double *__restrict__ pdhg_dual, double *__restrict__ delta_dual, - int n_vars, int n_cons); + int n_vars, + int n_cons); +__global__ void compute_delta_solution_kernel(const double *__restrict__ initial_primal, + const double *__restrict__ pdhg_primal, + double *__restrict__ delta_primal, + const double *__restrict__ initial_dual, + const double *__restrict__ pdhg_dual, + double *__restrict__ delta_dual, + int n_vars, + int n_cons); static cupdlpx_result_t *create_result_from_state(pdhg_solver_state_t *state, const lp_problem_t *original_problem); static void perform_restart(pdhg_solver_state_t *state, const pdhg_parameters_t *params); static void initialize_step_size_and_primal_weight(pdhg_solver_state_t *state, const pdhg_parameters_t *params); -static pdhg_solver_state_t *initialize_solver_state(const lp_problem_t *working_problem, const pdhg_parameters_t* params); +static pdhg_solver_state_t *initialize_solver_state(const lp_problem_t *working_problem, + const pdhg_parameters_t *params); static void compute_fixed_point_error(pdhg_solver_state_t *state); void pdhg_solver_state_free(pdhg_solver_state_t *state); void rescale_info_free(rescale_info_t *info); static void perform_primal_restart(pdhg_solver_state_t *state); static void perform_dual_restart(pdhg_solver_state_t *state); -void primal_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_t *state, const pdhg_solver_state_t *ori_state); -void dual_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_t *state, const pdhg_solver_state_t *ori_state); +void primal_feasibility_polish(const pdhg_parameters_t *params, + pdhg_solver_state_t *state, + const pdhg_solver_state_t *ori_state); +void dual_feasibility_polish(const pdhg_parameters_t *params, + pdhg_solver_state_t *state, + const pdhg_solver_state_t *ori_state); void primal_feas_polish_state_free(pdhg_solver_state_t *state); void dual_feas_polish_state_free(pdhg_solver_state_t *state); void feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_t *state); static void compute_primal_fixed_point_error(pdhg_solver_state_t *state); static void compute_dual_fixed_point_error(pdhg_solver_state_t *state); -static pdhg_solver_state_t *initialize_primal_feas_polish_state( - const pdhg_solver_state_t *original_state); -static pdhg_solver_state_t *initialize_dual_feas_polish_state( - const pdhg_solver_state_t *original_state); -__global__ void compute_next_primal_solution_kernel( - double *__restrict__ current_primal, - double *__restrict__ reflected_primal, - const double *__restrict__ initial_primal, - const double *__restrict__ dual_product, - const double *__restrict__ objective, - const double *__restrict__ var_lb, const double *__restrict__ var_ub, - int n, const double *__restrict__ d_step_size, - const int *__restrict__ d_base_count, int k_offset, - double reflection_coeff); -__global__ void compute_next_primal_solution_major_kernel( - double *__restrict__ current_primal, double *__restrict__ pdhg_primal, - double *__restrict__ reflected_primal, - const double *__restrict__ initial_primal, - const double *__restrict__ dual_product, - const double *__restrict__ objective, - const double *__restrict__ var_lb, const double *__restrict__ var_ub, - int n, const double *__restrict__ d_step_size, - double *__restrict__ dual_slack, - const int *__restrict__ d_base_count, int k_offset, - double reflection_coeff); -__global__ void compute_next_dual_solution_kernel( - double *__restrict__ current_dual, - const double *__restrict__ initial_dual, - const double *__restrict__ primal_product, - const double *__restrict__ const_lb, - const double *__restrict__ const_ub, int n, - const double *__restrict__ d_step_size, - const int *__restrict__ d_base_count, int k_offset, - double reflection_coeff); -__global__ void compute_next_dual_solution_major_kernel( - double *__restrict__ current_dual, double *__restrict__ pdhg_dual, - double *__restrict__ reflected_dual, - const double *__restrict__ initial_dual, - const double *__restrict__ primal_product, - const double *__restrict__ const_lb, - const double *__restrict__ const_ub, int n, - const double *__restrict__ d_step_size, - const int *__restrict__ d_base_count, int k_offset, - double reflection_coeff); -static void compute_next_primal_solution(pdhg_solver_state_t *state, const int k_offset, const double reflection_coefficient, bool is_major); -static void compute_next_dual_solution(pdhg_solver_state_t *state,const int k_offset, const double reflection_coefficient, bool is_major); +static pdhg_solver_state_t *initialize_primal_feas_polish_state(const pdhg_solver_state_t *original_state); +static pdhg_solver_state_t *initialize_dual_feas_polish_state(const pdhg_solver_state_t *original_state); +__global__ void compute_next_primal_solution_kernel(double *__restrict__ current_primal, + double *__restrict__ reflected_primal, + const double *__restrict__ initial_primal, + const double *__restrict__ dual_product, + const double *__restrict__ objective, + const double *__restrict__ var_lb, + const double *__restrict__ var_ub, + int n, + const double *__restrict__ d_step_size, + const int *__restrict__ d_base_count, + int k_offset, + double reflection_coeff); +__global__ void compute_next_primal_solution_major_kernel(double *__restrict__ current_primal, + double *__restrict__ pdhg_primal, + double *__restrict__ reflected_primal, + const double *__restrict__ initial_primal, + const double *__restrict__ dual_product, + const double *__restrict__ objective, + const double *__restrict__ var_lb, + const double *__restrict__ var_ub, + int n, + const double *__restrict__ d_step_size, + double *__restrict__ dual_slack, + const int *__restrict__ d_base_count, + int k_offset, + double reflection_coeff); +__global__ void compute_next_dual_solution_kernel(double *__restrict__ current_dual, + const double *__restrict__ initial_dual, + const double *__restrict__ primal_product, + const double *__restrict__ const_lb, + const double *__restrict__ const_ub, + int n, + const double *__restrict__ d_step_size, + const int *__restrict__ d_base_count, + int k_offset, + double reflection_coeff); +__global__ void compute_next_dual_solution_major_kernel(double *__restrict__ current_dual, + double *__restrict__ pdhg_dual, + double *__restrict__ reflected_dual, + const double *__restrict__ initial_dual, + const double *__restrict__ primal_product, + const double *__restrict__ const_lb, + const double *__restrict__ const_ub, + int n, + const double *__restrict__ d_step_size, + const int *__restrict__ d_base_count, + int k_offset, + double reflection_coeff); +static void compute_next_primal_solution(pdhg_solver_state_t *state, + const int k_offset, + const double reflection_coefficient, + bool is_major); +static void compute_next_dual_solution(pdhg_solver_state_t *state, + const int k_offset, + const double reflection_coefficient, + bool is_major); static void sync_step_sizes_to_gpu(pdhg_solver_state_t *state); static void sync_inner_count_to_gpu(pdhg_solver_state_t *state); static void check_params_validity(const pdhg_parameters_t *params); -cupdlpx_result_t *optimize(const pdhg_parameters_t *params, - lp_problem_t *original_problem) +cupdlpx_result_t *optimize(const pdhg_parameters_t *params, lp_problem_t *original_problem) { check_params_validity(params); print_initial_info(params, original_problem); @@ -144,7 +161,7 @@ cupdlpx_result_t *optimize(const pdhg_parameters_t *params, } working_problem = presolve_info->reduced_problem; } - + pdhg_solver_state_t *state = initialize_solver_state(working_problem, params); display_iteration_stats(state, params->verbose); @@ -181,8 +198,10 @@ cupdlpx_result_t *optimize(const pdhg_parameters_t *params, compute_next_dual_solution(state, i, params->reflection_coefficient, false); } - compute_next_primal_solution(state, params->termination_evaluation_frequency, params->reflection_coefficient, true); - compute_next_dual_solution(state, params->termination_evaluation_frequency, params->reflection_coefficient, true); + compute_next_primal_solution( + state, params->termination_evaluation_frequency, params->reflection_coefficient, true); + compute_next_dual_solution( + state, params->termination_evaluation_frequency, params->reflection_coefficient, true); // end CUDA graph capture cudaGraph_t graph; @@ -212,8 +231,8 @@ cupdlpx_result_t *optimize(const pdhg_parameters_t *params, } // Check Adaptive Restart - do_restart = should_do_adaptive_restart(state, ¶ms->restart_params, - params->termination_evaluation_frequency); + do_restart = + should_do_adaptive_restart(state, ¶ms->restart_params, params->termination_evaluation_frequency); if (do_restart) { perform_restart(state, params); @@ -233,8 +252,7 @@ cupdlpx_result_t *optimize(const pdhg_parameters_t *params, display_iteration_stats(state, params->verbose); } - if (params->feasibility_polishing && - state->termination_reason != TERMINATION_REASON_DUAL_INFEASIBLE && + if (params->feasibility_polishing && state->termination_reason != TERMINATION_REASON_DUAL_INFEASIBLE && state->termination_reason != TERMINATION_REASON_PRIMAL_INFEASIBLE) { feasibility_polish(params, state); @@ -259,16 +277,16 @@ static void sync_step_sizes_to_gpu(pdhg_solver_state_t *state) double current_primal_step = state->step_size / state->primal_weight; double current_dual_step = state->step_size * state->primal_weight; - CUDA_CHECK(cudaMemcpyAsync(state->d_primal_step_size, ¤t_primal_step, - sizeof(double), cudaMemcpyHostToDevice, state->stream)); - CUDA_CHECK(cudaMemcpyAsync(state->d_dual_step_size, ¤t_dual_step, - sizeof(double), cudaMemcpyHostToDevice, state->stream)); + CUDA_CHECK(cudaMemcpyAsync( + state->d_primal_step_size, ¤t_primal_step, sizeof(double), cudaMemcpyHostToDevice, state->stream)); + CUDA_CHECK(cudaMemcpyAsync( + state->d_dual_step_size, ¤t_dual_step, sizeof(double), cudaMemcpyHostToDevice, state->stream)); } static void sync_inner_count_to_gpu(pdhg_solver_state_t *state) { - CUDA_CHECK(cudaMemcpyAsync(state->d_inner_count, &state->inner_count, - sizeof(int), cudaMemcpyHostToDevice, state->stream)); + CUDA_CHECK( + cudaMemcpyAsync(state->d_inner_count, &state->inner_count, sizeof(int), cudaMemcpyHostToDevice, state->stream)); } static void check_params_validity(const pdhg_parameters_t *params) @@ -282,16 +300,15 @@ static void check_params_validity(const pdhg_parameters_t *params) } } -__global__ void compute_and_rescale_reduced_cost_kernel( - double *__restrict__ reduced_cost, - const double *__restrict__ objective, - const double *__restrict__ dual_product, - const double *__restrict__ variable_rescaling, - const double objective_vector_rescaling, - const double constraint_bound_rescaling, - const double *__restrict__ variable_lower_bound, - const double *__restrict__ variable_upper_bound, - int n_vars) +__global__ void compute_and_rescale_reduced_cost_kernel(double *__restrict__ reduced_cost, + const double *__restrict__ objective, + const double *__restrict__ dual_product, + const double *__restrict__ variable_rescaling, + const double objective_vector_rescaling, + const double constraint_bound_rescaling, + const double *__restrict__ variable_lower_bound, + const double *__restrict__ variable_upper_bound, + int n_vars) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n_vars) @@ -310,9 +327,8 @@ __global__ void compute_and_rescale_reduced_cost_kernel( } } -static pdhg_solver_state_t *initialize_solver_state( - const lp_problem_t *working_problem, - const pdhg_parameters_t* params) +static pdhg_solver_state_t *initialize_solver_state(const lp_problem_t *working_problem, + const pdhg_parameters_t *params) { pdhg_solver_state_t *state = (pdhg_solver_state_t *)safe_calloc(1, sizeof(pdhg_solver_state_t)); @@ -341,20 +357,26 @@ static pdhg_solver_state_t *initialize_solver_state( state->num_blocks_primal = (state->num_variables + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; state->num_blocks_dual = (state->num_constraints + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - state->num_blocks_primal_dual = (state->num_variables + state->num_constraints + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + state->num_blocks_primal_dual = + (state->num_variables + state->num_constraints + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; state->num_blocks_nnz = (state->constraint_matrix->num_nonzeros + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; CUSPARSE_CHECK(cusparseCreate(&state->sparse_handle)); CUBLAS_CHECK(cublasCreate(&state->blas_handle)); CUBLAS_CHECK(cublasSetPointerMode(state->blas_handle, CUBLAS_POINTER_MODE_HOST)); -#define ALLOC_AND_COPY(dest, src, bytes) \ - CUDA_CHECK(cudaMalloc(&dest, bytes)); \ +#define ALLOC_AND_COPY(dest, src, bytes) \ + CUDA_CHECK(cudaMalloc(&dest, bytes)); \ CUDA_CHECK(cudaMemcpy(dest, src, bytes, cudaMemcpyHostToDevice)); - ALLOC_AND_COPY(state->constraint_matrix->row_ptr, working_problem->constraint_matrix_row_pointers, (n_cons + 1) * sizeof(int)); - ALLOC_AND_COPY(state->constraint_matrix->col_ind, working_problem->constraint_matrix_col_indices, working_problem->constraint_matrix_num_nonzeros * sizeof(int)); - ALLOC_AND_COPY(state->constraint_matrix->val, working_problem->constraint_matrix_values, working_problem->constraint_matrix_num_nonzeros * sizeof(double)); + ALLOC_AND_COPY( + state->constraint_matrix->row_ptr, working_problem->constraint_matrix_row_pointers, (n_cons + 1) * sizeof(int)); + ALLOC_AND_COPY(state->constraint_matrix->col_ind, + working_problem->constraint_matrix_col_indices, + working_problem->constraint_matrix_num_nonzeros * sizeof(int)); + ALLOC_AND_COPY(state->constraint_matrix->val, + working_problem->constraint_matrix_values, + working_problem->constraint_matrix_num_nonzeros * sizeof(double)); CUDA_CHECK(cudaMalloc(&state->constraint_matrix->row_ind, nnz * sizeof(int))); build_row_ind<<num_blocks_dual, THREADS_PER_BLOCK>>>( @@ -362,25 +384,45 @@ static pdhg_solver_state_t *initialize_solver_state( CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaMalloc(&state->constraint_matrix_t->row_ptr, (n_vars + 1) * sizeof(int))); - CUDA_CHECK(cudaMalloc(&state->constraint_matrix_t->col_ind, working_problem->constraint_matrix_num_nonzeros * sizeof(int))); - CUDA_CHECK(cudaMalloc(&state->constraint_matrix_t->val, working_problem->constraint_matrix_num_nonzeros * sizeof(double))); + CUDA_CHECK(cudaMalloc(&state->constraint_matrix_t->col_ind, + working_problem->constraint_matrix_num_nonzeros * sizeof(int))); + CUDA_CHECK( + cudaMalloc(&state->constraint_matrix_t->val, working_problem->constraint_matrix_num_nonzeros * sizeof(double))); size_t buffer_size = 0; void *buffer = nullptr; - CUSPARSE_CHECK(cusparseCsr2cscEx2_bufferSize( - state->sparse_handle, state->constraint_matrix->num_rows, state->constraint_matrix->num_cols, state->constraint_matrix->num_nonzeros, - state->constraint_matrix->val, state->constraint_matrix->row_ptr, state->constraint_matrix->col_ind, - state->constraint_matrix_t->val, state->constraint_matrix_t->row_ptr, state->constraint_matrix_t->col_ind, - CUDA_R_64F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG_DEFAULT, &buffer_size)); + CUSPARSE_CHECK(cusparseCsr2cscEx2_bufferSize(state->sparse_handle, + state->constraint_matrix->num_rows, + state->constraint_matrix->num_cols, + state->constraint_matrix->num_nonzeros, + state->constraint_matrix->val, + state->constraint_matrix->row_ptr, + state->constraint_matrix->col_ind, + state->constraint_matrix_t->val, + state->constraint_matrix_t->row_ptr, + state->constraint_matrix_t->col_ind, + CUDA_R_64F, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG_DEFAULT, + &buffer_size)); CUDA_CHECK(cudaMalloc(&buffer, buffer_size)); - CUSPARSE_CHECK(cusparseCsr2cscEx2( - state->sparse_handle, state->constraint_matrix->num_rows, state->constraint_matrix->num_cols, state->constraint_matrix->num_nonzeros, - state->constraint_matrix->val, state->constraint_matrix->row_ptr, state->constraint_matrix->col_ind, - state->constraint_matrix_t->val, state->constraint_matrix_t->row_ptr, state->constraint_matrix_t->col_ind, - CUDA_R_64F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG_DEFAULT, buffer)); + CUSPARSE_CHECK(cusparseCsr2cscEx2(state->sparse_handle, + state->constraint_matrix->num_rows, + state->constraint_matrix->num_cols, + state->constraint_matrix->num_nonzeros, + state->constraint_matrix->val, + state->constraint_matrix->row_ptr, + state->constraint_matrix->col_ind, + state->constraint_matrix_t->val, + state->constraint_matrix_t->row_ptr, + state->constraint_matrix_t->col_ind, + CUDA_R_64F, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG_DEFAULT, + buffer)); CUDA_CHECK(cudaFree(buffer)); @@ -391,13 +433,12 @@ static pdhg_solver_state_t *initialize_solver_state( CUDA_CHECK(cudaMalloc(&state->constraint_matrix->transpose_map, nnz * sizeof(int))); state->constraint_matrix_t->transpose_map = NULL; - build_transpose_map<<num_blocks_nnz, THREADS_PER_BLOCK>>>( - state->constraint_matrix->row_ind, - state->constraint_matrix->col_ind, - state->constraint_matrix_t->row_ptr, - state->constraint_matrix_t->col_ind, - nnz, - state->constraint_matrix->transpose_map); + build_transpose_map<<num_blocks_nnz, THREADS_PER_BLOCK>>>(state->constraint_matrix->row_ind, + state->constraint_matrix->col_ind, + state->constraint_matrix_t->row_ptr, + state->constraint_matrix_t->col_ind, + nnz, + state->constraint_matrix->transpose_map); CUDA_CHECK(cudaGetLastError()); ALLOC_AND_COPY(state->variable_lower_bound, working_problem->variable_lower_bound, var_bytes); @@ -405,9 +446,9 @@ static pdhg_solver_state_t *initialize_solver_state( ALLOC_AND_COPY(state->objective_vector, working_problem->objective_vector, var_bytes); ALLOC_AND_COPY(state->constraint_lower_bound, working_problem->constraint_lower_bound, con_bytes); ALLOC_AND_COPY(state->constraint_upper_bound, working_problem->constraint_upper_bound, con_bytes); - -#define ALLOC_ZERO(dest, bytes) \ - CUDA_CHECK(cudaMalloc(&dest, bytes)); \ + +#define ALLOC_ZERO(dest, bytes) \ + CUDA_CHECK(cudaMalloc(&dest, bytes)); \ CUDA_CHECK(cudaMemset(dest, 0, bytes)); ALLOC_ZERO(state->initial_primal_solution, var_bytes); @@ -430,15 +471,13 @@ static pdhg_solver_state_t *initialize_solver_state( if (working_problem->primal_start) { - CUDA_CHECK(cudaMemcpy(state->initial_primal_solution, - working_problem->primal_start, - var_bytes, cudaMemcpyHostToDevice)); + CUDA_CHECK(cudaMemcpy( + state->initial_primal_solution, working_problem->primal_start, var_bytes, cudaMemcpyHostToDevice)); } if (working_problem->dual_start) { - CUDA_CHECK(cudaMemcpy(state->initial_dual_solution, - working_problem->dual_start, - con_bytes, cudaMemcpyHostToDevice)); + CUDA_CHECK( + cudaMemcpy(state->initial_dual_solution, working_problem->dual_start, con_bytes, cudaMemcpyHostToDevice)); } rescale_info_t *rescale_info = rescale_problem(params, state); @@ -453,37 +492,31 @@ static pdhg_solver_state_t *initialize_solver_state( rescale_info->var_rescale = NULL; rescale_info_free(rescale_info); - CUDA_CHECK(cudaMemcpy(state->current_primal_solution, - state->initial_primal_solution, - var_bytes, cudaMemcpyDeviceToDevice)); - CUDA_CHECK(cudaMemcpy(state->current_dual_solution, - state->initial_dual_solution, - con_bytes, cudaMemcpyDeviceToDevice)); - CUDA_CHECK(cudaMemcpy(state->pdhg_primal_solution, - state->initial_primal_solution, - var_bytes, cudaMemcpyDeviceToDevice)); - CUDA_CHECK(cudaMemcpy(state->pdhg_dual_solution, - state->initial_dual_solution, - con_bytes, cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy( + state->current_primal_solution, state->initial_primal_solution, var_bytes, cudaMemcpyDeviceToDevice)); + CUDA_CHECK( + cudaMemcpy(state->current_dual_solution, state->initial_dual_solution, con_bytes, cudaMemcpyDeviceToDevice)); + CUDA_CHECK( + cudaMemcpy(state->pdhg_primal_solution, state->initial_primal_solution, var_bytes, cudaMemcpyDeviceToDevice)); + CUDA_CHECK( + cudaMemcpy(state->pdhg_dual_solution, state->initial_dual_solution, con_bytes, cudaMemcpyDeviceToDevice)); CUDA_CHECK(cudaMalloc(&state->constraint_lower_bound_finite_val, con_bytes)); CUDA_CHECK(cudaMalloc(&state->constraint_upper_bound_finite_val, con_bytes)); CUDA_CHECK(cudaMalloc(&state->variable_lower_bound_finite_val, var_bytes)); CUDA_CHECK(cudaMalloc(&state->variable_upper_bound_finite_val, var_bytes)); - fill_finite_bounds_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>( - state->constraint_lower_bound, - state->constraint_upper_bound, - state->constraint_lower_bound_finite_val, - state->constraint_upper_bound_finite_val, - n_cons); + fill_finite_bounds_kernel<<num_blocks_dual, THREADS_PER_BLOCK>>>(state->constraint_lower_bound, + state->constraint_upper_bound, + state->constraint_lower_bound_finite_val, + state->constraint_upper_bound_finite_val, + n_cons); - fill_finite_bounds_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>( - state->variable_lower_bound, - state->variable_upper_bound, - state->variable_lower_bound_finite_val, - state->variable_upper_bound_finite_val, - n_vars); + fill_finite_bounds_kernel<<num_blocks_primal, THREADS_PER_BLOCK>>>(state->variable_lower_bound, + state->variable_upper_bound, + state->variable_lower_bound_finite_val, + state->variable_upper_bound_finite_val, + n_vars); CUDA_CHECK(cudaFree(state->constraint_matrix->row_ind)); state->constraint_matrix->row_ind = NULL; @@ -497,17 +530,24 @@ static pdhg_solver_state_t *initialize_solver_state( double val = 0.0; for (int i = 0; i < n_vars; ++i) { - if (params->optimality_norm == NORM_TYPE_L_INF) { + if (params->optimality_norm == NORM_TYPE_L_INF) + { val = fabs(working_problem->objective_vector[i]); - if (val > max_val) max_val = val; - } else { + if (val > max_val) + max_val = val; + } + else + { sum_of_squares += working_problem->objective_vector[i] * working_problem->objective_vector[i]; } } - if (params->optimality_norm == NORM_TYPE_L_INF) { + if (params->optimality_norm == NORM_TYPE_L_INF) + { state->objective_vector_norm = max_val; - } else { + } + else + { state->objective_vector_norm = sqrt(sum_of_squares); } @@ -519,27 +559,39 @@ static pdhg_solver_state_t *initialize_solver_state( double lower = working_problem->constraint_lower_bound[i]; double upper = working_problem->constraint_upper_bound[i]; - if (params->optimality_norm == NORM_TYPE_L_INF) { - if (isfinite(lower) && (lower != upper)) { + if (params->optimality_norm == NORM_TYPE_L_INF) + { + if (isfinite(lower) && (lower != upper)) + { val = fabs(lower); - if (val > max_val) max_val = val; + if (val > max_val) + max_val = val; } - if (isfinite(upper)) { + if (isfinite(upper)) + { val = fabs(upper); - if (val > max_val) max_val = val; + if (val > max_val) + max_val = val; } - } else { - if (isfinite(lower) && (lower != upper)) { + } + else + { + if (isfinite(lower) && (lower != upper)) + { sum_of_squares += lower * lower; } - if (isfinite(upper)) { + if (isfinite(upper)) + { sum_of_squares += upper * upper; } } } - if (params->optimality_norm == NORM_TYPE_L_INF) { + if (params->optimality_norm == NORM_TYPE_L_INF) + { state->constraint_bound_norm = max_val; - } else { + } + else + { state->constraint_bound_norm = sqrt(sum_of_squares); } @@ -551,28 +603,83 @@ static pdhg_solver_state_t *initialize_solver_state( size_t primal_spmv_buffer_size; size_t dual_spmv_buffer_size; - CUSPARSE_CHECK(cusparseCreateCsr(&state->matA, state->num_constraints, state->num_variables, state->constraint_matrix->num_nonzeros, state->constraint_matrix->row_ptr, state->constraint_matrix->col_ind, state->constraint_matrix->val, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F)); + CUSPARSE_CHECK(cusparseCreateCsr(&state->matA, + state->num_constraints, + state->num_variables, + state->constraint_matrix->num_nonzeros, + state->constraint_matrix->row_ptr, + state->constraint_matrix->col_ind, + state->constraint_matrix->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F)); CUDA_CHECK(cudaGetLastError()); - CUSPARSE_CHECK(cusparseCreateCsr(&state->matAt, state->num_variables, state->num_constraints, state->constraint_matrix_t->num_nonzeros, state->constraint_matrix_t->row_ptr, state->constraint_matrix_t->col_ind, state->constraint_matrix_t->val, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F)); + CUSPARSE_CHECK(cusparseCreateCsr(&state->matAt, + state->num_variables, + state->num_constraints, + state->constraint_matrix_t->num_nonzeros, + state->constraint_matrix_t->row_ptr, + state->constraint_matrix_t->col_ind, + state->constraint_matrix_t->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F)); CUDA_CHECK(cudaGetLastError()); - CUSPARSE_CHECK(cusparseCreateDnVec(&state->vec_primal_sol, state->num_variables, state->pdhg_primal_solution, CUDA_R_64F)); - CUSPARSE_CHECK(cusparseCreateDnVec(&state->vec_dual_sol, state->num_constraints, state->pdhg_dual_solution, CUDA_R_64F)); - CUSPARSE_CHECK(cusparseCreateDnVec(&state->vec_primal_prod, state->num_constraints, state->primal_product, CUDA_R_64F)); + CUSPARSE_CHECK( + cusparseCreateDnVec(&state->vec_primal_sol, state->num_variables, state->pdhg_primal_solution, CUDA_R_64F)); + CUSPARSE_CHECK( + cusparseCreateDnVec(&state->vec_dual_sol, state->num_constraints, state->pdhg_dual_solution, CUDA_R_64F)); + CUSPARSE_CHECK( + cusparseCreateDnVec(&state->vec_primal_prod, state->num_constraints, state->primal_product, CUDA_R_64F)); CUSPARSE_CHECK(cusparseCreateDnVec(&state->vec_dual_prod, state->num_variables, state->dual_product, CUDA_R_64F)); - CUSPARSE_CHECK(cusparseSpMV_bufferSize(state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, state->matA, state->vec_primal_sol, &HOST_ZERO, state->vec_primal_prod, CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, &primal_spmv_buffer_size)); - - CUSPARSE_CHECK(cusparseSpMV_bufferSize(state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, state->matAt, state->vec_dual_sol, &HOST_ZERO, state->vec_dual_prod, CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, &dual_spmv_buffer_size)); + CUSPARSE_CHECK(cusparseSpMV_bufferSize(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matA, + state->vec_primal_sol, + &HOST_ZERO, + state->vec_primal_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + &primal_spmv_buffer_size)); + + CUSPARSE_CHECK(cusparseSpMV_bufferSize(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matAt, + state->vec_dual_sol, + &HOST_ZERO, + state->vec_dual_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + &dual_spmv_buffer_size)); CUDA_CHECK(cudaMalloc(&state->primal_spmv_buffer, primal_spmv_buffer_size)); - CUSPARSE_CHECK(cusparseSpMV_preprocess(state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - &HOST_ONE, state->matA, state->vec_primal_sol, &HOST_ZERO, state->vec_primal_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->primal_spmv_buffer)); + CUSPARSE_CHECK(cusparseSpMV_preprocess(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matA, + state->vec_primal_sol, + &HOST_ZERO, + state->vec_primal_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->primal_spmv_buffer)); CUDA_CHECK(cudaMalloc(&state->dual_spmv_buffer, dual_spmv_buffer_size)); - CUSPARSE_CHECK(cusparseSpMV_preprocess(state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - &HOST_ONE, state->matAt, state->vec_dual_sol, &HOST_ZERO, state->vec_dual_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->dual_spmv_buffer)); + CUSPARSE_CHECK(cusparseSpMV_preprocess(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matAt, + state->vec_dual_sol, + &HOST_ZERO, + state->vec_dual_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->dual_spmv_buffer)); CUDA_CHECK(cudaMalloc(&state->d_primal_step_size, sizeof(double))); CUDA_CHECK(cudaMalloc(&state->d_dual_step_size, sizeof(double))); @@ -582,23 +689,21 @@ static pdhg_solver_state_t *initialize_solver_state( CUDA_CHECK(cudaMemset(state->d_dual_step_size, 0, sizeof(double))); CUDA_CHECK(cudaMemset(state->d_inner_count, 0, sizeof(int))); - CUDA_CHECK( - cudaMalloc(&state->ones_primal_d, state->num_variables * sizeof(double))); - CUDA_CHECK( - cudaMalloc(&state->ones_dual_d, state->num_constraints * sizeof(double))); + CUDA_CHECK(cudaMalloc(&state->ones_primal_d, state->num_variables * sizeof(double))); + CUDA_CHECK(cudaMalloc(&state->ones_dual_d, state->num_constraints * sizeof(double))); double *ones_primal_h = (double *)safe_malloc(state->num_variables * sizeof(double)); for (int i = 0; i < state->num_variables; ++i) ones_primal_h[i] = 1.0; - CUDA_CHECK(cudaMemcpy(state->ones_primal_d, ones_primal_h, state->num_variables * sizeof(double), cudaMemcpyHostToDevice)); + CUDA_CHECK( + cudaMemcpy(state->ones_primal_d, ones_primal_h, state->num_variables * sizeof(double), cudaMemcpyHostToDevice)); free(ones_primal_h); double *ones_dual_h = (double *)safe_malloc(state->num_constraints * sizeof(double)); for (int i = 0; i < state->num_constraints; ++i) ones_dual_h[i] = 1.0; - CUDA_CHECK(cudaMemcpy(state->ones_dual_d, ones_dual_h, - state->num_constraints * sizeof(double), - cudaMemcpyHostToDevice)); + CUDA_CHECK( + cudaMemcpy(state->ones_dual_d, ones_dual_h, state->num_constraints * sizeof(double), cudaMemcpyHostToDevice)); // --- CUDA Graph Initialization --- CUDA_CHECK(cudaStreamCreate(&state->stream)); @@ -610,11 +715,22 @@ static pdhg_solver_state_t *initialize_solver_state( { printf("---------------------------------------------------------------------" "------------------\n"); - printf("%s | %s | %s | %s \n", " runtime ", " objective ", - " absolute residuals ", " relative residuals "); - printf("%s %s | %s %s | %s %s %s | %s %s %s \n", " iter", " time ", - " pr obj ", " du obj ", " pr res", " du res", " gap ", " pr res", - " du res", " gap "); + printf("%s | %s | %s | %s \n", + " runtime ", + " objective ", + " absolute residuals ", + " relative residuals "); + printf("%s %s | %s %s | %s %s %s | %s %s %s \n", + " iter", + " time ", + " pr obj ", + " du obj ", + " pr res", + " du res", + " gap ", + " pr res", + " du res", + " gap "); printf("---------------------------------------------------------------------" "------------------\n"); } @@ -622,9 +738,7 @@ static pdhg_solver_state_t *initialize_solver_state( return state; } -__global__ void build_row_ind(const int *__restrict__ row_ptr, - int num_rows, - int *__restrict__ row_ind) +__global__ void build_row_ind(const int *__restrict__ row_ptr, int num_rows, int *__restrict__ row_ind) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i >= num_rows) @@ -639,13 +753,12 @@ __global__ void build_row_ind(const int *__restrict__ row_ptr, } } -__global__ void build_transpose_map( - const int *__restrict__ A_row_ind, - const int *__restrict__ A_col_ind, - const int *__restrict__ At_row_ptr, - const int *__restrict__ At_col_ind, - int nnz, - int *__restrict__ A_to_At) +__global__ void build_transpose_map(const int *__restrict__ A_row_ind, + const int *__restrict__ A_col_ind, + const int *__restrict__ At_row_ptr, + const int *__restrict__ At_col_ind, + int nnz, + int *__restrict__ A_to_At) { int k = blockIdx.x * blockDim.x + threadIdx.x; if (k >= nnz) @@ -673,12 +786,11 @@ __global__ void build_transpose_map( A_to_At[k] = pos; } -__global__ void fill_finite_bounds_kernel( - const double *__restrict__ lb, - const double *__restrict__ ub, - double *__restrict__ lb_finite, - double *__restrict__ ub_finite, - int num_elements) +__global__ void fill_finite_bounds_kernel(const double *__restrict__ lb, + const double *__restrict__ ub, + double *__restrict__ lb_finite, + double *__restrict__ ub_finite, + int num_elements) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i >= num_elements) @@ -691,16 +803,18 @@ __global__ void fill_finite_bounds_kernel( ub_finite[i] = isfinite(Ui) ? Ui : 0.0; } -__global__ void compute_next_primal_solution_kernel( - double *__restrict__ current_primal, - double *__restrict__ reflected_primal, - const double *__restrict__ initial_primal, - const double *__restrict__ dual_product, - const double *__restrict__ objective, - const double *__restrict__ var_lb, const double *__restrict__ var_ub, - int n, const double *__restrict__ d_step_size, - const int *__restrict__ d_base_count, int k_offset, - double reflection_coeff) +__global__ void compute_next_primal_solution_kernel(double *__restrict__ current_primal, + double *__restrict__ reflected_primal, + const double *__restrict__ initial_primal, + const double *__restrict__ dual_product, + const double *__restrict__ objective, + const double *__restrict__ var_lb, + const double *__restrict__ var_ub, + int n, + const double *__restrict__ d_step_size, + const int *__restrict__ d_base_count, + int k_offset, + double reflection_coeff) { int i = blockIdx.x * blockDim.x + threadIdx.x; double step_size = *d_step_size; @@ -708,27 +822,28 @@ __global__ void compute_next_primal_solution_kernel( double weight = (double)(current_k) / (double)(current_k + 1); if (i < n) { - double temp = - current_primal[i] - step_size * (objective[i] - dual_product[i]); + double temp = current_primal[i] - step_size * (objective[i] - dual_product[i]); double temp_proj = fmax(var_lb[i], fmin(temp, var_ub[i])); reflected_primal[i] = 2.0 * temp_proj - current_primal[i]; - double reflected = reflection_coeff * reflected_primal[i] + - (1.0 - reflection_coeff) * current_primal[i]; + double reflected = reflection_coeff * reflected_primal[i] + (1.0 - reflection_coeff) * current_primal[i]; current_primal[i] = weight * reflected + (1.0 - weight) * initial_primal[i]; } } -__global__ void compute_next_primal_solution_major_kernel( - double *__restrict__ current_primal, double *__restrict__ pdhg_primal, - double *__restrict__ reflected_primal, - const double *__restrict__ initial_primal, - const double *__restrict__ dual_product, - const double *__restrict__ objective, - const double *__restrict__ var_lb, const double *__restrict__ var_ub, - int n, const double *__restrict__ d_step_size, - double *__restrict__ dual_slack, - const int *__restrict__ d_base_count, int k_offset, - double reflection_coeff) +__global__ void compute_next_primal_solution_major_kernel(double *__restrict__ current_primal, + double *__restrict__ pdhg_primal, + double *__restrict__ reflected_primal, + const double *__restrict__ initial_primal, + const double *__restrict__ dual_product, + const double *__restrict__ objective, + const double *__restrict__ var_lb, + const double *__restrict__ var_ub, + int n, + const double *__restrict__ d_step_size, + double *__restrict__ dual_slack, + const int *__restrict__ d_base_count, + int k_offset, + double reflection_coeff) { int i = blockIdx.x * blockDim.x + threadIdx.x; double step_size = *d_step_size; @@ -736,26 +851,25 @@ __global__ void compute_next_primal_solution_major_kernel( double weight = (double)(current_k) / (double)(current_k + 1); if (i < n) { - double temp = - current_primal[i] - step_size * (objective[i] - dual_product[i]); + double temp = current_primal[i] - step_size * (objective[i] - dual_product[i]); pdhg_primal[i] = fmax(var_lb[i], fmin(temp, var_ub[i])); dual_slack[i] = (pdhg_primal[i] - temp) / step_size; reflected_primal[i] = 2.0 * pdhg_primal[i] - current_primal[i]; - double reflected = reflection_coeff * reflected_primal[i] + - (1.0 - reflection_coeff) * current_primal[i]; + double reflected = reflection_coeff * reflected_primal[i] + (1.0 - reflection_coeff) * current_primal[i]; current_primal[i] = weight * reflected + (1.0 - weight) * initial_primal[i]; } } -__global__ void compute_next_dual_solution_kernel( - double *__restrict__ current_dual, - const double *__restrict__ initial_dual, - const double *__restrict__ primal_product, - const double *__restrict__ const_lb, - const double *__restrict__ const_ub, int n, - const double *__restrict__ d_step_size, - const int *__restrict__ d_base_count, int k_offset, - double reflection_coeff) +__global__ void compute_next_dual_solution_kernel(double *__restrict__ current_dual, + const double *__restrict__ initial_dual, + const double *__restrict__ primal_product, + const double *__restrict__ const_lb, + const double *__restrict__ const_ub, + int n, + const double *__restrict__ d_step_size, + const int *__restrict__ d_base_count, + int k_offset, + double reflection_coeff) { int i = blockIdx.x * blockDim.x + threadIdx.x; double step_size = *d_step_size; @@ -766,21 +880,23 @@ __global__ void compute_next_dual_solution_kernel( double temp = current_dual[i] / step_size - primal_product[i]; double temp_proj = fmax(-const_ub[i], fmin(temp, -const_lb[i])); double reflected = reflection_coeff * (2.0 * (temp - temp_proj) * step_size - current_dual[i]) + - (1.0 - reflection_coeff) * current_dual[i]; + (1.0 - reflection_coeff) * current_dual[i]; current_dual[i] = weight * reflected + (1.0 - weight) * initial_dual[i]; } } -__global__ void compute_next_dual_solution_major_kernel( - double *__restrict__ current_dual, double *__restrict__ pdhg_dual, - double *__restrict__ reflected_dual, - const double *__restrict__ initial_dual, - const double *__restrict__ primal_product, - const double *__restrict__ const_lb, - const double *__restrict__ const_ub, int n, - const double *__restrict__ d_step_size, - const int *__restrict__ d_base_count, int k_offset, - double reflection_coeff) +__global__ void compute_next_dual_solution_major_kernel(double *__restrict__ current_dual, + double *__restrict__ pdhg_dual, + double *__restrict__ reflected_dual, + const double *__restrict__ initial_dual, + const double *__restrict__ primal_product, + const double *__restrict__ const_lb, + const double *__restrict__ const_ub, + int n, + const double *__restrict__ d_step_size, + const int *__restrict__ d_base_count, + int k_offset, + double reflection_coeff) { int i = blockIdx.x * blockDim.x + threadIdx.x; double step_size = *d_step_size; @@ -792,41 +908,40 @@ __global__ void compute_next_dual_solution_major_kernel( double temp_proj = fmax(-const_ub[i], fmin(temp, -const_lb[i])); pdhg_dual[i] = (temp - temp_proj) * step_size; reflected_dual[i] = 2.0 * pdhg_dual[i] - current_dual[i]; - double reflected = reflection_coeff * reflected_dual[i] + - (1.0 - reflection_coeff) * current_dual[i]; + double reflected = reflection_coeff * reflected_dual[i] + (1.0 - reflection_coeff) * current_dual[i]; current_dual[i] = weight * reflected + (1.0 - weight) * initial_dual[i]; } } - __global__ void rescale_solution_kernel(double *__restrict__ primal_solution, double *__restrict__ dual_solution, const double *__restrict__ variable_rescaling, const double *__restrict__ constraint_rescaling, const double objective_vector_rescaling, const double constraint_bound_rescaling, - int n_vars, int n_cons) + int n_vars, + int n_cons) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n_vars) { - primal_solution[i] = - primal_solution[i] / variable_rescaling[i] / constraint_bound_rescaling; + primal_solution[i] = primal_solution[i] / variable_rescaling[i] / constraint_bound_rescaling; } else if (i < n_vars + n_cons) { int idx = i - n_vars; - dual_solution[idx] = dual_solution[idx] / constraint_rescaling[idx] / - objective_vector_rescaling; + dual_solution[idx] = dual_solution[idx] / constraint_rescaling[idx] / objective_vector_rescaling; } } -__global__ void compute_delta_solution_kernel( - const double *__restrict__ initial_primal, - const double *__restrict__ pdhg_primal, double *__restrict__ delta_primal, - const double *__restrict__ initial_dual, - const double *__restrict__ pdhg_dual, double *__restrict__ delta_dual, - int n_vars, int n_cons) +__global__ void compute_delta_solution_kernel(const double *__restrict__ initial_primal, + const double *__restrict__ pdhg_primal, + double *__restrict__ delta_primal, + const double *__restrict__ initial_dual, + const double *__restrict__ pdhg_dual, + double *__restrict__ delta_dual, + int n_vars, + int n_cons) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n_vars) @@ -840,110 +955,145 @@ __global__ void compute_delta_solution_kernel( } } -static void compute_next_primal_solution(pdhg_solver_state_t *state, const int k_offset, const double reflection_coefficient, bool is_major) +static void compute_next_primal_solution(pdhg_solver_state_t *state, + const int k_offset, + const double reflection_coefficient, + bool is_major) { - CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_sol, - state->current_dual_solution)); - CUSPARSE_CHECK( - cusparseDnVecSetValues(state->vec_dual_prod, state->dual_product)); - - CUSPARSE_CHECK(cusparseSpMV( - state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, - state->matAt, state->vec_dual_sol, &HOST_ZERO, state->vec_dual_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->dual_spmv_buffer)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_sol, state->current_dual_solution)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_prod, state->dual_product)); + + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matAt, + state->vec_dual_sol, + &HOST_ZERO, + state->vec_dual_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->dual_spmv_buffer)); double step = state->step_size / state->primal_weight; if (is_major) { - compute_next_primal_solution_major_kernel<<num_blocks_primal, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->current_primal_solution, state->pdhg_primal_solution, - state->reflected_primal_solution, state->initial_primal_solution, state->dual_product, - state->objective_vector, state->variable_lower_bound, - state->variable_upper_bound, state->num_variables, state->d_primal_step_size, - state->dual_slack, state->d_inner_count, k_offset, reflection_coefficient); + compute_next_primal_solution_major_kernel<<num_blocks_primal, THREADS_PER_BLOCK, 0, state->stream>>>( + state->current_primal_solution, + state->pdhg_primal_solution, + state->reflected_primal_solution, + state->initial_primal_solution, + state->dual_product, + state->objective_vector, + state->variable_lower_bound, + state->variable_upper_bound, + state->num_variables, + state->d_primal_step_size, + state->dual_slack, + state->d_inner_count, + k_offset, + reflection_coefficient); } else { - compute_next_primal_solution_kernel<<num_blocks_primal, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->current_primal_solution, state->reflected_primal_solution, state->initial_primal_solution, - state->dual_product, state->objective_vector, - state->variable_lower_bound, state->variable_upper_bound, - state->num_variables, state->d_primal_step_size, - state->d_inner_count, k_offset, reflection_coefficient - ); + compute_next_primal_solution_kernel<<num_blocks_primal, THREADS_PER_BLOCK, 0, state->stream>>>( + state->current_primal_solution, + state->reflected_primal_solution, + state->initial_primal_solution, + state->dual_product, + state->objective_vector, + state->variable_lower_bound, + state->variable_upper_bound, + state->num_variables, + state->d_primal_step_size, + state->d_inner_count, + k_offset, + reflection_coefficient); } } -static void compute_next_dual_solution(pdhg_solver_state_t *state,const int k_offset, const double reflection_coefficient, bool is_major) +static void compute_next_dual_solution(pdhg_solver_state_t *state, + const int k_offset, + const double reflection_coefficient, + bool is_major) { - CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_primal_sol, - state->reflected_primal_solution)); - CUSPARSE_CHECK( - cusparseDnVecSetValues(state->vec_primal_prod, state->primal_product)); - - CUSPARSE_CHECK(cusparseSpMV( - state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, - state->matA, state->vec_primal_sol, &HOST_ZERO, state->vec_primal_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->primal_spmv_buffer)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_primal_sol, state->reflected_primal_solution)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_primal_prod, state->primal_product)); + + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matA, + state->vec_primal_sol, + &HOST_ZERO, + state->vec_primal_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->primal_spmv_buffer)); double step = state->step_size * state->primal_weight; if (is_major) { - compute_next_dual_solution_major_kernel<<num_blocks_dual, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->current_dual_solution, state->pdhg_dual_solution, - state->reflected_dual_solution, state->initial_dual_solution, state->primal_product, - state->constraint_lower_bound, state->constraint_upper_bound, - state->num_constraints, state->d_dual_step_size, - state->d_inner_count, k_offset, reflection_coefficient - ); + compute_next_dual_solution_major_kernel<<num_blocks_dual, THREADS_PER_BLOCK, 0, state->stream>>>( + state->current_dual_solution, + state->pdhg_dual_solution, + state->reflected_dual_solution, + state->initial_dual_solution, + state->primal_product, + state->constraint_lower_bound, + state->constraint_upper_bound, + state->num_constraints, + state->d_dual_step_size, + state->d_inner_count, + k_offset, + reflection_coefficient); } else { - compute_next_dual_solution_kernel<<num_blocks_dual, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->current_dual_solution, state->initial_dual_solution, - state->primal_product, state->constraint_lower_bound, - state->constraint_upper_bound, state->num_constraints, state->d_dual_step_size, - state->d_inner_count, k_offset, reflection_coefficient); + compute_next_dual_solution_kernel<<num_blocks_dual, THREADS_PER_BLOCK, 0, state->stream>>>( + state->current_dual_solution, + state->initial_dual_solution, + state->primal_product, + state->constraint_lower_bound, + state->constraint_upper_bound, + state->num_constraints, + state->d_dual_step_size, + state->d_inner_count, + k_offset, + reflection_coefficient); } } -static void perform_restart(pdhg_solver_state_t *state, - const pdhg_parameters_t *params) +static void perform_restart(pdhg_solver_state_t *state, const pdhg_parameters_t *params) { - compute_delta_solution_kernel<<num_blocks_primal_dual, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->initial_primal_solution, state->pdhg_primal_solution, - state->delta_primal_solution, state->initial_dual_solution, - state->pdhg_dual_solution, state->delta_dual_solution, - state->num_variables, state->num_constraints); + compute_delta_solution_kernel<<num_blocks_primal_dual, THREADS_PER_BLOCK, 0, state->stream>>>( + state->initial_primal_solution, + state->pdhg_primal_solution, + state->delta_primal_solution, + state->initial_dual_solution, + state->pdhg_dual_solution, + state->delta_dual_solution, + state->num_variables, + state->num_constraints); double primal_dist, dual_dist; - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_variables, - state->delta_primal_solution, 1, - &primal_dist)); - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_constraints, - state->delta_dual_solution, 1, &dual_dist)); + CUBLAS_CHECK( + cublasDnrm2_v2_64(state->blas_handle, state->num_variables, state->delta_primal_solution, 1, &primal_dist)); + CUBLAS_CHECK( + cublasDnrm2_v2_64(state->blas_handle, state->num_constraints, state->delta_dual_solution, 1, &dual_dist)); - double ratio_infeas = - state->relative_dual_residual / state->relative_primal_residual; + double ratio_infeas = state->relative_dual_residual / state->relative_primal_residual; - if (primal_dist > 1e-16 && dual_dist > 1e-16 && primal_dist < 1e12 && - dual_dist < 1e12 && ratio_infeas > 1e-8 && ratio_infeas < 1e8) + if (primal_dist > 1e-16 && dual_dist > 1e-16 && primal_dist < 1e12 && dual_dist < 1e12 && ratio_infeas > 1e-8 && + ratio_infeas < 1e8) { - double error = - log(dual_dist) - log(primal_dist) - log(state->primal_weight); + double error = log(dual_dist) - log(primal_dist) - log(state->primal_weight); state->primal_weight_error_sum *= params->restart_params.i_smooth; state->primal_weight_error_sum += error; double delta_error = error - state->primal_weight_last_error; state->primal_weight *= - exp(params->restart_params.k_p * error + - params->restart_params.k_i * state->primal_weight_error_sum + + exp(params->restart_params.k_p * error + params->restart_params.k_i * state->primal_weight_error_sum + params->restart_params.k_d * delta_error); state->primal_weight_last_error = error; } @@ -954,24 +1104,27 @@ static void perform_restart(pdhg_solver_state_t *state, state->primal_weight_last_error = 0.0; } - double primal_dual_residual_gap = fabs( - log10(state->relative_dual_residual / state->relative_primal_residual)); + double primal_dual_residual_gap = fabs(log10(state->relative_dual_residual / state->relative_primal_residual)); if (primal_dual_residual_gap < state->best_primal_dual_residual_gap) { state->best_primal_dual_residual_gap = primal_dual_residual_gap; state->best_primal_weight = state->primal_weight; } - CUDA_CHECK(cudaMemcpy( - state->initial_primal_solution, state->pdhg_primal_solution, - state->num_variables * sizeof(double), cudaMemcpyDeviceToDevice)); - CUDA_CHECK(cudaMemcpy( - state->current_primal_solution, state->pdhg_primal_solution, - state->num_variables * sizeof(double), cudaMemcpyDeviceToDevice)); - CUDA_CHECK(cudaMemcpy(state->initial_dual_solution, state->pdhg_dual_solution, + CUDA_CHECK(cudaMemcpy(state->initial_primal_solution, + state->pdhg_primal_solution, + state->num_variables * sizeof(double), + cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy(state->current_primal_solution, + state->pdhg_primal_solution, + state->num_variables * sizeof(double), + cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy(state->initial_dual_solution, + state->pdhg_dual_solution, state->num_constraints * sizeof(double), cudaMemcpyDeviceToDevice)); - CUDA_CHECK(cudaMemcpy(state->current_dual_solution, state->pdhg_dual_solution, + CUDA_CHECK(cudaMemcpy(state->current_dual_solution, + state->pdhg_dual_solution, state->num_constraints * sizeof(double), cudaMemcpyDeviceToDevice)); @@ -979,8 +1132,7 @@ static void perform_restart(pdhg_solver_state_t *state, state->last_trial_fixed_point_error = INFINITY; } -static void initialize_step_size_and_primal_weight(pdhg_solver_state_t *state, - const pdhg_parameters_t *params) +static void initialize_step_size_and_primal_weight(pdhg_solver_state_t *state, const pdhg_parameters_t *params) { if (state->constraint_matrix->num_nonzeros == 0) { @@ -988,9 +1140,12 @@ static void initialize_step_size_and_primal_weight(pdhg_solver_state_t *state, } else { - double max_sv = estimate_maximum_singular_value( - state->sparse_handle, state->blas_handle, state->constraint_matrix, - state->constraint_matrix_t, params->sv_max_iter, params->sv_tol); + double max_sv = estimate_maximum_singular_value(state->sparse_handle, + state->blas_handle, + state->constraint_matrix, + state->constraint_matrix_t, + params->sv_max_iter, + params->sv_tol); state->step_size = 0.998 / max_sv; } @@ -1000,30 +1155,36 @@ static void initialize_step_size_and_primal_weight(pdhg_solver_state_t *state, } else { - state->primal_weight = (state->objective_vector_norm + 1.0) / - (state->constraint_bound_norm + 1.0); + state->primal_weight = (state->objective_vector_norm + 1.0) / (state->constraint_bound_norm + 1.0); } state->best_primal_weight = state->primal_weight; } static void compute_fixed_point_error(pdhg_solver_state_t *state) { - compute_delta_solution_kernel<<num_blocks_primal_dual, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->pdhg_primal_solution, state->reflected_primal_solution, - state->delta_primal_solution, state->pdhg_dual_solution, - state->reflected_dual_solution, state->delta_dual_solution, - state->num_variables, state->num_constraints); + compute_delta_solution_kernel<<num_blocks_primal_dual, THREADS_PER_BLOCK, 0, state->stream>>>( + state->pdhg_primal_solution, + state->reflected_primal_solution, + state->delta_primal_solution, + state->pdhg_dual_solution, + state->reflected_dual_solution, + state->delta_dual_solution, + state->num_variables, + state->num_constraints); - CUSPARSE_CHECK( - cusparseDnVecSetValues(state->vec_dual_sol, state->delta_dual_solution)); - CUSPARSE_CHECK( - cusparseDnVecSetValues(state->vec_dual_prod, state->dual_product)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_sol, state->delta_dual_solution)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_prod, state->dual_product)); - CUSPARSE_CHECK(cusparseSpMV( - state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, - state->matAt, state->vec_dual_sol, &HOST_ZERO, state->vec_dual_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->dual_spmv_buffer)); + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matAt, + state->vec_dual_sol, + &HOST_ZERO, + state->vec_dual_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->dual_spmv_buffer)); double interaction, movement; @@ -1031,17 +1192,19 @@ static void compute_fixed_point_error(pdhg_solver_state_t *state) double dual_norm = 0.0; double cross_term = 0.0; - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_constraints, - state->delta_dual_solution, 1, &dual_norm)); - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_variables, - state->delta_primal_solution, 1, - &primal_norm)); - movement = primal_norm * primal_norm * state->primal_weight + - dual_norm * dual_norm / state->primal_weight; - - CUBLAS_CHECK(cublasDdot(state->blas_handle, state->num_variables, - state->dual_product, 1, state->delta_primal_solution, - 1, &cross_term)); + CUBLAS_CHECK( + cublasDnrm2_v2_64(state->blas_handle, state->num_constraints, state->delta_dual_solution, 1, &dual_norm)); + CUBLAS_CHECK( + cublasDnrm2_v2_64(state->blas_handle, state->num_variables, state->delta_primal_solution, 1, &primal_norm)); + movement = primal_norm * primal_norm * state->primal_weight + dual_norm * dual_norm / state->primal_weight; + + CUBLAS_CHECK(cublasDdot(state->blas_handle, + state->num_variables, + state->dual_product, + 1, + state->delta_primal_solution, + 1, + &cross_term)); interaction = 2 * state->step_size * cross_term; state->fixed_point_error = sqrt(movement + interaction); @@ -1183,19 +1346,22 @@ void rescale_info_free(rescale_info_t *info) static cupdlpx_result_t *create_result_from_state(pdhg_solver_state_t *state, const lp_problem_t *original_problem) { - cupdlpx_result_t *results = - (cupdlpx_result_t *)safe_calloc(1, sizeof(cupdlpx_result_t)); + cupdlpx_result_t *results = (cupdlpx_result_t *)safe_calloc(1, sizeof(cupdlpx_result_t)); // Compute reduced cost - CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_sol, - state->pdhg_dual_solution)); - CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_prod, - state->dual_product)); - - CUSPARSE_CHECK(cusparseSpMV( - state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, - state->matAt, state->vec_dual_sol, &HOST_ZERO, state->vec_dual_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->dual_spmv_buffer)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_sol, state->pdhg_dual_solution)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_prod, state->dual_product)); + + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matAt, + state->vec_dual_sol, + &HOST_ZERO, + state->vec_dual_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->dual_spmv_buffer)); compute_and_rescale_reduced_cost_kernel<<num_blocks_primal, THREADS_PER_BLOCK, 0, state->stream>>>( state->dual_slack, @@ -1209,27 +1375,29 @@ static cupdlpx_result_t *create_result_from_state(pdhg_solver_state_t *state, co state->num_variables); rescale_solution_kernel<<num_blocks_primal_dual, THREADS_PER_BLOCK, 0, state->stream>>>( - state->pdhg_primal_solution, state->pdhg_dual_solution, - state->variable_rescaling, state->constraint_rescaling, - state->objective_vector_rescaling, state->constraint_bound_rescaling, - state->num_variables, state->num_constraints); - - results->primal_solution = - (double *)safe_malloc(state->num_variables * sizeof(double)); - results->dual_solution = - (double *)safe_malloc(state->num_constraints * sizeof(double)); - results->reduced_cost = - (double *)safe_malloc(state->num_variables * sizeof(double)); - - CUDA_CHECK(cudaMemcpy(results->primal_solution, state->pdhg_primal_solution, + state->pdhg_primal_solution, + state->pdhg_dual_solution, + state->variable_rescaling, + state->constraint_rescaling, + state->objective_vector_rescaling, + state->constraint_bound_rescaling, + state->num_variables, + state->num_constraints); + + results->primal_solution = (double *)safe_malloc(state->num_variables * sizeof(double)); + results->dual_solution = (double *)safe_malloc(state->num_constraints * sizeof(double)); + results->reduced_cost = (double *)safe_malloc(state->num_variables * sizeof(double)); + + CUDA_CHECK(cudaMemcpy(results->primal_solution, + state->pdhg_primal_solution, state->num_variables * sizeof(double), cudaMemcpyDeviceToHost)); - CUDA_CHECK(cudaMemcpy(results->dual_solution, state->pdhg_dual_solution, + CUDA_CHECK(cudaMemcpy(results->dual_solution, + state->pdhg_dual_solution, state->num_constraints * sizeof(double), cudaMemcpyDeviceToHost)); - CUDA_CHECK(cudaMemcpy(results->reduced_cost, state->dual_slack, - state->num_variables * sizeof(double), - cudaMemcpyDeviceToHost)); + CUDA_CHECK(cudaMemcpy( + results->reduced_cost, state->dual_slack, state->num_variables * sizeof(double), cudaMemcpyDeviceToHost)); results->num_variables = original_problem->num_variables; results->num_constraints = original_problem->num_constraints; @@ -1287,9 +1455,10 @@ void feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_t *st if (primal_state->termination_reason == TERMINATION_REASON_FEAS_POLISH_SUCCESS) { - CUDA_CHECK(cudaMemcpy( - state->pdhg_primal_solution, primal_state->pdhg_primal_solution, - state->num_variables * sizeof(double), cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy(state->pdhg_primal_solution, + primal_state->pdhg_primal_solution, + state->num_variables * sizeof(double), + cudaMemcpyDeviceToDevice)); state->absolute_primal_residual = primal_state->absolute_primal_residual; state->relative_primal_residual = primal_state->relative_primal_residual; state->primal_objective_value = primal_state->primal_objective_value; @@ -1304,9 +1473,10 @@ void feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_t *st if (dual_state->termination_reason == TERMINATION_REASON_FEAS_POLISH_SUCCESS) { - CUDA_CHECK(cudaMemcpy( - state->pdhg_dual_solution, dual_state->pdhg_dual_solution, - state->num_constraints * sizeof(double), cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy(state->pdhg_dual_solution, + dual_state->pdhg_dual_solution, + state->num_constraints * sizeof(double), + cudaMemcpyDeviceToDevice)); state->absolute_dual_residual = dual_state->absolute_dual_residual; state->relative_dual_residual = dual_state->relative_dual_residual; state->dual_objective_value = dual_state->dual_objective_value; @@ -1314,7 +1484,8 @@ void feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_t *st state->feasibility_iteration += dual_state->total_count - 1; state->objective_gap = fabs(state->primal_objective_value - state->dual_objective_value); - state->relative_objective_gap = state->objective_gap / (1.0 + fabs(state->primal_objective_value) + fabs(state->dual_objective_value)); + state->relative_objective_gap = + state->objective_gap / (1.0 + fabs(state->primal_objective_value) + fabs(state->dual_objective_value)); // FINAL LOGGING pdhg_feas_polish_final_log(primal_state, dual_state, params->verbose); @@ -1325,7 +1496,9 @@ void feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_t *st return; } -void primal_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_t *state, const pdhg_solver_state_t *ori_state) +void primal_feasibility_polish(const pdhg_parameters_t *params, + pdhg_solver_state_t *state, + const pdhg_solver_state_t *ori_state) { print_initial_feas_polish_info(true, params); bool do_restart = false; @@ -1356,8 +1529,10 @@ void primal_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_stat compute_next_dual_solution(state, i, params->reflection_coefficient, false); } - compute_next_primal_solution(state, params->termination_evaluation_frequency, params->reflection_coefficient, true); - compute_next_dual_solution(state, params->termination_evaluation_frequency, params->reflection_coefficient, true); + compute_next_primal_solution( + state, params->termination_evaluation_frequency, params->reflection_coefficient, true); + compute_next_dual_solution( + state, params->termination_evaluation_frequency, params->reflection_coefficient, true); // end CUDA graph capture cudaGraph_t graph; @@ -1378,10 +1553,10 @@ void primal_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_stat { display_feas_polish_iteration_stats(state, params->verbose, true); } - + // Check Adaptive Restart - do_restart = should_do_adaptive_restart(state, ¶ms->restart_params, - params->termination_evaluation_frequency); + do_restart = + should_do_adaptive_restart(state, ¶ms->restart_params, params->termination_evaluation_frequency); if (do_restart) { perform_primal_restart(state); @@ -1396,7 +1571,9 @@ void primal_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_stat return; } -void dual_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_t *state, const pdhg_solver_state_t *ori_state) +void dual_feasibility_polish(const pdhg_parameters_t *params, + pdhg_solver_state_t *state, + const pdhg_solver_state_t *ori_state) { print_initial_feas_polish_info(false, params); bool do_restart = false; @@ -1427,8 +1604,10 @@ void dual_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_ compute_next_dual_solution(state, i, params->reflection_coefficient, false); } - compute_next_primal_solution(state, params->termination_evaluation_frequency, params->reflection_coefficient, true); - compute_next_dual_solution(state, params->termination_evaluation_frequency, params->reflection_coefficient, true); + compute_next_primal_solution( + state, params->termination_evaluation_frequency, params->reflection_coefficient, true); + compute_next_dual_solution( + state, params->termination_evaluation_frequency, params->reflection_coefficient, true); // end CUDA graph capture cudaGraph_t graph; @@ -1449,10 +1628,10 @@ void dual_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_ { display_feas_polish_iteration_stats(state, params->verbose, false); } - + // Check Adaptive Restart - do_restart = should_do_adaptive_restart(state, ¶ms->restart_params, - params->termination_evaluation_frequency); + do_restart = + should_do_adaptive_restart(state, ¶ms->restart_params, params->termination_evaluation_frequency); if (do_restart) { perform_dual_restart(state); @@ -1467,31 +1646,34 @@ void dual_feasibility_polish(const pdhg_parameters_t *params, pdhg_solver_state_ return; } -static pdhg_solver_state_t *initialize_primal_feas_polish_state( - const pdhg_solver_state_t *original_state) +static pdhg_solver_state_t *initialize_primal_feas_polish_state(const pdhg_solver_state_t *original_state) { pdhg_solver_state_t *primal_state = (pdhg_solver_state_t *)safe_malloc(sizeof(pdhg_solver_state_t)); *primal_state = *original_state; int num_var = original_state->num_variables; int num_cons = original_state->num_constraints; -#define ALLOC_ZERO(dest, bytes) \ - CUDA_CHECK(cudaMalloc(&dest, bytes)); \ +#define ALLOC_ZERO(dest, bytes) \ + CUDA_CHECK(cudaMalloc(&dest, bytes)); \ CUDA_CHECK(cudaMemset(dest, 0, bytes)); // RESET PROBLEM TO FEASIBILITY PROBLEM ALLOC_ZERO(primal_state->objective_vector, num_var * sizeof(double)); primal_state->objective_constant = 0.0; -#define ALLOC_AND_COPY_DEV(dest, src, bytes) \ - CUDA_CHECK(cudaMalloc(&dest, bytes)); \ +#define ALLOC_AND_COPY_DEV(dest, src, bytes) \ + CUDA_CHECK(cudaMalloc(&dest, bytes)); \ CUDA_CHECK(cudaMemcpy(dest, src, bytes, cudaMemcpyDeviceToDevice)); // ALLOCATE AND COPY SOLUTION VECTORS - ALLOC_AND_COPY_DEV(primal_state->initial_primal_solution, original_state->initial_primal_solution, num_var * sizeof(double)); - ALLOC_AND_COPY_DEV(primal_state->current_primal_solution, original_state->current_primal_solution, num_var * sizeof(double)); - ALLOC_AND_COPY_DEV(primal_state->pdhg_primal_solution, original_state->pdhg_primal_solution, num_var * sizeof(double)); - ALLOC_AND_COPY_DEV(primal_state->reflected_primal_solution, original_state->reflected_primal_solution, num_var * sizeof(double)); + ALLOC_AND_COPY_DEV( + primal_state->initial_primal_solution, original_state->initial_primal_solution, num_var * sizeof(double)); + ALLOC_AND_COPY_DEV( + primal_state->current_primal_solution, original_state->current_primal_solution, num_var * sizeof(double)); + ALLOC_AND_COPY_DEV( + primal_state->pdhg_primal_solution, original_state->pdhg_primal_solution, num_var * sizeof(double)); + ALLOC_AND_COPY_DEV( + primal_state->reflected_primal_solution, original_state->reflected_primal_solution, num_var * sizeof(double)); ALLOC_AND_COPY_DEV(primal_state->primal_product, original_state->primal_product, num_cons * sizeof(double)); // ALLOC ZERO FOR OTHERS @@ -1536,11 +1718,11 @@ static pdhg_solver_state_t *initialize_primal_feas_polish_state( void primal_feas_polish_state_free(pdhg_solver_state_t *state) { -#define SAFE_CUDA_FREE(p) \ - if ((p) != NULL) \ - { \ - CUDA_CHECK(cudaFree(p)); \ - (p) = NULL; \ +#define SAFE_CUDA_FREE(p) \ + if ((p) != NULL) \ + { \ + CUDA_CHECK(cudaFree(p)); \ + (p) = NULL; \ } if (!state) @@ -1565,8 +1747,7 @@ void primal_feas_polish_state_free(pdhg_solver_state_t *state) free(state); } -__global__ void zero_finite_value_vectors_kernel( - double *__restrict__ vec, int n) +__global__ void zero_finite_value_vectors_kernel(double *__restrict__ vec, int n) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < n) @@ -1576,39 +1757,42 @@ __global__ void zero_finite_value_vectors_kernel( } } -static pdhg_solver_state_t *initialize_dual_feas_polish_state( - const pdhg_solver_state_t *original_state) +static pdhg_solver_state_t *initialize_dual_feas_polish_state(const pdhg_solver_state_t *original_state) { pdhg_solver_state_t *dual_state = (pdhg_solver_state_t *)safe_malloc(sizeof(pdhg_solver_state_t)); *dual_state = *original_state; int num_var = original_state->num_variables; int num_cons = original_state->num_constraints; -#define ALLOC_AND_COPY_DEV(dest, src, bytes) \ - CUDA_CHECK(cudaMalloc(&dest, bytes)); \ +#define ALLOC_AND_COPY_DEV(dest, src, bytes) \ + CUDA_CHECK(cudaMalloc(&dest, bytes)); \ CUDA_CHECK(cudaMemcpy(dest, src, bytes, cudaMemcpyDeviceToDevice)); // RESET PROBLEM TO DUAL FEASIBILITY PROBLEM -#define SET_FINITE_TO_ZERO(vec, n) \ - { \ - int threads = 256; \ - int blocks = (n + threads - 1) / threads; \ - zero_finite_value_vectors_kernel<<>>(vec, n); \ - CUDA_CHECK(cudaDeviceSynchronize()); \ +#define SET_FINITE_TO_ZERO(vec, n) \ + { \ + int threads = 256; \ + int blocks = (n + threads - 1) / threads; \ + zero_finite_value_vectors_kernel<<>>(vec, n); \ + CUDA_CHECK(cudaDeviceSynchronize()); \ } - ALLOC_AND_COPY_DEV(dual_state->constraint_lower_bound, original_state->constraint_lower_bound, num_cons * sizeof(double)); - ALLOC_AND_COPY_DEV(dual_state->constraint_upper_bound, original_state->constraint_upper_bound, num_cons * sizeof(double)); - ALLOC_AND_COPY_DEV(dual_state->variable_lower_bound, original_state->variable_lower_bound, num_var * sizeof(double)); - ALLOC_AND_COPY_DEV(dual_state->variable_upper_bound, original_state->variable_upper_bound, num_var * sizeof(double)); + ALLOC_AND_COPY_DEV( + dual_state->constraint_lower_bound, original_state->constraint_lower_bound, num_cons * sizeof(double)); + ALLOC_AND_COPY_DEV( + dual_state->constraint_upper_bound, original_state->constraint_upper_bound, num_cons * sizeof(double)); + ALLOC_AND_COPY_DEV( + dual_state->variable_lower_bound, original_state->variable_lower_bound, num_var * sizeof(double)); + ALLOC_AND_COPY_DEV( + dual_state->variable_upper_bound, original_state->variable_upper_bound, num_var * sizeof(double)); SET_FINITE_TO_ZERO(dual_state->constraint_lower_bound, num_cons); SET_FINITE_TO_ZERO(dual_state->constraint_upper_bound, num_cons); SET_FINITE_TO_ZERO(dual_state->variable_lower_bound, num_var); SET_FINITE_TO_ZERO(dual_state->variable_upper_bound, num_var); -#define ALLOC_ZERO(dest, bytes) \ - CUDA_CHECK(cudaMalloc(&dest, bytes)); \ +#define ALLOC_ZERO(dest, bytes) \ + CUDA_CHECK(cudaMalloc(&dest, bytes)); \ CUDA_CHECK(cudaMemset(dest, 0, bytes)); ALLOC_ZERO(dual_state->constraint_lower_bound_finite_val, num_cons * sizeof(double)); @@ -1617,10 +1801,13 @@ static pdhg_solver_state_t *initialize_dual_feas_polish_state( ALLOC_ZERO(dual_state->variable_upper_bound_finite_val, num_var * sizeof(double)); // ALLOCATE AND COPY SOLUTION VECTORS - ALLOC_AND_COPY_DEV(dual_state->initial_dual_solution, original_state->initial_dual_solution, num_cons * sizeof(double)); - ALLOC_AND_COPY_DEV(dual_state->current_dual_solution, original_state->current_dual_solution, num_cons * sizeof(double)); + ALLOC_AND_COPY_DEV( + dual_state->initial_dual_solution, original_state->initial_dual_solution, num_cons * sizeof(double)); + ALLOC_AND_COPY_DEV( + dual_state->current_dual_solution, original_state->current_dual_solution, num_cons * sizeof(double)); ALLOC_AND_COPY_DEV(dual_state->pdhg_dual_solution, original_state->pdhg_dual_solution, num_cons * sizeof(double)); - ALLOC_AND_COPY_DEV(dual_state->reflected_dual_solution, original_state->reflected_dual_solution, num_cons * sizeof(double)); + ALLOC_AND_COPY_DEV( + dual_state->reflected_dual_solution, original_state->reflected_dual_solution, num_cons * sizeof(double)); ALLOC_AND_COPY_DEV(dual_state->dual_product, original_state->dual_product, num_var * sizeof(double)); ALLOC_AND_COPY_DEV(dual_state->dual_slack, original_state->dual_slack, num_var * sizeof(double)); @@ -1663,11 +1850,11 @@ static pdhg_solver_state_t *initialize_dual_feas_polish_state( void dual_feas_polish_state_free(pdhg_solver_state_t *state) { -#define SAFE_CUDA_FREE(p) \ - if ((p) != NULL) \ - { \ - CUDA_CHECK(cudaFree(p)); \ - (p) = NULL; \ +#define SAFE_CUDA_FREE(p) \ + if ((p) != NULL) \ + { \ + CUDA_CHECK(cudaFree(p)); \ + (p) = NULL; \ } if (!state) @@ -1704,24 +1891,36 @@ void dual_feas_polish_state_free(pdhg_solver_state_t *state) static void perform_primal_restart(pdhg_solver_state_t *state) { - CUDA_CHECK(cudaMemcpy(state->initial_primal_solution, state->pdhg_primal_solution, state->num_variables * sizeof(double), cudaMemcpyDeviceToDevice)); - CUDA_CHECK(cudaMemcpy(state->current_primal_solution, state->pdhg_primal_solution, state->num_variables * sizeof(double), cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy(state->initial_primal_solution, + state->pdhg_primal_solution, + state->num_variables * sizeof(double), + cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy(state->current_primal_solution, + state->pdhg_primal_solution, + state->num_variables * sizeof(double), + cudaMemcpyDeviceToDevice)); state->inner_count = 0; state->last_trial_fixed_point_error = INFINITY; } static void perform_dual_restart(pdhg_solver_state_t *state) { - CUDA_CHECK(cudaMemcpy(state->initial_dual_solution, state->pdhg_dual_solution, state->num_constraints * sizeof(double), cudaMemcpyDeviceToDevice)); - CUDA_CHECK(cudaMemcpy(state->current_dual_solution, state->pdhg_dual_solution, state->num_constraints * sizeof(double), cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy(state->initial_dual_solution, + state->pdhg_dual_solution, + state->num_constraints * sizeof(double), + cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy(state->current_dual_solution, + state->pdhg_dual_solution, + state->num_constraints * sizeof(double), + cudaMemcpyDeviceToDevice)); state->inner_count = 0; state->last_trial_fixed_point_error = INFINITY; } -__global__ void compute_delta_primal_solution_kernel( - const double *__restrict__ initial_primal, - const double *__restrict__ pdhg_primal, - double *__restrict__ delta_primal, int n_vars) +__global__ void compute_delta_primal_solution_kernel(const double *__restrict__ initial_primal, + const double *__restrict__ pdhg_primal, + double *__restrict__ delta_primal, + int n_vars) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n_vars) @@ -1730,10 +1929,10 @@ __global__ void compute_delta_primal_solution_kernel( } } -__global__ void compute_delta_dual_solution_kernel( - const double *__restrict__ initial_dual, - const double *__restrict__ pdhg_dual, - double *__restrict__ delta_dual, int n_cons) +__global__ void compute_delta_dual_solution_kernel(const double *__restrict__ initial_dual, + const double *__restrict__ pdhg_dual, + double *__restrict__ delta_dual, + int n_cons) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n_cons) @@ -1750,26 +1949,17 @@ static void compute_primal_fixed_point_error(pdhg_solver_state_t *state) state->delta_primal_solution, state->num_variables); double primal_norm = 0.0; - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, - state->num_variables, - state->delta_primal_solution, - 1, - &primal_norm)); + CUBLAS_CHECK( + cublasDnrm2_v2_64(state->blas_handle, state->num_variables, state->delta_primal_solution, 1, &primal_norm)); state->fixed_point_error = primal_norm * primal_norm * state->primal_weight; } static void compute_dual_fixed_point_error(pdhg_solver_state_t *state) { compute_delta_dual_solution_kernel<<num_blocks_dual, THREADS_PER_BLOCK, 0, state->stream>>>( - state->pdhg_dual_solution, - state->reflected_dual_solution, - state->delta_dual_solution, - state->num_constraints); + state->pdhg_dual_solution, state->reflected_dual_solution, state->delta_dual_solution, state->num_constraints); double dual_norm = 0.0; - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, - state->num_constraints, - state->delta_dual_solution, - 1, - &dual_norm)); + CUBLAS_CHECK( + cublasDnrm2_v2_64(state->blas_handle, state->num_constraints, state->delta_dual_solution, 1, &dual_norm)); state->fixed_point_error = dual_norm * dual_norm / state->primal_weight; } diff --git a/src/utils.cu b/src/utils.cu index 0f7e0f7..c6e8fe4 100644 --- a/src/utils.cu +++ b/src/utils.cu @@ -70,7 +70,8 @@ double estimate_maximum_singular_value(cusparseHandle_t sparse_handle, cublasHandle_t blas_handle, const cu_sparse_matrix_csr_t *A, const cu_sparse_matrix_csr_t *AT, - int max_iterations, double tolerance) + int max_iterations, + double tolerance) { int m = A->num_rows; int n = A->num_cols; @@ -86,8 +87,7 @@ double estimate_maximum_singular_value(cusparseHandle_t sparse_handle, eigenvector_h[i] = dist(gen); } - CUDA_CHECK(cudaMemcpy(eigenvector_d, eigenvector_h, m * sizeof(double), - cudaMemcpyHostToDevice)); + CUDA_CHECK(cudaMemcpy(eigenvector_d, eigenvector_h, m * sizeof(double), cudaMemcpyHostToDevice)); free(eigenvector_h); double sigma_max_sq = 1.0; @@ -95,31 +95,57 @@ double estimate_maximum_singular_value(cusparseHandle_t sparse_handle, const double zero = 0.0; cusparseSpMatDescr_t matA, matAT; - CUSPARSE_CHECK(cusparseCreateCsr( - &matA, A->num_rows, A->num_cols, A->num_nonzeros, A->row_ptr, A->col_ind, - A->val, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F)); - CUSPARSE_CHECK(cusparseCreateCsr( - &matAT, AT->num_rows, AT->num_cols, AT->num_nonzeros, AT->row_ptr, - AT->col_ind, AT->val, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F)); + CUSPARSE_CHECK(cusparseCreateCsr(&matA, + A->num_rows, + A->num_cols, + A->num_nonzeros, + A->row_ptr, + A->col_ind, + A->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F)); + CUSPARSE_CHECK(cusparseCreateCsr(&matAT, + AT->num_rows, + AT->num_cols, + AT->num_nonzeros, + AT->row_ptr, + AT->col_ind, + AT->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F)); cusparseDnVecDescr_t vecEigen, vecNextEigen, vecDual; CUSPARSE_CHECK(cusparseCreateDnVec(&vecEigen, m, eigenvector_d, CUDA_R_64F)); - CUSPARSE_CHECK( - cusparseCreateDnVec(&vecNextEigen, m, next_eigenvector_d, CUDA_R_64F)); + CUSPARSE_CHECK(cusparseCreateDnVec(&vecNextEigen, m, next_eigenvector_d, CUDA_R_64F)); CUSPARSE_CHECK(cusparseCreateDnVec(&vecDual, n, dual_product_d, CUDA_R_64F)); void *dBufferAT = NULL; void *dBufferA = NULL; size_t bufferSizeAT = 0, bufferSizeA = 0; - CUSPARSE_CHECK(cusparseSpMV_bufferSize( - sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, matAT, - vecNextEigen, &zero, vecDual, CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, - &bufferSizeAT)); - CUSPARSE_CHECK(cusparseSpMV_bufferSize( - sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, matA, vecDual, - &zero, vecEigen, CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, &bufferSizeA)); + CUSPARSE_CHECK(cusparseSpMV_bufferSize(sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, + matAT, + vecNextEigen, + &zero, + vecDual, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + &bufferSizeAT)); + CUSPARSE_CHECK(cusparseSpMV_bufferSize(sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, + matA, + vecDual, + &zero, + vecEigen, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + &bufferSizeA)); CUDA_CHECK(cudaMalloc(&dBufferAT, bufferSizeAT)); CUDA_CHECK(cudaMalloc(&dBufferA, bufferSizeA)); @@ -127,36 +153,43 @@ double estimate_maximum_singular_value(cusparseHandle_t sparse_handle, for (int i = 0; i < max_iterations; ++i) { - CUDA_CHECK(cudaMemcpy(next_eigenvector_d, eigenvector_d, m * sizeof(double), - cudaMemcpyDeviceToDevice)); + CUDA_CHECK(cudaMemcpy(next_eigenvector_d, eigenvector_d, m * sizeof(double), cudaMemcpyDeviceToDevice)); double eigenvector_norm; - CUBLAS_CHECK(cublasDnrm2_v2_64(blas_handle, m, next_eigenvector_d, 1, - &eigenvector_norm)); + CUBLAS_CHECK(cublasDnrm2_v2_64(blas_handle, m, next_eigenvector_d, 1, &eigenvector_norm)); double inv_eigenvector_norm = 1.0 / eigenvector_norm; - CUBLAS_CHECK(cublasDscal(blas_handle, m, &inv_eigenvector_norm, - next_eigenvector_d, 1)); - - CUSPARSE_CHECK(cusparseSpMV(sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - &one, matAT, vecNextEigen, &zero, vecDual, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, dBufferAT)); - - CUSPARSE_CHECK(cusparseSpMV(sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - &one, matA, vecDual, &zero, vecEigen, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, dBufferA)); - - CUBLAS_CHECK(cublasDdot(blas_handle, m, next_eigenvector_d, 1, - eigenvector_d, 1, &sigma_max_sq)); + CUBLAS_CHECK(cublasDscal(blas_handle, m, &inv_eigenvector_norm, next_eigenvector_d, 1)); + + CUSPARSE_CHECK(cusparseSpMV(sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, + matAT, + vecNextEigen, + &zero, + vecDual, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + dBufferAT)); + + CUSPARSE_CHECK(cusparseSpMV(sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, + matA, + vecDual, + &zero, + vecEigen, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + dBufferA)); + + CUBLAS_CHECK(cublasDdot(blas_handle, m, next_eigenvector_d, 1, eigenvector_d, 1, &sigma_max_sq)); double neg_sigma_sq = -sigma_max_sq; - CUBLAS_CHECK( - cublasDscal(blas_handle, m, &neg_sigma_sq, next_eigenvector_d, 1)); - CUBLAS_CHECK(cublasDaxpy(blas_handle, m, &one, eigenvector_d, 1, - next_eigenvector_d, 1)); + CUBLAS_CHECK(cublasDscal(blas_handle, m, &neg_sigma_sq, next_eigenvector_d, 1)); + CUBLAS_CHECK(cublasDaxpy(blas_handle, m, &one, eigenvector_d, 1, next_eigenvector_d, 1)); double residual_norm; - CUBLAS_CHECK(cublasDnrm2_v2_64(blas_handle, m, next_eigenvector_d, 1, - &residual_norm)); + CUBLAS_CHECK(cublasDnrm2_v2_64(blas_handle, m, next_eigenvector_d, 1, &residual_norm)); if (residual_norm < tolerance) break; @@ -176,22 +209,23 @@ double estimate_maximum_singular_value(cusparseHandle_t sparse_handle, return sqrt(sigma_max_sq); } -void compute_interaction_and_movement(pdhg_solver_state_t *state, - double *interaction, double *movement) +void compute_interaction_and_movement(pdhg_solver_state_t *state, double *interaction, double *movement) { double dual_norm, primal_norm, cross_term; - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_constraints, - state->delta_dual_solution, 1, &dual_norm)); - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_variables, - state->delta_primal_solution, 1, - &primal_norm)); - *movement = 0.5 * (primal_norm * primal_norm * state->primal_weight + - dual_norm * dual_norm / state->primal_weight); - - CUBLAS_CHECK(cublasDdot(state->blas_handle, state->num_variables, - state->dual_product, 1, state->delta_primal_solution, - 1, &cross_term)); + CUBLAS_CHECK( + cublasDnrm2_v2_64(state->blas_handle, state->num_constraints, state->delta_dual_solution, 1, &dual_norm)); + CUBLAS_CHECK( + cublasDnrm2_v2_64(state->blas_handle, state->num_variables, state->delta_primal_solution, 1, &primal_norm)); + *movement = 0.5 * (primal_norm * primal_norm * state->primal_weight + dual_norm * dual_norm / state->primal_weight); + + CUBLAS_CHECK(cublasDdot(state->blas_handle, + state->num_variables, + state->dual_product, + 1, + state->delta_primal_solution, + 1, + &cross_term)); *interaction = fabs(cross_term); } @@ -199,37 +233,34 @@ const char *termination_reason_to_string(termination_reason_t reason) { switch (reason) { - case TERMINATION_REASON_OPTIMAL: - return "OPTIMAL"; - case TERMINATION_REASON_PRIMAL_INFEASIBLE: - return "PRIMAL_INFEASIBLE"; - case TERMINATION_REASON_DUAL_INFEASIBLE: - return "DUAL_INFEASIBLE"; - case TERMINATION_REASON_INFEASIBLE_OR_UNBOUNDED: - return "INFEASIBLE_OR_UNBOUNDED"; - case TERMINATION_REASON_TIME_LIMIT: - return "TIME_LIMIT"; - case TERMINATION_REASON_ITERATION_LIMIT: - return "ITERATION_LIMIT"; - case TERMINATION_REASON_UNSPECIFIED: - return "UNSPECIFIED"; - case TERMINATION_REASON_FEAS_POLISH_SUCCESS: - return "FEAS_POLISH_SUCCESS"; - default: - return "UNKNOWN"; + case TERMINATION_REASON_OPTIMAL: + return "OPTIMAL"; + case TERMINATION_REASON_PRIMAL_INFEASIBLE: + return "PRIMAL_INFEASIBLE"; + case TERMINATION_REASON_DUAL_INFEASIBLE: + return "DUAL_INFEASIBLE"; + case TERMINATION_REASON_INFEASIBLE_OR_UNBOUNDED: + return "INFEASIBLE_OR_UNBOUNDED"; + case TERMINATION_REASON_TIME_LIMIT: + return "TIME_LIMIT"; + case TERMINATION_REASON_ITERATION_LIMIT: + return "ITERATION_LIMIT"; + case TERMINATION_REASON_UNSPECIFIED: + return "UNSPECIFIED"; + case TERMINATION_REASON_FEAS_POLISH_SUCCESS: + return "FEAS_POLISH_SUCCESS"; + default: + return "UNKNOWN"; } } -bool optimality_criteria_met(const pdhg_solver_state_t *state, - double rel_opt_tol, double rel_feas_tol) +bool optimality_criteria_met(const pdhg_solver_state_t *state, double rel_opt_tol, double rel_feas_tol) { - return state->relative_dual_residual < rel_feas_tol && - state->relative_primal_residual < rel_feas_tol && - state->relative_objective_gap < rel_opt_tol; + return state->relative_dual_residual < rel_feas_tol && state->relative_primal_residual < rel_feas_tol && + state->relative_objective_gap < rel_opt_tol; } -bool primal_infeasibility_criteria_met(const pdhg_solver_state_t *state, - double eps) +bool primal_infeasibility_criteria_met(const pdhg_solver_state_t *state, double eps) { if (state->dual_ray_objective <= 0.0) { @@ -238,24 +269,19 @@ bool primal_infeasibility_criteria_met(const pdhg_solver_state_t *state, return state->max_dual_ray_infeasibility / state->dual_ray_objective <= eps; } -bool dual_infeasibility_criteria_met(const pdhg_solver_state_t *state, - double eps) +bool dual_infeasibility_criteria_met(const pdhg_solver_state_t *state, double eps) { if (state->primal_ray_linear_objective >= 0.0) { return false; } - return state->max_primal_ray_infeasibility / - (-state->primal_ray_linear_objective) <= - eps; + return state->max_primal_ray_infeasibility / (-state->primal_ray_linear_objective) <= eps; } -void check_termination_criteria(pdhg_solver_state_t *solver_state, - const termination_criteria_t *criteria) +void check_termination_criteria(pdhg_solver_state_t *solver_state, const termination_criteria_t *criteria) { solver_state->cumulative_time_sec = (double)(clock() - solver_state->start_time) / CLOCKS_PER_SEC; - if (optimality_criteria_met(solver_state, criteria->eps_optimal_relative, - criteria->eps_feasible_relative)) + if (optimality_criteria_met(solver_state, criteria->eps_optimal_relative, criteria->eps_feasible_relative)) { solver_state->termination_reason = TERMINATION_REASON_OPTIMAL; return; @@ -284,24 +310,19 @@ bool should_do_adaptive_restart(pdhg_solver_state_t *solver_state, else if (solver_state->total_count > termination_evaluation_frequency) { if (solver_state->fixed_point_error <= - restart_params->sufficient_reduction_for_restart * - solver_state->initial_fixed_point_error) + restart_params->sufficient_reduction_for_restart * solver_state->initial_fixed_point_error) { do_restart = true; } if (solver_state->fixed_point_error <= - restart_params->necessary_reduction_for_restart * - solver_state->initial_fixed_point_error) + restart_params->necessary_reduction_for_restart * solver_state->initial_fixed_point_error) { - if (solver_state->fixed_point_error > - solver_state->last_trial_fixed_point_error) + if (solver_state->fixed_point_error > solver_state->last_trial_fixed_point_error) { do_restart = true; } } - if (solver_state->inner_count >= - restart_params->artificial_restart_threshold * - solver_state->total_count) + if (solver_state->inner_count >= restart_params->artificial_restart_threshold * solver_state->total_count) { do_restart = true; } @@ -421,33 +442,40 @@ static void filter_constraint_matrix_entries(lp_problem_t *problem, const pdhg_p if (((nnz - filtered_nnz) > 0) && params->verbose) { printf("Dropped %d near-zero %s (|value| <= %.1e) from the constraint matrix\n", - nnz - filtered_nnz, (nnz - filtered_nnz == 1 ? "entry" : "entries"), params->matrix_zero_tol); + nnz - filtered_nnz, + (nnz - filtered_nnz == 1 ? "entry" : "entries"), + params->matrix_zero_tol); } } -#define PRINT_DIFF_INT(name, current, default_val) \ - do { \ - if ((current) != (default_val)) { \ - printf(" %-18s : %d\n", name, current); \ - } \ - } while(0) - -#define PRINT_DIFF_DBL(name, current, default_val) \ - do { \ - if (fabs((current) - (default_val)) > 1e-9) { \ - printf(" %-18s : %.1e\n", name, (double)(current)); \ - } \ - } while(0) - -#define PRINT_DIFF_BOOL(name, current, default_val) \ - do { \ - if ((current) != (default_val)) { \ - printf(" %-18s : %s\n", name, (current) ? "on" : "off"); \ - } \ - } while(0) - -void print_initial_info(const pdhg_parameters_t *params, - lp_problem_t *problem) +#define PRINT_DIFF_INT(name, current, default_val) \ + do \ + { \ + if ((current) != (default_val)) \ + { \ + printf(" %-18s : %d\n", name, current); \ + } \ + } while (0) + +#define PRINT_DIFF_DBL(name, current, default_val) \ + do \ + { \ + if (fabs((current) - (default_val)) > 1e-9) \ + { \ + printf(" %-18s : %.1e\n", name, (double)(current)); \ + } \ + } while (0) + +#define PRINT_DIFF_BOOL(name, current, default_val) \ + do \ + { \ + if ((current) != (default_val)) \ + { \ + printf(" %-18s : %s\n", name, (current) ? "on" : "off"); \ + } \ + } while (0) + +void print_initial_info(const pdhg_parameters_t *params, lp_problem_t *problem) { pdhg_parameters_t default_params; set_default_parameters(&default_params); @@ -468,64 +496,43 @@ void print_initial_info(const pdhg_parameters_t *params, "------------------\n"); filter_constraint_matrix_entries(problem, params); - printf("Problem: %d rows, %d columns, %d nonzeros\n", problem->num_constraints, problem->num_variables, problem->constraint_matrix_num_nonzeros); + printf("Problem: %d rows, %d columns, %d nonzeros\n", + problem->num_constraints, + problem->num_variables, + problem->constraint_matrix_num_nonzeros); printf("Settings:\n"); - printf(" iter_limit : %d\n", - params->termination_criteria.iteration_limit); - printf(" time_limit : %.2f sec\n", - params->termination_criteria.time_sec_limit); - printf(" eps_opt : %.1e\n", - params->termination_criteria.eps_optimal_relative); - printf(" eps_feas : %.1e\n", - params->termination_criteria.eps_feasible_relative); - if (params->optimality_norm != default_params.optimality_norm) { - printf(" optimality_norm : %s\n", - params->optimality_norm == NORM_TYPE_L_INF ? "L_inf" : "L2"); - } - - PRINT_DIFF_INT("l_inf_ruiz_iter", - params->l_inf_ruiz_iterations, - default_params.l_inf_ruiz_iterations); - PRINT_DIFF_DBL("pock_chambolle_alpha", - params->pock_chambolle_alpha, - default_params.pock_chambolle_alpha); - PRINT_DIFF_BOOL("has_pock_chambolle_alpha", - params->has_pock_chambolle_alpha, - default_params.has_pock_chambolle_alpha); - PRINT_DIFF_BOOL("bound_obj_rescaling", - params->bound_objective_rescaling, - default_params.bound_objective_rescaling); - PRINT_DIFF_INT("sv_max_iter", - params->sv_max_iter, - default_params.sv_max_iter); - PRINT_DIFF_DBL("sv_tol", - params->sv_tol, - default_params.sv_tol); - PRINT_DIFF_INT("evaluation_freq", - params->termination_evaluation_frequency, - default_params.termination_evaluation_frequency); - PRINT_DIFF_BOOL("feasibility_polishing", - params->feasibility_polishing, - default_params.feasibility_polishing); + printf(" iter_limit : %d\n", params->termination_criteria.iteration_limit); + printf(" time_limit : %.2f sec\n", params->termination_criteria.time_sec_limit); + printf(" eps_opt : %.1e\n", params->termination_criteria.eps_optimal_relative); + printf(" eps_feas : %.1e\n", params->termination_criteria.eps_feasible_relative); + if (params->optimality_norm != default_params.optimality_norm) + { + printf(" optimality_norm : %s\n", params->optimality_norm == NORM_TYPE_L_INF ? "L_inf" : "L2"); + } + + PRINT_DIFF_INT("l_inf_ruiz_iter", params->l_inf_ruiz_iterations, default_params.l_inf_ruiz_iterations); + PRINT_DIFF_DBL("pock_chambolle_alpha", params->pock_chambolle_alpha, default_params.pock_chambolle_alpha); + PRINT_DIFF_BOOL( + "has_pock_chambolle_alpha", params->has_pock_chambolle_alpha, default_params.has_pock_chambolle_alpha); + PRINT_DIFF_BOOL("bound_obj_rescaling", params->bound_objective_rescaling, default_params.bound_objective_rescaling); + PRINT_DIFF_INT("sv_max_iter", params->sv_max_iter, default_params.sv_max_iter); + PRINT_DIFF_DBL("sv_tol", params->sv_tol, default_params.sv_tol); + PRINT_DIFF_INT( + "evaluation_freq", params->termination_evaluation_frequency, default_params.termination_evaluation_frequency); + PRINT_DIFF_BOOL("feasibility_polishing", params->feasibility_polishing, default_params.feasibility_polishing); PRINT_DIFF_DBL("eps_feas_polish_relative", params->termination_criteria.eps_feas_polish_relative, default_params.termination_criteria.eps_feas_polish_relative); - PRINT_DIFF_BOOL("presolve", - params->presolve, - default_params.presolve); - PRINT_DIFF_DBL("matrix_zero_tol", - params->matrix_zero_tol, - default_params.matrix_zero_tol); + PRINT_DIFF_BOOL("presolve", params->presolve, default_params.presolve); + PRINT_DIFF_DBL("matrix_zero_tol", params->matrix_zero_tol, default_params.matrix_zero_tol); } #undef PRINT_DIFF_INT #undef PRINT_DIFF_DBL #undef PRINT_DIFF_BOOL -void pdhg_final_log( - const cupdlpx_result_t *result, - const pdhg_parameters_t *params) +void pdhg_final_log(const cupdlpx_result_t *result, const pdhg_parameters_t *params) { if (params->verbose) { @@ -583,11 +590,16 @@ void display_iteration_stats(const pdhg_solver_state_t *state, bool verbose) if (state->total_count % get_print_frequency(state->total_count) == 0) { printf("%6d %.1e | %8.1e %8.1e | %.1e %.1e %.1e | %.1e %.1e %.1e \n", - state->total_count, state->cumulative_time_sec, - state->primal_objective_value, state->dual_objective_value, - state->absolute_primal_residual, state->absolute_dual_residual, - state->objective_gap, state->relative_primal_residual, - state->relative_dual_residual, state->relative_objective_gap); + state->total_count, + state->cumulative_time_sec, + state->primal_objective_value, + state->dual_objective_value, + state->absolute_primal_residual, + state->absolute_dual_residual, + state->objective_gap, + state->relative_primal_residual, + state->relative_dual_residual, + state->relative_objective_gap); } } @@ -604,51 +616,45 @@ int get_print_frequency(int iter) return step; } -__global__ void compute_residual_kernel( - double *__restrict__ primal_residual, - const double *__restrict__ primal_product, - const double *__restrict__ constraint_lower_bound, - const double *__restrict__ constraint_upper_bound, - const double *__restrict__ dual_solution, - double *__restrict__ dual_residual, - const double *__restrict__ dual_product, - const double *__restrict__ dual_slack, - const double *__restrict__ objective_vector, - const double *__restrict__ constraint_rescaling, - const double *__restrict__ variable_rescaling, - double *__restrict__ dual_obj_contribution, - const double *__restrict__ const_lb_finite, - const double *__restrict__ const_ub_finite, - int num_constraints, int num_variables) +__global__ void compute_residual_kernel(double *__restrict__ primal_residual, + const double *__restrict__ primal_product, + const double *__restrict__ constraint_lower_bound, + const double *__restrict__ constraint_upper_bound, + const double *__restrict__ dual_solution, + double *__restrict__ dual_residual, + const double *__restrict__ dual_product, + const double *__restrict__ dual_slack, + const double *__restrict__ objective_vector, + const double *__restrict__ constraint_rescaling, + const double *__restrict__ variable_rescaling, + double *__restrict__ dual_obj_contribution, + const double *__restrict__ const_lb_finite, + const double *__restrict__ const_ub_finite, + int num_constraints, + int num_variables) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < num_constraints) { - double clamped_val = - fmax(constraint_lower_bound[i], - fmin(primal_product[i], constraint_upper_bound[i])); - primal_residual[i] = - (primal_product[i] - clamped_val) * constraint_rescaling[i]; + double clamped_val = fmax(constraint_lower_bound[i], fmin(primal_product[i], constraint_upper_bound[i])); + primal_residual[i] = (primal_product[i] - clamped_val) * constraint_rescaling[i]; dual_obj_contribution[i] = - fmax(dual_solution[i], 0.0) * const_lb_finite[i] + - fmin(dual_solution[i], 0.0) * const_ub_finite[i]; + fmax(dual_solution[i], 0.0) * const_lb_finite[i] + fmin(dual_solution[i], 0.0) * const_ub_finite[i]; } else if (i < num_constraints + num_variables) { int idx = i - num_constraints; - dual_residual[idx] = - (objective_vector[idx] - dual_product[idx] - dual_slack[idx]) * - variable_rescaling[idx]; + dual_residual[idx] = (objective_vector[idx] - dual_product[idx] - dual_slack[idx]) * variable_rescaling[idx]; } } -__global__ void primal_infeasibility_project_kernel( - double *__restrict__ primal_ray_estimate, - const double *__restrict__ variable_lower_bound, - const double *__restrict__ variable_upper_bound, int num_variables) +__global__ void primal_infeasibility_project_kernel(double *__restrict__ primal_ray_estimate, + const double *__restrict__ variable_lower_bound, + const double *__restrict__ variable_upper_bound, + int num_variables) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < num_variables) @@ -664,10 +670,10 @@ __global__ void primal_infeasibility_project_kernel( } } -__global__ void dual_infeasibility_project_kernel( - double *__restrict__ dual_ray_estimate, - const double *__restrict__ constraint_lower_bound, - const double *__restrict__ constraint_upper_bound, int num_constraints) +__global__ void dual_infeasibility_project_kernel(double *__restrict__ dual_ray_estimate, + const double *__restrict__ constraint_lower_bound, + const double *__restrict__ constraint_upper_bound, + int num_constraints) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < num_constraints) @@ -683,46 +689,45 @@ __global__ void dual_infeasibility_project_kernel( } } -__global__ void compute_primal_infeasibility_kernel( - const double *__restrict__ primal_product, - const double *__restrict__ const_lb, - const double *__restrict__ const_ub, int num_constraints, - double *__restrict__ primal_infeasibility, - const double *__restrict__ constraint_rescaling) +__global__ void compute_primal_infeasibility_kernel(const double *__restrict__ primal_product, + const double *__restrict__ const_lb, + const double *__restrict__ const_ub, + int num_constraints, + double *__restrict__ primal_infeasibility, + const double *__restrict__ constraint_rescaling) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < num_constraints) { double pp_val = primal_product[i]; - primal_infeasibility[i] = (fmax(0.0, -pp_val) * isfinite(const_lb[i]) + - fmax(0.0, pp_val) * isfinite(const_ub[i])) * - constraint_rescaling[i]; + primal_infeasibility[i] = + (fmax(0.0, -pp_val) * isfinite(const_lb[i]) + fmax(0.0, pp_val) * isfinite(const_ub[i])) * + constraint_rescaling[i]; } } -__global__ void -compute_dual_infeasibility_kernel(const double *dual_product, - const double *__restrict__ var_lb, - const double *__restrict__ var_ub, - int num_variables, - double *__restrict__ dual_infeasibility, - const double *__restrict__ variable_rescaling) +__global__ void compute_dual_infeasibility_kernel(const double *dual_product, + const double *__restrict__ var_lb, + const double *__restrict__ var_ub, + int num_variables, + double *__restrict__ dual_infeasibility, + const double *__restrict__ variable_rescaling) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < num_variables) { double dp_val = -dual_product[i]; - dual_infeasibility[i] = (fmax(0.0, dp_val) * !isfinite(var_lb[i]) - - fmin(0.0, dp_val) * !isfinite(var_ub[i])) * - variable_rescaling[i]; + dual_infeasibility[i] = (fmax(0.0, dp_val) * !isfinite(var_lb[i]) - fmin(0.0, dp_val) * !isfinite(var_ub[i])) * + variable_rescaling[i]; } } -__global__ void dual_solution_dual_objective_contribution_kernel( - const double *__restrict__ constraint_lower_bound_finite_val, - const double *__restrict__ constraint_upper_bound_finite_val, - const double *__restrict__ dual_solution, int num_constraints, - double *__restrict__ dual_objective_dual_solution_contribution_array) +__global__ void +dual_solution_dual_objective_contribution_kernel(const double *__restrict__ constraint_lower_bound_finite_val, + const double *__restrict__ constraint_upper_bound_finite_val, + const double *__restrict__ dual_solution, + int num_constraints, + double *__restrict__ dual_objective_dual_solution_contribution_array) { int i = blockIdx.x * blockDim.x + threadIdx.x; @@ -734,12 +739,12 @@ __global__ void dual_solution_dual_objective_contribution_kernel( } } -__global__ void dual_objective_dual_slack_contribution_array_kernel( - const double *__restrict__ dual_slack, - double *__restrict__ dual_objective_dual_slack_contribution_array, - const double *__restrict__ variable_lower_bound_finite_val, - const double *__restrict__ variable_upper_bound_finite_val, - int num_variables) +__global__ void +dual_objective_dual_slack_contribution_array_kernel(const double *__restrict__ dual_slack, + double *__restrict__ dual_objective_dual_slack_contribution_array, + const double *__restrict__ variable_lower_bound_finite_val, + const double *__restrict__ variable_upper_bound_finite_val, + int num_variables) { int i = blockIdx.x * blockDim.x + threadIdx.x; @@ -751,8 +756,7 @@ __global__ void dual_objective_dual_slack_contribution_array_kernel( } } -static double get_vector_inf_norm(cublasHandle_t handle, int n, - const double *x_d) +static double get_vector_inf_norm(cublasHandle_t handle, int n, const double *x_d) { if (n <= 0) return 0.0; @@ -761,13 +765,11 @@ static double get_vector_inf_norm(cublasHandle_t handle, int n, cublasIdamax(handle, n, x_d, 1, &index); double max_val; - CUDA_CHECK(cudaMemcpy(&max_val, x_d + (index - 1), sizeof(double), - cudaMemcpyDeviceToHost)); + CUDA_CHECK(cudaMemcpy(&max_val, x_d + (index - 1), sizeof(double), cudaMemcpyDeviceToHost)); return fabs(max_val); } -static double get_vector_sum(cublasHandle_t handle, int n, double *ones_d, - const double *x_d) +static double get_vector_sum(cublasHandle_t handle, int n, double *ones_d, const double *x_d) { if (n <= 0) return 0.0; @@ -784,166 +786,203 @@ void compute_residual(pdhg_solver_state_t *state, norm_type_t optimality_norm) cusparseDnVecSetValues(state->vec_primal_prod, state->primal_product); cusparseDnVecSetValues(state->vec_dual_prod, state->dual_product); - CUSPARSE_CHECK(cusparseSpMV( - state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, - state->matA, state->vec_primal_sol, &HOST_ZERO, state->vec_primal_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->primal_spmv_buffer)); - - CUSPARSE_CHECK(cusparseSpMV( - state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, - state->matAt, state->vec_dual_sol, &HOST_ZERO, state->vec_dual_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->dual_spmv_buffer)); + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matA, + state->vec_primal_sol, + &HOST_ZERO, + state->vec_primal_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->primal_spmv_buffer)); + + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matAt, + state->vec_dual_sol, + &HOST_ZERO, + state->vec_dual_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->dual_spmv_buffer)); compute_residual_kernel<<num_blocks_primal_dual, THREADS_PER_BLOCK, 0, state->stream>>>( - state->primal_residual, state->primal_product, - state->constraint_lower_bound, state->constraint_upper_bound, - state->pdhg_dual_solution, state->dual_residual, state->dual_product, - state->dual_slack, state->objective_vector, state->constraint_rescaling, - state->variable_rescaling, state->primal_slack, + state->primal_residual, + state->primal_product, + state->constraint_lower_bound, + state->constraint_upper_bound, + state->pdhg_dual_solution, + state->dual_residual, + state->dual_product, + state->dual_slack, + state->objective_vector, + state->constraint_rescaling, + state->variable_rescaling, + state->primal_slack, state->constraint_lower_bound_finite_val, - state->constraint_upper_bound_finite_val, state->num_constraints, + state->constraint_upper_bound_finite_val, + state->num_constraints, state->num_variables); - if (optimality_norm == NORM_TYPE_L_INF) { - state->absolute_primal_residual = get_vector_inf_norm(state->blas_handle, - state->num_constraints, state->primal_residual); - } else { - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_constraints, - state->primal_residual, 1, - &state->absolute_primal_residual)); + if (optimality_norm == NORM_TYPE_L_INF) + { + state->absolute_primal_residual = + get_vector_inf_norm(state->blas_handle, state->num_constraints, state->primal_residual); + } + else + { + CUBLAS_CHECK(cublasDnrm2_v2_64( + state->blas_handle, state->num_constraints, state->primal_residual, 1, &state->absolute_primal_residual)); } - - if (optimality_norm == NORM_TYPE_L_INF) { - state->absolute_dual_residual = get_vector_inf_norm(state->blas_handle, - state->num_variables, state->dual_residual); - } else { + if (optimality_norm == NORM_TYPE_L_INF) + { + state->absolute_dual_residual = + get_vector_inf_norm(state->blas_handle, state->num_variables, state->dual_residual); + } + else + { state->absolute_primal_residual /= state->constraint_bound_rescaling; - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_variables, - state->dual_residual, 1, - &state->absolute_dual_residual)); + CUBLAS_CHECK(cublasDnrm2_v2_64( + state->blas_handle, state->num_variables, state->dual_residual, 1, &state->absolute_dual_residual)); } state->absolute_dual_residual /= state->objective_vector_rescaling; - CUBLAS_CHECK(cublasDdot( - state->blas_handle, state->num_variables, state->objective_vector, 1, - state->pdhg_primal_solution, 1, &state->primal_objective_value)); + CUBLAS_CHECK(cublasDdot(state->blas_handle, + state->num_variables, + state->objective_vector, + 1, + state->pdhg_primal_solution, + 1, + &state->primal_objective_value)); state->primal_objective_value = - state->primal_objective_value / (state->constraint_bound_rescaling * - state->objective_vector_rescaling) + + state->primal_objective_value / (state->constraint_bound_rescaling * state->objective_vector_rescaling) + state->objective_constant; double base_dual_objective; - CUBLAS_CHECK(cublasDdot(state->blas_handle, state->num_variables, - state->dual_slack, 1, state->pdhg_primal_solution, 1, + CUBLAS_CHECK(cublasDdot(state->blas_handle, + state->num_variables, + state->dual_slack, + 1, + state->pdhg_primal_solution, + 1, &base_dual_objective)); double dual_slack_sum = - get_vector_sum(state->blas_handle, state->num_constraints, - state->ones_dual_d, state->primal_slack); + get_vector_sum(state->blas_handle, state->num_constraints, state->ones_dual_d, state->primal_slack); state->dual_objective_value = (base_dual_objective + dual_slack_sum) / - (state->constraint_bound_rescaling * - state->objective_vector_rescaling) + - state->objective_constant; + (state->constraint_bound_rescaling * state->objective_vector_rescaling) + + state->objective_constant; + + state->relative_primal_residual = state->absolute_primal_residual / (1.0 + state->constraint_bound_norm); - state->relative_primal_residual = - state->absolute_primal_residual / (1.0 + state->constraint_bound_norm); - - state->relative_dual_residual = - state->absolute_dual_residual / (1.0 + state->objective_vector_norm); + state->relative_dual_residual = state->absolute_dual_residual / (1.0 + state->objective_vector_norm); - state->objective_gap = - fabs(state->primal_objective_value - state->dual_objective_value); + state->objective_gap = fabs(state->primal_objective_value - state->dual_objective_value); state->relative_objective_gap = - state->objective_gap / (1.0 + fabs(state->primal_objective_value) + - fabs(state->dual_objective_value)); + state->objective_gap / (1.0 + fabs(state->primal_objective_value) + fabs(state->dual_objective_value)); } void compute_infeasibility_information(pdhg_solver_state_t *state) { - primal_infeasibility_project_kernel<<num_blocks_primal, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->delta_primal_solution, state->variable_lower_bound, - state->variable_upper_bound, state->num_variables); - dual_infeasibility_project_kernel<<num_blocks_dual, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->delta_dual_solution, state->constraint_lower_bound, - state->constraint_upper_bound, state->num_constraints); - - double primal_ray_inf_norm = get_vector_inf_norm( - state->blas_handle, state->num_variables, state->delta_primal_solution); + primal_infeasibility_project_kernel<<num_blocks_primal, THREADS_PER_BLOCK, 0, state->stream>>>( + state->delta_primal_solution, state->variable_lower_bound, state->variable_upper_bound, state->num_variables); + dual_infeasibility_project_kernel<<num_blocks_dual, THREADS_PER_BLOCK, 0, state->stream>>>( + state->delta_dual_solution, + state->constraint_lower_bound, + state->constraint_upper_bound, + state->num_constraints); + + double primal_ray_inf_norm = + get_vector_inf_norm(state->blas_handle, state->num_variables, state->delta_primal_solution); if (primal_ray_inf_norm > 0.0) { double scale = 1.0 / primal_ray_inf_norm; - cublasDscal(state->blas_handle, state->num_variables, &scale, - state->delta_primal_solution, 1); - } - double dual_ray_inf_norm = get_vector_inf_norm( - state->blas_handle, state->num_constraints, state->delta_dual_solution); - - CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_primal_sol, - state->delta_primal_solution)); - CUSPARSE_CHECK( - cusparseDnVecSetValues(state->vec_dual_sol, state->delta_dual_solution)); - CUSPARSE_CHECK( - cusparseDnVecSetValues(state->vec_primal_prod, state->primal_product)); - CUSPARSE_CHECK( - cusparseDnVecSetValues(state->vec_dual_prod, state->dual_product)); - - CUSPARSE_CHECK(cusparseSpMV( - state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, - state->matA, state->vec_primal_sol, &HOST_ZERO, state->vec_primal_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->primal_spmv_buffer)); - - CUSPARSE_CHECK(cusparseSpMV( - state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, - state->matAt, state->vec_dual_sol, &HOST_ZERO, state->vec_dual_prod, - CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->dual_spmv_buffer)); - - CUBLAS_CHECK(cublasDdot( - state->blas_handle, state->num_variables, state->objective_vector, 1, - state->delta_primal_solution, 1, &state->primal_ray_linear_objective)); - state->primal_ray_linear_objective /= - (state->constraint_bound_rescaling * state->objective_vector_rescaling); - - dual_solution_dual_objective_contribution_kernel<<num_blocks_dual, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->constraint_lower_bound_finite_val, - state->constraint_upper_bound_finite_val, state->delta_dual_solution, - state->num_constraints, state->primal_slack); + cublasDscal(state->blas_handle, state->num_variables, &scale, state->delta_primal_solution, 1); + } + double dual_ray_inf_norm = + get_vector_inf_norm(state->blas_handle, state->num_constraints, state->delta_dual_solution); + + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_primal_sol, state->delta_primal_solution)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_sol, state->delta_dual_solution)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_primal_prod, state->primal_product)); + CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_prod, state->dual_product)); - dual_objective_dual_slack_contribution_array_kernel<<< - state->num_blocks_primal, THREADS_PER_BLOCK, 0, state->stream>>>( - state->dual_product, state->dual_slack, - state->variable_lower_bound_finite_val, - state->variable_upper_bound_finite_val, state->num_variables); + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matA, + state->vec_primal_sol, + &HOST_ZERO, + state->vec_primal_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->primal_spmv_buffer)); + + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matAt, + state->vec_dual_sol, + &HOST_ZERO, + state->vec_dual_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->dual_spmv_buffer)); + + CUBLAS_CHECK(cublasDdot(state->blas_handle, + state->num_variables, + state->objective_vector, + 1, + state->delta_primal_solution, + 1, + &state->primal_ray_linear_objective)); + state->primal_ray_linear_objective /= (state->constraint_bound_rescaling * state->objective_vector_rescaling); + + dual_solution_dual_objective_contribution_kernel<<num_blocks_dual, THREADS_PER_BLOCK, 0, state->stream>>>( + state->constraint_lower_bound_finite_val, + state->constraint_upper_bound_finite_val, + state->delta_dual_solution, + state->num_constraints, + state->primal_slack); + + dual_objective_dual_slack_contribution_array_kernel<<num_blocks_primal, + THREADS_PER_BLOCK, + 0, + state->stream>>>(state->dual_product, + state->dual_slack, + state->variable_lower_bound_finite_val, + state->variable_upper_bound_finite_val, + state->num_variables); double sum_primal_slack = - get_vector_sum(state->blas_handle, state->num_constraints, - state->ones_dual_d, state->primal_slack); + get_vector_sum(state->blas_handle, state->num_constraints, state->ones_dual_d, state->primal_slack); double sum_dual_slack = - get_vector_sum(state->blas_handle, state->num_variables, - state->ones_primal_d, state->dual_slack); + get_vector_sum(state->blas_handle, state->num_variables, state->ones_primal_d, state->dual_slack); state->dual_ray_objective = - (sum_primal_slack + sum_dual_slack) / - (state->constraint_bound_rescaling * state->objective_vector_rescaling); - - compute_primal_infeasibility_kernel<<num_blocks_dual, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->primal_product, state->constraint_lower_bound, - state->constraint_upper_bound, state->num_constraints, - state->primal_slack, state->constraint_rescaling); - compute_dual_infeasibility_kernel<<num_blocks_primal, - THREADS_PER_BLOCK, 0, state->stream>>>( - state->dual_product, state->variable_lower_bound, - state->variable_upper_bound, state->num_variables, state->dual_slack, + (sum_primal_slack + sum_dual_slack) / (state->constraint_bound_rescaling * state->objective_vector_rescaling); + + compute_primal_infeasibility_kernel<<num_blocks_dual, THREADS_PER_BLOCK, 0, state->stream>>>( + state->primal_product, + state->constraint_lower_bound, + state->constraint_upper_bound, + state->num_constraints, + state->primal_slack, + state->constraint_rescaling); + compute_dual_infeasibility_kernel<<num_blocks_primal, THREADS_PER_BLOCK, 0, state->stream>>>( + state->dual_product, + state->variable_lower_bound, + state->variable_upper_bound, + state->num_variables, + state->dual_slack, state->variable_rescaling); - state->max_primal_ray_infeasibility = get_vector_inf_norm( - state->blas_handle, state->num_constraints, state->primal_slack); - double dual_slack_norm = get_vector_inf_norm( - state->blas_handle, state->num_variables, state->dual_slack); + state->max_primal_ray_infeasibility = + get_vector_inf_norm(state->blas_handle, state->num_constraints, state->primal_slack); + double dual_slack_norm = get_vector_inf_norm(state->blas_handle, state->num_variables, state->dual_slack); state->max_dual_ray_infeasibility = dual_slack_norm; double scaling_factor = fmax(dual_ray_inf_norm, dual_slack_norm); @@ -971,8 +1010,7 @@ void fill_or_copy(double **dst, int n, const double *src, double fill_val) } // convert dense → CSR -int dense_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, - double **vals, int *nnz_out) +int dense_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, double **vals, int *nnz_out) { int m = desc->m, n = desc->n; @@ -1011,8 +1049,7 @@ int dense_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, } // convert CSC → CSR -int csc_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, - double **vals) +int csc_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, double **vals) { const int m = desc->m, n = desc->n; const int *col_ptr = desc->data.csc.col_ptr; @@ -1069,8 +1106,7 @@ int csc_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, } // convert COO → CSR -int coo_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, - double **vals) +int coo_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, double **vals) { const int m = desc->m, n = desc->n; const int nnz = desc->data.coo.nnz; @@ -1125,11 +1161,10 @@ int coo_to_csr(const matrix_desc_t *desc, int **row_ptr, int **col_ind, return 0; } -void check_feas_polishing_termination_criteria( - pdhg_solver_state_t *solver_state, - const pdhg_solver_state_t *ori_solver_state, - const termination_criteria_t *criteria, - bool is_primal_polish) +void check_feas_polishing_termination_criteria(pdhg_solver_state_t *solver_state, + const pdhg_solver_state_t *ori_solver_state, + const termination_criteria_t *criteria, + bool is_primal_polish) { solver_state->cumulative_time_sec = (double)(clock() - solver_state->start_time) / CLOCKS_PER_SEC; if (is_primal_polish) @@ -1179,7 +1214,9 @@ void print_initial_feas_polish_info(bool is_primal_polish, const pdhg_parameters printf("---------------------------------------------------------------------------------------\n"); } -void pdhg_feas_polish_final_log(const pdhg_solver_state_t *primal_state, const pdhg_solver_state_t *dual_state, bool verbose) +void pdhg_feas_polish_final_log(const pdhg_solver_state_t *primal_state, + const pdhg_solver_state_t *dual_state, + bool verbose) { if (verbose) { @@ -1194,7 +1231,9 @@ void pdhg_feas_polish_final_log(const pdhg_solver_state_t *primal_state, const p printf(" Dual Time Usage : %.3g sec\n", dual_state->cumulative_time_sec); printf(" Primal Residual : %.3e\n", primal_state->relative_primal_residual); printf(" Dual Residual : %.3e\n", dual_state->relative_dual_residual); - printf(" Primal Dual Gap : %.3e\n", fabs(primal_state->primal_objective_value - dual_state->dual_objective_value) / (1.0 + fabs(primal_state->primal_objective_value) + fabs(dual_state->dual_objective_value))); + printf(" Primal Dual Gap : %.3e\n", + fabs(primal_state->primal_objective_value - dual_state->dual_objective_value) / + (1.0 + fabs(primal_state->primal_objective_value) + fabs(dual_state->dual_objective_value))); } void display_feas_polish_iteration_stats(const pdhg_solver_state_t *state, bool verbose, bool is_primal_polish) @@ -1226,13 +1265,12 @@ void display_feas_polish_iteration_stats(const pdhg_solver_state_t *state, bool } } -__global__ void compute_primal_feas_polish_residual_kernel( - double *__restrict__ primal_residual, - const double *__restrict__ primal_product, - const double *__restrict__ constraint_lower_bound, - const double *__restrict__ constraint_upper_bound, - const double *__restrict__ constraint_rescaling, - int num_constraints) +__global__ void compute_primal_feas_polish_residual_kernel(double *__restrict__ primal_residual, + const double *__restrict__ primal_product, + const double *__restrict__ constraint_lower_bound, + const double *__restrict__ constraint_upper_bound, + const double *__restrict__ constraint_rescaling, + int num_constraints) { int i = blockIdx.x * blockDim.x + threadIdx.x; @@ -1244,18 +1282,17 @@ __global__ void compute_primal_feas_polish_residual_kernel( } } -__global__ void compute_dual_feas_polish_residual_kernel( - double *__restrict__ dual_residual, - const double *__restrict__ dual_solution, - const double *__restrict__ dual_product, - const double *__restrict__ dual_slack, - const double *__restrict__ objective_vector, - const double *__restrict__ variable_rescaling, - double *__restrict__ dual_obj_contribution, - const double *__restrict__ const_lb_finite, - const double *__restrict__ const_ub_finite, - int num_variables, - int num_constraints) +__global__ void compute_dual_feas_polish_residual_kernel(double *__restrict__ dual_residual, + const double *__restrict__ dual_solution, + const double *__restrict__ dual_product, + const double *__restrict__ dual_slack, + const double *__restrict__ objective_vector, + const double *__restrict__ variable_rescaling, + double *__restrict__ dual_obj_contribution, + const double *__restrict__ const_lb_finite, + const double *__restrict__ const_ub_finite, + int num_variables, + int num_constraints) { int i = blockIdx.x * blockDim.x + threadIdx.x; @@ -1266,63 +1303,104 @@ __global__ void compute_dual_feas_polish_residual_kernel( else if (i < num_constraints + num_variables) { int idx = i - num_variables; - dual_obj_contribution[idx] = fmax(dual_solution[idx], 0.0) * const_lb_finite[idx] + fmin(dual_solution[idx], 0.0) * const_ub_finite[idx]; + dual_obj_contribution[idx] = + fmax(dual_solution[idx], 0.0) * const_lb_finite[idx] + fmin(dual_solution[idx], 0.0) * const_ub_finite[idx]; } } -void compute_primal_feas_polish_residual(pdhg_solver_state_t *state, const pdhg_solver_state_t *ori_state, norm_type_t optimality_norm) +void compute_primal_feas_polish_residual(pdhg_solver_state_t *state, + const pdhg_solver_state_t *ori_state, + norm_type_t optimality_norm) { CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_primal_sol, state->pdhg_primal_solution)); CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_primal_prod, state->primal_product)); - CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, state->matA, state->vec_primal_sol, &HOST_ZERO, state->vec_primal_prod, CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->primal_spmv_buffer)); + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matA, + state->vec_primal_sol, + &HOST_ZERO, + state->vec_primal_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->primal_spmv_buffer)); compute_primal_feas_polish_residual_kernel<<num_blocks_dual, THREADS_PER_BLOCK, 0, state->stream>>>( - state->primal_residual, state->primal_product, state->constraint_lower_bound, - state->constraint_upper_bound, state->constraint_rescaling, + state->primal_residual, + state->primal_product, + state->constraint_lower_bound, + state->constraint_upper_bound, + state->constraint_rescaling, state->num_constraints); - if (optimality_norm == NORM_TYPE_L_INF) { - state->absolute_primal_residual = get_vector_inf_norm(state->blas_handle, - state->num_constraints, state->primal_residual); - } else { - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_constraints, - state->primal_residual, 1, - &state->absolute_primal_residual)); + if (optimality_norm == NORM_TYPE_L_INF) + { + state->absolute_primal_residual = + get_vector_inf_norm(state->blas_handle, state->num_constraints, state->primal_residual); + } + else + { + CUBLAS_CHECK(cublasDnrm2_v2_64( + state->blas_handle, state->num_constraints, state->primal_residual, 1, &state->absolute_primal_residual)); } state->absolute_primal_residual /= state->constraint_bound_rescaling; state->relative_primal_residual = state->absolute_primal_residual / (1.0 + state->constraint_bound_norm); - CUBLAS_CHECK(cublasDdot(state->blas_handle, state->num_variables, ori_state->objective_vector, 1, state->pdhg_primal_solution, 1, &state->primal_objective_value)); - state->primal_objective_value = state->primal_objective_value / (state->constraint_bound_rescaling * state->objective_vector_rescaling) + state->objective_constant; + CUBLAS_CHECK(cublasDdot(state->blas_handle, + state->num_variables, + ori_state->objective_vector, + 1, + state->pdhg_primal_solution, + 1, + &state->primal_objective_value)); + state->primal_objective_value = + state->primal_objective_value / (state->constraint_bound_rescaling * state->objective_vector_rescaling) + + state->objective_constant; } -void compute_dual_feas_polish_residual(pdhg_solver_state_t *state, const pdhg_solver_state_t *ori_state, norm_type_t optimality_norm) +void compute_dual_feas_polish_residual(pdhg_solver_state_t *state, + const pdhg_solver_state_t *ori_state, + norm_type_t optimality_norm) { CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_sol, state->pdhg_dual_solution)); CUSPARSE_CHECK(cusparseDnVecSetValues(state->vec_dual_prod, state->dual_product)); - CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &HOST_ONE, state->matAt, state->vec_dual_sol, &HOST_ZERO, state->vec_dual_prod, CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, state->dual_spmv_buffer)); + CUSPARSE_CHECK(cusparseSpMV(state->sparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &HOST_ONE, + state->matAt, + state->vec_dual_sol, + &HOST_ZERO, + state->vec_dual_prod, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + state->dual_spmv_buffer)); compute_dual_feas_polish_residual_kernel<<num_blocks_primal_dual, THREADS_PER_BLOCK>>>( state->dual_residual, state->pdhg_dual_solution, state->dual_product, - state->dual_slack, state->objective_vector, + state->dual_slack, + state->objective_vector, state->variable_rescaling, state->primal_slack, ori_state->constraint_lower_bound_finite_val, ori_state->constraint_upper_bound_finite_val, - state->num_variables, state->num_constraints); + state->num_variables, + state->num_constraints); - if (optimality_norm == NORM_TYPE_L_INF) { - state->absolute_dual_residual = get_vector_inf_norm(state->blas_handle, - state->num_variables, state->dual_residual); - } else { - CUBLAS_CHECK(cublasDnrm2_v2_64(state->blas_handle, state->num_variables, - state->dual_residual, 1, &state->absolute_dual_residual)); + if (optimality_norm == NORM_TYPE_L_INF) + { + state->absolute_dual_residual = + get_vector_inf_norm(state->blas_handle, state->num_variables, state->dual_residual); + } + else + { + CUBLAS_CHECK(cublasDnrm2_v2_64( + state->blas_handle, state->num_variables, state->dual_residual, 1, &state->absolute_dual_residual)); } state->absolute_dual_residual /= state->objective_vector_rescaling; @@ -1330,7 +1408,16 @@ void compute_dual_feas_polish_residual(pdhg_solver_state_t *state, const pdhg_so state->relative_dual_residual = state->absolute_dual_residual / (1.0 + state->objective_vector_norm); double base_dual_objective; - CUBLAS_CHECK(cublasDdot(state->blas_handle, state->num_variables, state->dual_slack, 1, ori_state->pdhg_primal_solution, 1, &base_dual_objective)); - double dual_slack_sum = get_vector_sum(state->blas_handle, state->num_constraints, state->ones_dual_d, state->primal_slack); - state->dual_objective_value = (base_dual_objective + dual_slack_sum) / (state->constraint_bound_rescaling * state->objective_vector_rescaling) + state->objective_constant; + CUBLAS_CHECK(cublasDdot(state->blas_handle, + state->num_variables, + state->dual_slack, + 1, + ori_state->pdhg_primal_solution, + 1, + &base_dual_objective)); + double dual_slack_sum = + get_vector_sum(state->blas_handle, state->num_constraints, state->ones_dual_d, state->primal_slack); + state->dual_objective_value = (base_dual_objective + dual_slack_sum) / + (state->constraint_bound_rescaling * state->objective_vector_rescaling) + + state->objective_constant; } diff --git a/test/test_interface.c b/test/test_interface.c index ea71345..5a23077 100644 --- a/test/test_interface.c +++ b/test/test_interface.c @@ -15,40 +15,41 @@ limitations under the License. */ #include "cupdlpx.h" +#include #include -#include -static void print_vec(const char* name, const double* v, int n) { +static void print_vec(const char *name, const double *v, int n) +{ printf("%s:", name); - for (int i = 0; i < n; ++i) printf(" % .6g", v[i]); + for (int i = 0; i < n; ++i) + printf(" % .6g", v[i]); printf("\n"); } -static void run_once(const char* tag, - const matrix_desc_t* A_desc, - const double* c, const double* l, const double* u) +static void run_once(const char *tag, const matrix_desc_t *A_desc, const double *c, const double *l, const double *u) { printf("\n=== %s ===\n", tag); - + // build problem - lp_problem_t* prob = create_lp_problem( - c, // objective_c - A_desc, // constraint matrix A - l, // constraint lower bound con_lb - u, // constraint upper bound con_ub - NULL, // variable lower bound var_lb (defaults to 0) - NULL, // variable upper bound var_ub (defaults to +inf) - NULL // objective constant c0 (defaults to 0.0) + lp_problem_t *prob = create_lp_problem(c, // objective_c + A_desc, // constraint matrix A + l, // constraint lower bound con_lb + u, // constraint upper bound con_ub + NULL, // variable lower bound var_lb (defaults to 0) + NULL, // variable upper bound var_ub (defaults to +inf) + NULL // objective constant c0 (defaults to 0.0) ); - if (!prob) { + if (!prob) + { fprintf(stderr, "[test] create_lp_problem failed for %s.\n", tag); return; } // solve - cupdlpx_result_t* res = solve_lp_problem(prob, NULL); + cupdlpx_result_t *res = solve_lp_problem(prob, NULL); lp_problem_free(prob); - if (!res) { + if (!res) + { fprintf(stderr, "[test] solve_lp_problem failed for %s.\n", tag); return; } @@ -56,38 +57,39 @@ static void run_once(const char* tag, // print results print_vec("x", res->primal_solution, res->num_variables); print_vec("y", res->dual_solution, res->num_constraints); - + // free cupdlpx_result_free(res); } -static void test_warm_start(const char* tag, - const matrix_desc_t* A_desc, - const double* c, const double* l, const double* u) +static void +test_warm_start(const char *tag, const matrix_desc_t *A_desc, const double *c, const double *l, const double *u) { printf("\n=== %s (with initial solution) ===\n", tag); int n = A_desc->n; int m = A_desc->m; - lp_problem_t* prob = create_lp_problem( - c, A_desc, l, u, NULL, NULL, NULL - ); - if (!prob) { + lp_problem_t *prob = create_lp_problem(c, A_desc, l, u, NULL, NULL, NULL); + if (!prob) + { fprintf(stderr, "[test] create_lp_problem failed for %s.\n", tag); return; } // Allocate and set initial solutions (e.g., zeros) - double* primal = (double*)malloc(n * sizeof(double)); - double* dual = (double*)malloc(m * sizeof(double)); - for (int i = 0; i < n; ++i) primal[i] = 1.0; - for (int i = 0; i < m; ++i) dual[i] = 1.0; + double *primal = (double *)malloc(n * sizeof(double)); + double *dual = (double *)malloc(m * sizeof(double)); + for (int i = 0; i < n; ++i) + primal[i] = 1.0; + for (int i = 0; i < m; ++i) + dual[i] = 1.0; set_start_values(prob, primal, dual); - cupdlpx_result_t* res = solve_lp_problem(prob, NULL); - if (!res) { + cupdlpx_result_t *res = solve_lp_problem(prob, NULL); + if (!res) + { fprintf(stderr, "[test] solve_lp_problem failed for %s.\n", tag); lp_problem_free(prob); return; @@ -96,14 +98,14 @@ static void test_warm_start(const char* tag, print_vec("x", res->primal_solution, res->num_variables); print_vec("y", res->dual_solution, res->num_constraints); - free(primal); free(dual); cupdlpx_result_free(res); lp_problem_free(prob); } -int main() { +int main() +{ // Example: min c^T x // s.t. l <= A x <= u, x >= 0 @@ -111,11 +113,7 @@ int main() { int n = 2; // number of variables // A as a dense matrix - double A[3][2] = { - {1.0, 2.0}, - {0.0, 1.0}, - {3.0, 2.0} - }; + double A[3][2] = {{1.0, 2.0}, {0.0, 1.0}, {3.0, 2.0}}; // describe A using matrix_desc_t matrix_desc_t A_dense; @@ -132,7 +130,8 @@ int main() { // describe A using matrix_desc_t matrix_desc_t A_csr; - A_csr.m = m; A_csr.n = n; + A_csr.m = m; + A_csr.n = n; A_csr.fmt = matrix_csr; A_csr.zero_tolerance = 0.0; A_csr.data.csr.nnz = 5; @@ -147,7 +146,8 @@ int main() { // describe A using matrix_desc_t matrix_desc_t A_csc; - A_csc.m = m; A_csc.n = n; + A_csc.m = m; + A_csc.n = n; A_csc.fmt = matrix_csc; A_csc.zero_tolerance = 0.0; A_csc.data.csc.nnz = 5; @@ -162,7 +162,8 @@ int main() { // describe A using matrix_desc_t matrix_desc_t A_coo; - A_coo.m = m; A_coo.n = n; + A_coo.m = m; + A_coo.n = n; A_coo.fmt = matrix_coo; A_coo.zero_tolerance = 0.0; A_coo.data.coo.nnz = 5; @@ -174,47 +175,51 @@ int main() { double c[2] = {1.0, 1.0}; // l&u: constraintbounds - double l[3] = {5.0, -INFINITY, -INFINITY}; // lower bounds - double u[3] = {5.0, 2.0, 8.0}; // upper bounds + double l[3] = {5.0, -INFINITY, -INFINITY}; // lower bounds + double u[3] = {5.0, 2.0, 8.0}; // upper bounds printf("Objective c = ["); - for (int j = 0; j < n; j++) { + for (int j = 0; j < n; j++) + { printf(" %g", c[j]); } printf(" ]\n"); printf("Matrix A:\n"); - for (int i = 0; i < m; i++) { + for (int i = 0; i < m; i++) + { printf(" ["); - for (int j = 0; j < n; j++) { + for (int j = 0; j < n; j++) + { printf(" %g", A[i][j]); } printf(" ]\n"); } printf("Constraint bounds (l <= A x <= b):\n"); - for (int i = 0; i < m; i++) { + for (int i = 0; i < m; i++) + { printf(" row %d: l = %g, u = %g\n", i, l[i], u[i]); } - lp_problem_t* prob = create_lp_problem( - c, // c - &A_dense, // A - l, // con_lb - u, // con_ub - NULL, // var_lb - NULL, // var_ub - NULL // objective_constant + lp_problem_t *prob = create_lp_problem(c, // c + &A_dense, // A + l, // con_lb + u, // con_ub + NULL, // var_lb + NULL, // var_ub + NULL // objective_constant ); - if (!prob) { + if (!prob) + { fprintf(stderr, "[test] create_lp_problem failed.\n"); return 1; } run_once("Test 1: Dense Matrix", &A_dense, c, l, u); - run_once("Test 2: CSR Matrix", &A_csr, c, l, u); - run_once("Test 3: CSC Matrix", &A_csc, c, l, u); - run_once("Test 4: COO Matrix", &A_coo, c, l, u); + run_once("Test 2: CSR Matrix", &A_csr, c, l, u); + run_once("Test 3: CSC Matrix", &A_csc, c, l, u); + run_once("Test 4: COO Matrix", &A_coo, c, l, u); test_warm_start("Test 5: Dense Matrix", &A_dense, c, l, u); test_warm_start("Test 6: CSR Matrix", &A_csr, c, l, u); @@ -222,4 +227,4 @@ int main() { test_warm_start("Test 8: COO Matrix", &A_coo, c, l, u); return 0; -} \ No newline at end of file +}