/*******************************************************************************
* Copyright 2024 Intel Corporation.
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

/*
*   Content : Intel(R) oneAPI Math Kernel Library (oneMKL) IE Sparse BLAS C
*             example for mkl_sparse_convert_{csr, bsr} functions and
*             conversion from dense to sparse COO format
*
********************************************************************************
*
* Example program for using Intel oneMKL Inspector-Executor Sparse BLAS routines
* for conversions between different matrix formats.
*
* The following Inspector-Executor Sparse BLAS routines are used in the example:
*
*   Initialization/Destruction stage:
*          mkl_sparse_d_create_coo
*          mkl_sparse_d_create_csc
*          mkl_sparse_destroy
*
*   Format conversion functions:
*          mkl_sparse_convert_csr   mkl_sparse_convert_bsr
*
*   Sparse matrix export functions:
*          mkl_sparse_d_export_csr  mkl_sparse_d_export_bsr
*
*   Ordering and verification functions:
*          mkl_sparse_order         mkl_sparse_d_mv
*
* Consider the matrix A below to be represented in multiple sparse formats
* (see 'Sparse Matrix Storage Schemes' in the Intel oneMKL Reference Manual):
*
*       |  1  -2   0   0 |
*       |  3  -4   0   0 |
*   A = |  0   0   5  -6 |.
*       |  0   0   7  -8 |
*       |  9 -10   0   0 |
*       | 11 -12   0   0 |
*
*  A coordinate format (COO) representation of the matrix is:
*
*     cooNrows  = 6
*     cooNcols  = 4
*     cooNnz    = 12
*     cooIndex  = SPARSE_INDEX_BASE_ZERO
*     cooRowIdx = (0   0   1   1   2   2   3   3   4   4   5   5)
*     cooColIdx = (0   1   0   1   2   3   2   3   0   1   0   1)
*     cooValues = (1  -2   3  -4   5  -6   7  -8   9 -10  11 -12)
*
*  A compressed sparse row (CSR) representation of the matrix with
*  three arrays is:
*
*     csrNrows  = 6
*     csrNcols  = 4
*     csrNnz    = 12
*     csrIndex  = SPARSE_INDEX_BASE_ZERO
*     csrRowPtr = (0       2       4       6       8      10      12)
*     csrColIdx = (0   1   0   1   2   3   2   3   0   1   0   1)
*     csrValues = (1  -2   3  -4   5  -6   7  -8   9 -10  11 -12)
*
*  A compressed sparse row (CSR) representation of the transpose of the
*  matrix with three arrays is:
*
*     csrTNrows  = 4
*     csrTNcols  = 6
*     csrTNnz    = 12
*     csrTIndex  = SPARSE_INDEX_BASE_ZERO
*     csrTRowPtr = (0               4               8      10      12)
*     csrTColIdx = (0   1   4   5   0   1   4   5   2   3   2   3)
*     csrTValues = (1   3   9  11  -2  -4 -10 -12   5   7  -6  -8)
*
*  A compressed sparse column (CSC) representation of the matrix with
*  three arrays is:
*
*     cscNrows  = 6
*     cscNcols  = 4
*     cscNnz    = 12
*     cscIndex  = SPARSE_INDEX_BASE_ZERO
*     cscColPtr = (0               4               8      10      12)
*     cscRowIdx = (0   1   4   5   0   1   4   5   2   3   2   3)
*     cscValues = (1   3   9  11  -2  -4 -10 -12   5   7  -6  -8)
*
*  A block compressed sparse row (BSR) representation of the matrix with
*  three arrays and column-major 2x2 blocks is:
*
*     bsrNrows        = 3
*     bsrNcols        = 2
*     bsrBlock_layout = SPARSE_LAYOUT_COLUMN_MAJOR
*     bsrBlock_size   = 2
*     bsrNnz          = 3
*     bsrIndex        = SPARSE_INDEX_BASE_ZERO
*     bsrRowPtr       = (0               1               2               3)
*     bsrColIdx       = (0               1               0)
*     bsrValues       = (1   3  -2  -4   5   7  -6  -8   9  11 -10 -12)
*
*  This example presents:
*    * format conversion by hand from Dense matrix to COO format using a thresholding function
*      to determine whether each value in the dense matrix is non-zero or not
*    * mkl_sparse_convert_{csr/bsr}() usage to convert a sparse matrix:
*      * from COO to CSR,
*      * from CSR to CSR Transpose,
*      * from COO to BSR,
*    * mkl_sparse_d_export_{csr/bsr}() usage to extract CSR, CSR Transpose and BSR arrays,
*    * mkl_sparse_d_create_csc() usage to create a CSC matrix from copies of the exported
*      CSR Transpose arrays
*
*  Note that we call mkl_sparse_order() after most conversions so that the arrays are printed
*  in sorted order in this example.  This is not strictly necessary, as most algorithms work
*  with unsorted data (the exception is unoptimized TRSV/M, which requires the lower and
*  upper parts to be separated within each row/column for CSR/CSC/BSR formats).  Algorithms can
*  sometimes be more efficient with sorted data due to better memory access patterns.
*
********************************************************************************
*/
#include <stdio.h>
#include <assert.h>
#include <math.h>
#include "mkl.h"

#ifdef MKL_ILP64
#define INT_PRINT_FORMAT "%4lld"
#else
#define INT_PRINT_FORMAT "%4d"
#endif

// Prints one labelled integer on its own line as "\t\t<name> = <val>",
// formatting val with INT_PRINT_FORMAT.
void print_int_value(const char *name, MKL_INT val) {
    printf("\t\t%s = ", name);
    printf(INT_PRINT_FORMAT, val);
    printf("\n");
}

// Prints a labelled integer array on one line: "\t\t<name> =" followed by
// each of the first len entries formatted with INT_PRINT_FORMAT and a
// trailing comma, then a newline.
//
//   name  - label printed in front of the values
//   array - entries to print; only read, never modified
//   len   - number of entries to print (nothing is printed for len <= 0)
void print_int_array(const char *name, const MKL_INT *array, MKL_INT len) {
    printf("\t\t%s =", name);
    for (MKL_INT i = 0; i < len; ++i) {
        printf(INT_PRINT_FORMAT ",", array[i]);
    }
    printf("\n");
}

// Prints the symbolic name of an index base as "\t\t<name> = <base>".
// Any value other than SPARSE_INDEX_BASE_ZERO is reported as
// SPARSE_INDEX_BASE_ONE.
void print_index(const char *name, sparse_index_base_t idxBase) {
    const char *label = "SPARSE_INDEX_BASE_ONE";
    if (idxBase == SPARSE_INDEX_BASE_ZERO) {
        label = "SPARSE_INDEX_BASE_ZERO";
    }
    printf("\t\t%s = %s\n", name, label);
}

// Prints a labelled array of doubles on one line: "\t\t<name> =" followed by
// each of the first len entries formatted as "%4.0f" (no fractional digits)
// and a trailing comma, then a newline.
//
//   name  - label printed in front of the values
//   array - entries to print; only read, never modified
//   len   - number of entries to print (nothing is printed for len <= 0)
void print_flt_array(const char *name, const double *array, MKL_INT len) {
    printf("\t\t%s =", name);
    for (MKL_INT i = 0; i < len; ++i) {
        printf("%4.0f,", array[i]);
    }
    printf("\n");
}


//
// Decide whether an entry of the dense matrix counts as a nonzero:
// returns 1 when the magnitude of val is strictly greater than 1e-15,
// and 0 otherwise
//
int threshold_dense(double val) {
    const double cutoff = 1e-15;
    return (val > cutoff) || (val < -cutoff);
}

int main() {
    //*******************************************************************************
    //     Declaration and initialization of parameters for sparse representation of
    //     the matrix A in the COO format:
    //*******************************************************************************
#define M 6
#define N 4

    MKL_INT cooNrows = 0, cooNcols = 0, cooNnz = 0;
    MKL_INT csrNrows = 0, csrNcols = 0, csrNnz = 0;
    MKL_INT csrTNrows = 0, csrTNcols = 0, csrTNnz = 0;
    MKL_INT cscNrows = 0, cscNcols = 0, cscNnz = 0;
    MKL_INT bsrBlockNrows = 0, bsrBlockNcols = 0;
    MKL_INT bsrBlockNnz = 0, bsrNnz = 0, bsrBlockSize = 2, blockSize;
    sparse_index_base_t cooIdxBase = SPARSE_INDEX_BASE_ZERO;
    sparse_index_base_t csrIdxBase, csrTIdxBase, cscIdxBase, bsrIdxBase;
    sparse_layout_t bsrLayout = SPARSE_LAYOUT_COLUMN_MAJOR, layout;

    struct matrix_descr descrA;
    descrA.type = SPARSE_MATRIX_TYPE_GENERAL;

    MKL_INT csr_passed = 0, csrT_passed = 0, csc_passed = 0, bsr_passed = 0;
    double TOL = 1.0e-6;
    double csrSum, csrTSum, cscSum, bsrSum;

    //*******************************************************************************
    //    Vectors for testing correctness of matrix formats
    //*******************************************************************************
    double x_M[M] = {1, 1, 1, 1, 1, 1};
    double x_N[N] = {1, 1, 1, 1};
    double y_M[M] = {0, 0, 0, 0, 0, 0};
    double y_N[N] = {0, 0, 0, 0};

    //*******************************************************************************
    //    Representation of Matrix in Dense + Row-major format with ldx = N
    //*******************************************************************************

    double denseMatrix[M * N] = {  1,  -2,   0,   0,
                                   3,  -4,   0,   0,
                                   0,   0,   5,  -6,
                                   0,   0,   7,  -8,
                                   9, -10,   0,   0,
                                  11, -12,   0,   0 };

    //*******************************************************************************
    //    Sparse format arrays of the matrix A for COO/CSR/CSC/BSR formats
    //    to be filled later
    //*******************************************************************************

    MKL_INT *cooRowIdx = NULL, *cooColIdx = NULL;
    MKL_INT *csrRowStart = NULL, *csrRowEnd = NULL, *csrColIdx = NULL;
    MKL_INT *csrTRowStart = NULL, *csrTRowEnd = NULL, *csrTColIdx = NULL;
    MKL_INT *cscColPtr = NULL, *cscRowIdx = NULL;
    MKL_INT *bsrRowStart = NULL, *bsrRowEnd = NULL, *bsrColIdx = NULL;
    double *cooValues = NULL, *csrValues = NULL, *csrTValues = NULL, *cscValues = NULL, *bsrValues = NULL;

    // Sparse matrix handles to be used for conversion to different formats.
    // Note that we recommend to always initialize them to NULL before using.
    sparse_matrix_t cooA = NULL, csrA = NULL, csrAT = NULL, cscA = NULL, bsrA = NULL;

    sparse_status_t status;
    int exit_status = 0;

    printf( "\nEXAMPLE PROGRAM for matrix format conversion routines from IE Sparse BLAS\n" );
    printf( "-------------------------------------------------------------------------\n" );


    //******************************************************************************
    //    Convert Dense matrix into COO format with thresholding using custom
    //
    //      int threshold_dense(double val);
    //
    //    function to decide whether a value is non-zero and so should be kept
    //    in sparse format or not.
    //
    //******************************************************************************

    MKL_INT *row_counts = (MKL_INT *)mkl_malloc( (M + 1) * sizeof(MKL_INT), 32);

    //
    // process each row of dense matrix to get count of nonzeros
    //
    // note: this operation can be done in parallel over rows
    //
    for ( MKL_INT row = 0; row < M; ++row) {
        MKL_INT row_nnz = 0;
        for (MKL_INT col = 0; col < N; ++col) {
            if (threshold_dense(denseMatrix[row * N + col])) {
                row_nnz++;
            }
        }
        row_counts[row+1] = row_nnz; // place count in location for prefix_sum
    }
    row_counts[0] = 0;

    //
    // perform an inclusive prefix_sum to get total nnz
    //
    for (MKL_INT row = 0; row < M; ++row) {
        row_counts[row+1] += row_counts[row];
    }

    //
    // Extract NNZ and allocate COO format arrays
    //
    cooNrows   = M;
    cooNcols   = N;
    cooNnz     = row_counts[M];
    const MKL_INT cooIndexing = 0; // choose base for indexing of COO
    cooIdxBase = (cooIndexing == 1) ? SPARSE_INDEX_BASE_ONE : SPARSE_INDEX_BASE_ZERO;
    cooRowIdx  = (MKL_INT *)mkl_malloc( cooNnz * sizeof(MKL_INT), 32);
    cooColIdx  = (MKL_INT *)mkl_malloc( cooNnz * sizeof(MKL_INT), 32);
    cooValues  = (double *) mkl_malloc( cooNnz * sizeof(double), 32);


    //
    // Fill COO format arrays from Dense matrix
    //
    // note: this operation can be done in parallel over rows
    //
    for ( MKL_INT row = 0; row < M; ++row) {
        MKL_INT next = row_counts[row];
        for (MKL_INT col = 0; col < N; ++col) {
            const double val = denseMatrix[row * N + col];
            if ( threshold_dense(val) ) {
                cooRowIdx[next]   = row + cooIndexing;
                cooColIdx[next]   = col + cooIndexing;
                cooValues[next++] = val;
            }
        }
    }

    //
    // Note that the row_counts (up to "indexing" offset) is also a CSR rowptr
    // so this could easily be modified to produce a CSR format instead of COO
    // format by using csrRowptr = row_counts (+indexing), with
    // csrColIdx = cooColIdx and csrValues = cooValues.
    //

    mkl_free(row_counts);


    printf("\t[Input] Dense Matrix has been extracted into COO format:\n");
    print_int_value("nrows", cooNrows);
    print_int_value("ncols", cooNcols);
    print_int_value("nnz  ", cooNnz);
    print_index(    "index", cooIdxBase);
    printf("\n");

    //******************************************************************************
    //    Setup Test   sum_vals =  ones_M' * A * ones_N =  sum_i { cooValues[i] }
    //******************************************************************************
    double sum_vals = 0;
    for (MKL_INT i = 0; i < cooNnz; ++i) {
        sum_vals += cooValues[i];
    }

    //******************************************************************************
    //    Create COO sparse matrix handle
    //******************************************************************************
    status = mkl_sparse_d_create_coo(&cooA, cooIdxBase, cooNrows, cooNcols, cooNnz,
                                     cooRowIdx, cooColIdx, cooValues);

    if (status != SPARSE_STATUS_SUCCESS) {
        printf(" Error in mkl_sparse_d_create_coo: %d \n", status);
        exit_status = 1;
        goto exit;
    }

    printf("\t[Output] Matrix arrays COO format: (converted from Dense format)\n");
    print_int_value("nrows", cooNrows);
    print_int_value("ncols", cooNcols);
    print_int_value("nnz  ", cooNnz);
    print_index(    "index", cooIdxBase);
    print_int_array("cooRowIdx", cooRowIdx, cooNnz);
    print_int_array("cooColIdx", cooColIdx, cooNnz);
    print_flt_array("cooValues", cooValues, cooNnz);
    printf("\n");

    //******************************************************************************
    //    Convert from COO to CSR format
    //******************************************************************************
    status = mkl_sparse_convert_csr(cooA, SPARSE_OPERATION_NON_TRANSPOSE, &csrA);
    status = mkl_sparse_order(csrA);
    status = mkl_sparse_d_export_csr(csrA, &csrIdxBase, &csrNrows, &csrNcols,
                                     &csrRowStart, &csrRowEnd, &csrColIdx, &csrValues);

    if (status != SPARSE_STATUS_SUCCESS) {
        printf(" Error in mkl_sparse_convert_csr: %d \n", status);
        exit_status = 1;
        goto exit;
    }

    // Start Test of CSR matrix
    status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, csrA, descrA, x_N, 0.0, y_M);
    csrSum = 0.0;
    for (MKL_INT i = 0; i < M; ++i) {
        csrSum += y_M[i];
    }
    if ( fabs(csrSum - sum_vals) < TOL ) {
        csr_passed = 1;
    }

    printf("\t[Output] Matrix arrays CSR format (converted from COO):\n");
    csrNnz = csrRowEnd[csrNrows - 1] - csrIdxBase;
    print_int_value("nrows", csrNrows);
    print_int_value("ncols", csrNcols);
    print_int_value("nnz  ", csrNnz);
    print_index(    "index", csrIdxBase);
    print_int_array("csrRowPtr", csrRowStart, csrNrows+1);
    print_int_array("csrColIdx", csrColIdx, csrNnz);
    print_flt_array("csrValues", csrValues, csrNnz);
    if (csr_passed) printf("\t CSR Test Passed\n");
    else printf("\t CSR Test Failed: COO sum = %4.0f, but CSR sum = %4.0f \n", sum_vals, csrSum);
    printf("\n");

    //******************************************************************************
    //    Convert from CSR to CSR Transpose format:
    //******************************************************************************
    status = mkl_sparse_convert_csr(csrA, SPARSE_OPERATION_TRANSPOSE, &csrAT);
    status = mkl_sparse_order(csrAT);

    status = mkl_sparse_d_export_csr(csrAT, &csrTIdxBase, &csrTNrows, &csrTNcols,
                                     &csrTRowStart, &csrTRowEnd, &csrTColIdx, &csrTValues);

    if (status != SPARSE_STATUS_SUCCESS) {
        printf(" Error in mkl_sparse_convert_csr with transpose (csrAT): %d \n", status);
        exit_status = 1;
        goto exit;
    }

    // Start Test of CSR Transpose matrix
    status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, csrAT, descrA, x_M, 0.0, y_N);
    csrTSum = 0.0;
    for (MKL_INT i = 0; i < N; ++i) {
        csrTSum += y_N[i];
    }
    if ( fabs(csrTSum - sum_vals) < TOL ) {
        csrT_passed = 1;
    }

    csrTNnz = csrTRowEnd[csrTNrows - 1] - csrTIdxBase;
    printf("\t[Output] Matrix arrays CSR Transpose format (converted from CSR):\n");
    print_int_value("nrows", csrTNrows);
    print_int_value("ncols", csrTNcols);
    print_int_value("nnz  ", csrTNnz);
    print_index(    "index", csrTIdxBase);
    print_int_array("csrTRowPtr", csrTRowStart, csrTNrows+1);
    print_int_array("csrTColIdx", csrTColIdx, csrTNnz);
    print_flt_array("csrTValues", csrTValues, csrTNnz);
    if (csrT_passed) printf("\t CSR Transpose Test Passed\n");
    else printf("\t CSR Transpose Test Failed: COO sum = %4.0f, but CSR Transpose sum = %4.0f \n", sum_vals, csrTSum);
    printf("\n");

    //******************************************************************************
    //    Convert CSR Transpose to CSC format: arrays of A' in CSR are arrays of A
    //    in CSC format but the dimensions of the matrix cscA vs csrAT are
    //    transposed, so csrAT is not exactly A in CSC format, but can easily
    //    be converted to it by switching dimensions.
    //
    //    As we are just borrowing the arrays from export_csr and want
    //    to have them in true cscA format, we allocate new ones and copy into them
    //    to use in CSC format.
    //******************************************************************************
    cscNrows   = csrTNcols;
    cscNcols   = csrTNrows;
    cscIdxBase = csrTIdxBase;
    cscNnz     = csrTNnz;
    cscColPtr  = (MKL_INT *)mkl_malloc( (cscNcols + 1) * sizeof(MKL_INT), 32);
    cscRowIdx  = (MKL_INT *)mkl_malloc( cscNnz * sizeof(MKL_INT), 32);
    cscValues  = (double *)mkl_malloc( cscNnz * sizeof(double), 32);
    // convert to CSC 3-array format from 4-arrays exported format csrAT
    for (MKL_INT i = 0; i < cscNcols; ++i) {
        cscColPtr[i] = csrTRowStart[i];
    }
    cscColPtr[cscNcols] = csrTRowEnd[cscNcols - 1];
    for (MKL_INT i = 0; i < cscNnz; ++i) {
        cscRowIdx[i] = csrTColIdx[i];
        cscValues[i] = csrTValues[i];
    }

    status = mkl_sparse_d_create_csc(&cscA, cscIdxBase, cscNrows, cscNcols, cscColPtr,
                                     cscColPtr+1, cscRowIdx, cscValues);


    // Start Test of CSC matrix
    status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, cscA, descrA, x_N, 0.0, y_M);
    cscSum = 0.0;
    for (MKL_INT i = 0; i < M; ++i) {
        cscSum += y_M[i];
    }
    if ( fabs(cscSum - sum_vals) < TOL ) {
        csc_passed = 1;
    }

    printf("\t[Output] Matrix arrays CSC format (created from CSR Transpose export):\n");
    print_int_value("nrows", cscNrows);
    print_int_value("ncols", cscNcols);
    print_int_value("nnz  ", cscNnz);
    print_index(    "index", cscIdxBase);
    print_int_array("cscColPtr", cscColPtr, cscNcols+1);
    print_int_array("cscRowIdx", cscRowIdx, cscNnz);
    print_flt_array("cscValues", cscValues, cscNnz);
    if (csc_passed) printf("\t CSC Test Passed\n");
    else printf("\t CSC Test Failed: COO sum = %4.0f, but CSC sum = %4.0f \n", sum_vals, cscSum);
    printf("\n");



    //******************************************************************************
    //    Convert from COO to BSR format
    //******************************************************************************
    status = mkl_sparse_convert_bsr(cooA, bsrBlockSize, bsrLayout,
                                    SPARSE_OPERATION_NON_TRANSPOSE, &bsrA);
    status = mkl_sparse_order(bsrA);
    status = mkl_sparse_d_export_bsr(bsrA, &bsrIdxBase, &layout, &bsrBlockNrows,
                                     &bsrBlockNcols, &blockSize, &bsrRowStart,
                                     &bsrRowEnd, &bsrColIdx, &bsrValues);

    if (status != SPARSE_STATUS_SUCCESS) {
        printf(" Error in mkl_sparse_convert_bsr: %d \n", status);
        exit_status = 1;
        goto exit;
    }

    // Start Test of CSR matrix
    status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, bsrA, descrA, x_N, 0.0, y_M);
    bsrSum = 0.0;
    for (MKL_INT i = 0; i < M; ++i) {
        bsrSum += y_M[i];
    }
    if ( fabs(bsrSum - sum_vals) < TOL ) {
        bsr_passed = 1;
    }

    printf("\t[Output] Matrix arrays BSR format (converted from COO):\n");
    bsrBlockNnz = bsrRowEnd[bsrBlockNrows - 1] - bsrIdxBase;
    bsrNnz = bsrBlockNnz * blockSize * blockSize;
    printf(     "\t\t%s-major block layout\n", layout == SPARSE_LAYOUT_ROW_MAJOR ? "row" : "column");
    print_int_value("block_size ", blockSize);
    print_int_value("block_nrows", bsrBlockNrows);
    print_int_value("block_ncols", bsrBlockNcols);
    print_int_value("block_nnz  ", bsrBlockNnz);
    print_index(    "index      ", bsrIdxBase);
    print_int_array("bsrRowPtr", bsrRowStart, bsrBlockNrows+1);
    print_int_array("bsrColIdx", bsrColIdx, bsrBlockNnz);
    print_flt_array("bsrValues", bsrValues, bsrNnz);
    if (bsr_passed) printf("\t BSR Test Passed\n");
    else printf("\t BSR Test Failed: COO sum = %4.0f, but BSR sum = %4.0f \n", sum_vals, bsrSum);

    printf("\n");

    // Condense results of tests to single return value
    if ( csr_passed * csrT_passed * csc_passed * bsr_passed != 1) {
        exit_status = 1; // at least one test failed
    }

exit:
    // Release matrix handle and deallocate matrix
    if (cooA) mkl_sparse_destroy(cooA);
    if (csrA) mkl_sparse_destroy(csrA);
    if (csrAT) mkl_sparse_destroy(csrAT);
    if (cscA) mkl_sparse_destroy(cscA);
    if (bsrA) mkl_sparse_destroy(bsrA);

    // free allocated arrays
    if (cooRowIdx) mkl_free(cooRowIdx);
    if (cooColIdx) mkl_free(cooColIdx);
    if (cooValues) mkl_free(cooValues);
    if (cscColPtr) mkl_free(cscColPtr);
    if (cscRowIdx) mkl_free(cscRowIdx);
    if (cscValues) mkl_free(cscValues);

    return exit_status;
}
