1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/bas/spbas.h> 3b2f1ab58SBarry Smith 4845006b9SBarry Smith /*MC 52692d6eeSBarry Smith MATSOLVERBAS - Provides ICC(k) with drop tolerance 6845006b9SBarry Smith 7845006b9SBarry Smith Works with MATAIJ matrices 8845006b9SBarry Smith 9845006b9SBarry Smith Options Database Keys: 100053dfc8SBarry Smith + -pc_factor_levels <l> - number of levels of fill 110053dfc8SBarry Smith - -pc_factor_drop_tolerance - is not currently hooked up to do anything 12845006b9SBarry Smith 130053dfc8SBarry Smith Level: intermediate 14845006b9SBarry Smith 15845006b9SBarry Smith Contributed by: Bas van 't Hof 16845006b9SBarry Smith 1795452b02SPatrick Sanan Notes: 1895452b02SPatrick Sanan Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher 190053dfc8SBarry Smith levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code 200053dfc8SBarry Smith we recommend not using this functionality. 210053dfc8SBarry Smith 22db781477SPatrick Sanan .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()` 23845006b9SBarry Smith 24845006b9SBarry Smith M*/ 259ccc4a9bSBarry Smith 26b2f1ab58SBarry Smith /* 27b2f1ab58SBarry Smith spbas_memory_requirement: 28b2f1ab58SBarry Smith Calculate the number of bytes needed to store tha matrix 29b2f1ab58SBarry Smith */ 309371c9d4SSatish Balay size_t spbas_memory_requirement(spbas_matrix matrix) { 313dfa2556SBarry Smith size_t memreq = 6 * sizeof(PetscInt) + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */ 32ace3abfcSBarry Smith sizeof(PetscBool) + /* block_data */ 33c328eeadSBarry Smith sizeof(PetscScalar **) + /* values */ 34c328eeadSBarry Smith sizeof(PetscScalar *) + /* alloc_val */ 353dfa2556SBarry Smith 2 * sizeof(PetscInt **) + /* icols, icols0 */ 36c328eeadSBarry Smith 2 * sizeof(PetscInt *) + /* row_nnz, alloc_icol */ 37c328eeadSBarry Smith matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */ 38c328eeadSBarry Smith matrix.nrows * sizeof(PetscInt *); /* icols[*] */ 39b2f1ab58SBarry Smith 40c328eeadSBarry Smith /* icol0[*] */ 412205254eSKarl Rupp if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt); 42b2f1ab58SBarry Smith 43c328eeadSBarry Smith /* icols[*][*] */ 442205254eSKarl Rupp if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt); 452205254eSKarl Rupp else memreq += matrix.nnz * sizeof(PetscInt); 46b2f1ab58SBarry Smith 474e1ff37aSBarry Smith if (matrix.values) { 48c328eeadSBarry Smith memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */ 49c328eeadSBarry Smith /* values[*][*] */ 502205254eSKarl Rupp if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar); 512205254eSKarl Rupp else memreq += matrix.nnz * sizeof(PetscScalar); 52b2f1ab58SBarry Smith } 53b2f1ab58SBarry Smith return memreq; 54b2f1ab58SBarry Smith } 55b2f1ab58SBarry Smith 56b2f1ab58SBarry Smith /* 57b2f1ab58SBarry Smith spbas_allocate_pattern: 58b2f1ab58SBarry Smith allocate the pattern arrays row_nnz, icols and optionally values 59b2f1ab58SBarry Smith */ 609371c9d4SSatish Balay PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values) { 61b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 62b2f1ab58SBarry Smith PetscInt col_idx_type = result->col_idx_type; 63b2f1ab58SBarry Smith 64b2f1ab58SBarry Smith PetscFunctionBegin; 65c328eeadSBarry Smith /* Allocate sparseness pattern */ 669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &result->row_nnz)); 679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &result->icols)); 68b2f1ab58SBarry Smith 69c328eeadSBarry Smith /* If offsets are given wrt an array, create array */ 704e1ff37aSBarry Smith if (col_idx_type == SPBAS_OFFSET_ARRAY) { 719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &result->icol0)); 724e1ff37aSBarry Smith } else { 730298fd71SBarry Smith result->icol0 = NULL; 74b2f1ab58SBarry Smith } 75b2f1ab58SBarry Smith 76c328eeadSBarry Smith /* If values are given, allocate values array */ 774e1ff37aSBarry Smith if (do_values) { 789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &result->values)); 794e1ff37aSBarry Smith } else { 800298fd71SBarry Smith result->values = NULL; 81b2f1ab58SBarry Smith } 82b2f1ab58SBarry Smith PetscFunctionReturn(0); 83b2f1ab58SBarry Smith } 84b2f1ab58SBarry Smith 85b2f1ab58SBarry Smith /* 86b2f1ab58SBarry Smith spbas_allocate_data: 87b2f1ab58SBarry Smith in case of block_data: 88b2f1ab58SBarry Smith Allocate the data arrays alloc_icol and optionally alloc_val, 89b2f1ab58SBarry Smith set appropriate pointers from icols and values; 90b2f1ab58SBarry Smith in case of !block_data: 91b2f1ab58SBarry Smith Allocate the arrays icols[i] and optionally values[i] 92b2f1ab58SBarry Smith */ 939371c9d4SSatish Balay PetscErrorCode spbas_allocate_data(spbas_matrix *result) { 94b2f1ab58SBarry Smith PetscInt i; 95b2f1ab58SBarry Smith PetscInt nnz = result->nnz; 96b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 97b2f1ab58SBarry Smith PetscInt r_nnz; 986c4ed002SBarry Smith PetscBool do_values = (result->values) ? PETSC_TRUE : PETSC_FALSE; 99ace3abfcSBarry Smith PetscBool block_data = result->block_data; 100b2f1ab58SBarry Smith 101b2f1ab58SBarry Smith PetscFunctionBegin; 1024e1ff37aSBarry Smith if (block_data) { 103c328eeadSBarry Smith /* Allocate the column number array and point to it */ 104b2f1ab58SBarry Smith result->n_alloc_icol = nnz; 1052205254eSKarl Rupp 1069566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &result->alloc_icol)); 1072205254eSKarl Rupp 108b2f1ab58SBarry Smith result->icols[0] = result->alloc_icol; 1099371c9d4SSatish Balay for (i = 1; i < nrows; i++) { result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1]; } 110b2f1ab58SBarry Smith 111c328eeadSBarry Smith /* Allocate the value array and point to it */ 1124e1ff37aSBarry Smith if (do_values) { 113b2f1ab58SBarry Smith result->n_alloc_val = nnz; 1142205254eSKarl Rupp 1159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &result->alloc_val)); 1162205254eSKarl Rupp 117b2f1ab58SBarry Smith result->values[0] = result->alloc_val; 1189371c9d4SSatish Balay for (i = 1; i < nrows; i++) { result->values[i] = result->values[i - 1] + result->row_nnz[i - 1]; } 119b2f1ab58SBarry Smith } 1204e1ff37aSBarry Smith } else { 1214e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 122b2f1ab58SBarry Smith r_nnz = result->row_nnz[i]; 1239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(r_nnz, &result->icols[i])); 124b2f1ab58SBarry Smith } 1254e1ff37aSBarry Smith if (do_values) { 1264e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 127b2f1ab58SBarry Smith r_nnz = result->row_nnz[i]; 1289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(r_nnz, &result->values[i])); 129b2f1ab58SBarry Smith } 130b2f1ab58SBarry Smith } 131b2f1ab58SBarry Smith } 132b2f1ab58SBarry Smith PetscFunctionReturn(0); 133b2f1ab58SBarry Smith } 134b2f1ab58SBarry Smith 135b2f1ab58SBarry Smith /* 136b2f1ab58SBarry Smith spbas_row_order_icol 137b2f1ab58SBarry Smith determine if row i1 should come 138b2f1ab58SBarry Smith + before row i2 in the sorted rows (return -1), 139b2f1ab58SBarry Smith + after (return 1) 140b2f1ab58SBarry Smith + is identical (return 0). 141b2f1ab58SBarry Smith */ 1429371c9d4SSatish Balay int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type) { 143b2f1ab58SBarry Smith PetscInt j; 144b2f1ab58SBarry Smith PetscInt nnz1 = irow_in[i1 + 1] - irow_in[i1]; 145b2f1ab58SBarry Smith PetscInt nnz2 = irow_in[i2 + 1] - irow_in[i2]; 146b2f1ab58SBarry Smith PetscInt *icol1 = &icol_in[irow_in[i1]]; 147b2f1ab58SBarry Smith PetscInt *icol2 = &icol_in[irow_in[i2]]; 148b2f1ab58SBarry Smith 1492205254eSKarl Rupp if (nnz1 < nnz2) return -1; 1502205254eSKarl Rupp if (nnz1 > nnz2) return 1; 151b2f1ab58SBarry Smith 1524e1ff37aSBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 1534e1ff37aSBarry Smith for (j = 0; j < nnz1; j++) { 1542205254eSKarl Rupp if (icol1[j] < icol2[j]) return -1; 1552205254eSKarl Rupp if (icol1[j] > icol2[j]) return 1; 156b2f1ab58SBarry Smith } 1574e1ff37aSBarry Smith } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 1584e1ff37aSBarry Smith for (j = 0; j < nnz1; j++) { 1592205254eSKarl Rupp if (icol1[j] - i1 < icol2[j] - i2) return -1; 1602205254eSKarl Rupp if (icol1[j] - i1 > icol2[j] - i2) return 1; 161b2f1ab58SBarry Smith } 1624e1ff37aSBarry Smith } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 1634e1ff37aSBarry Smith for (j = 1; j < nnz1; j++) { 1642205254eSKarl Rupp if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1; 1652205254eSKarl Rupp if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1; 166b2f1ab58SBarry Smith } 167b2f1ab58SBarry Smith } 168b2f1ab58SBarry Smith return 0; 169b2f1ab58SBarry Smith } 170b2f1ab58SBarry Smith 171b2f1ab58SBarry Smith /* 172b2f1ab58SBarry Smith spbas_mergesort_icols: 173b2f1ab58SBarry Smith return a sorting of the rows in which identical sparseness patterns are 174b2f1ab58SBarry Smith next to each other 175b2f1ab58SBarry Smith */ 1769371c9d4SSatish Balay PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort) { 177c328eeadSBarry Smith PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 178c328eeadSBarry Smith PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 179c328eeadSBarry Smith PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 180c328eeadSBarry Smith PetscInt *ialloc; /* Allocated arrays */ 181c328eeadSBarry Smith PetscInt *iswap; /* auxiliary pointers for swapping */ 182c328eeadSBarry Smith PetscInt *ihlp1; /* Pointers to new version of arrays, */ 183c328eeadSBarry Smith PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 184b2f1ab58SBarry Smith 185b2f1ab58SBarry Smith PetscFunctionBegin; 1869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &ialloc)); 187b2f1ab58SBarry Smith 188b2f1ab58SBarry Smith ihlp1 = ialloc; 189b2f1ab58SBarry Smith ihlp2 = isort; 190b2f1ab58SBarry Smith 191c328eeadSBarry Smith /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 1924e1ff37aSBarry Smith for (istep = 1; istep < nrows; istep *= 2) { 193c328eeadSBarry Smith /* 194c328eeadSBarry Smith Combine sorted parts 195c328eeadSBarry Smith istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 196c328eeadSBarry Smith of ihlp2 and vhlp2 197c328eeadSBarry Smith 198c328eeadSBarry Smith into one sorted part 199c328eeadSBarry Smith istart:istart+2*istep-1 200c328eeadSBarry Smith of ihlp1 and vhlp1 201c328eeadSBarry Smith */ 2024e1ff37aSBarry Smith for (istart = 0; istart < nrows; istart += 2 * istep) { 203c328eeadSBarry Smith /* Set counters and bound array part endings */ 2049371c9d4SSatish Balay i1 = istart; 2059371c9d4SSatish Balay i1end = i1 + istep; 2069371c9d4SSatish Balay if (i1end > nrows) i1end = nrows; 2079371c9d4SSatish Balay i2 = istart + istep; 2089371c9d4SSatish Balay i2end = i2 + istep; 2099371c9d4SSatish Balay if (i2end > nrows) i2end = nrows; 210b2f1ab58SBarry Smith 211c328eeadSBarry Smith /* Merge the two array parts */ 2124e1ff37aSBarry Smith for (i = istart; i < i2end; i++) { 2134e1ff37aSBarry Smith if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) { 214b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 215b2f1ab58SBarry Smith i1++; 2164e1ff37aSBarry Smith } else if (i2 < i2end) { 217b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i2]; 218b2f1ab58SBarry Smith i2++; 2194e1ff37aSBarry Smith } else { 220b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 221b2f1ab58SBarry Smith i1++; 222b2f1ab58SBarry Smith } 223b2f1ab58SBarry Smith } 224b2f1ab58SBarry Smith } 225b2f1ab58SBarry Smith 226c328eeadSBarry Smith /* Swap the two array sets */ 2279371c9d4SSatish Balay iswap = ihlp2; 2289371c9d4SSatish Balay ihlp2 = ihlp1; 2299371c9d4SSatish Balay ihlp1 = iswap; 230b2f1ab58SBarry Smith } 231b2f1ab58SBarry Smith 232c328eeadSBarry Smith /* Copy one more time in case the sorted arrays are the temporary ones */ 2334e1ff37aSBarry Smith if (ihlp2 != isort) { 2342205254eSKarl Rupp for (i = 0; i < nrows; i++) isort[i] = ihlp2[i]; 235b2f1ab58SBarry Smith } 2369566063dSJacob Faibussowitsch PetscCall(PetscFree(ialloc)); 237b2f1ab58SBarry Smith PetscFunctionReturn(0); 238b2f1ab58SBarry Smith } 239b2f1ab58SBarry Smith 240b2f1ab58SBarry Smith /* 241b2f1ab58SBarry Smith spbas_compress_pattern: 242b2f1ab58SBarry Smith calculate a compressed sparseness pattern for a sparseness pattern 243b2f1ab58SBarry Smith given in compressed row storage. The compressed sparseness pattern may 244b2f1ab58SBarry Smith require (much) less memory. 245b2f1ab58SBarry Smith */ 2469371c9d4SSatish Balay PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction) { 247b2f1ab58SBarry Smith PetscInt nnz = irow_in[nrows]; 2483dfa2556SBarry Smith size_t mem_orig = (nrows + nnz) * sizeof(PetscInt); 2493dfa2556SBarry Smith size_t mem_compressed; 250b2f1ab58SBarry Smith PetscInt *isort; 251b2f1ab58SBarry Smith PetscInt *icols; 252b2f1ab58SBarry Smith PetscInt row_nnz; 253b2f1ab58SBarry Smith PetscInt *ipoint; 254ace3abfcSBarry Smith PetscBool *used; 255b2f1ab58SBarry Smith PetscInt ptr; 256b2f1ab58SBarry Smith PetscInt i, j; 257ace3abfcSBarry Smith const PetscBool no_values = PETSC_FALSE; 258b2f1ab58SBarry Smith 259b2f1ab58SBarry Smith PetscFunctionBegin; 260c328eeadSBarry Smith /* Allocate the structure of the new matrix */ 261b2f1ab58SBarry Smith B->nrows = nrows; 262b2f1ab58SBarry Smith B->ncols = ncols; 263b2f1ab58SBarry Smith B->nnz = nnz; 264b2f1ab58SBarry Smith B->col_idx_type = col_idx_type; 265b2f1ab58SBarry Smith B->block_data = PETSC_TRUE; 2662205254eSKarl Rupp 2679566063dSJacob Faibussowitsch PetscCall(spbas_allocate_pattern(B, no_values)); 268b2f1ab58SBarry Smith 269c328eeadSBarry Smith /* When using an offset array, set it */ 2704e1ff37aSBarry Smith if (col_idx_type == SPBAS_OFFSET_ARRAY) { 2712205254eSKarl Rupp for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]]; 272b2f1ab58SBarry Smith } 273b2f1ab58SBarry Smith 274c328eeadSBarry Smith /* Allocate the ordering for the rows */ 2759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &isort)); 2769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &ipoint)); 2779566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nrows, &used)); 278b2f1ab58SBarry Smith 2794e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 280b2f1ab58SBarry Smith B->row_nnz[i] = irow_in[i + 1] - irow_in[i]; 281b2f1ab58SBarry Smith isort[i] = i; 282b2f1ab58SBarry Smith ipoint[i] = i; 283b2f1ab58SBarry Smith } 284b2f1ab58SBarry Smith 285c328eeadSBarry Smith /* Sort the rows so that identical columns will be next to each other */ 2869566063dSJacob Faibussowitsch PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort)); 2879566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n")); 288b2f1ab58SBarry Smith 289c328eeadSBarry Smith /* Replace identical rows with the first one in the list */ 2904e1ff37aSBarry Smith for (i = 1; i < nrows; i++) { 2919371c9d4SSatish Balay if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) { ipoint[isort[i]] = ipoint[isort[i - 1]]; } 292b2f1ab58SBarry Smith } 293b2f1ab58SBarry Smith 294c328eeadSBarry Smith /* Collect the rows which are used*/ 2952205254eSKarl Rupp for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE; 296b2f1ab58SBarry Smith 297c328eeadSBarry Smith /* Calculate needed memory */ 298b2f1ab58SBarry Smith B->n_alloc_icol = 0; 2994e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 3002205254eSKarl Rupp if (used[i]) B->n_alloc_icol += B->row_nnz[i]; 301b2f1ab58SBarry Smith } 3029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol)); 303b2f1ab58SBarry Smith 304c328eeadSBarry Smith /* Fill in the diagonal offsets for the rows which store their own data */ 305b2f1ab58SBarry Smith ptr = 0; 3064e1ff37aSBarry Smith for (i = 0; i < B->nrows; i++) { 3074e1ff37aSBarry Smith if (used[i]) { 308b2f1ab58SBarry Smith B->icols[i] = &B->alloc_icol[ptr]; 309b2f1ab58SBarry Smith icols = &icol_in[irow_in[i]]; 310b2f1ab58SBarry Smith row_nnz = B->row_nnz[i]; 3114e1ff37aSBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 3129371c9d4SSatish Balay for (j = 0; j < row_nnz; j++) { B->icols[i][j] = icols[j]; } 3134e1ff37aSBarry Smith } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 3149371c9d4SSatish Balay for (j = 0; j < row_nnz; j++) { B->icols[i][j] = icols[j] - i; } 3154e1ff37aSBarry Smith } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 3169371c9d4SSatish Balay for (j = 0; j < row_nnz; j++) { B->icols[i][j] = icols[j] - icols[0]; } 317b2f1ab58SBarry Smith } 318b2f1ab58SBarry Smith ptr += B->row_nnz[i]; 319b2f1ab58SBarry Smith } 320b2f1ab58SBarry Smith } 321b2f1ab58SBarry Smith 322c328eeadSBarry Smith /* Point to the right places for all data */ 3239371c9d4SSatish Balay for (i = 0; i < nrows; i++) { B->icols[i] = B->icols[ipoint[i]]; } 3249566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n")); 3259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, " (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows))); 326b2f1ab58SBarry Smith 3279566063dSJacob Faibussowitsch PetscCall(PetscFree(isort)); 3289566063dSJacob Faibussowitsch PetscCall(PetscFree(used)); 3299566063dSJacob Faibussowitsch PetscCall(PetscFree(ipoint)); 330b2f1ab58SBarry Smith 331b2f1ab58SBarry Smith mem_compressed = spbas_memory_requirement(*B); 3324e1ff37aSBarry Smith *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig; 333b2f1ab58SBarry Smith PetscFunctionReturn(0); 334b2f1ab58SBarry Smith } 335b2f1ab58SBarry Smith 336b2f1ab58SBarry Smith /* 337b2f1ab58SBarry Smith spbas_incomplete_cholesky 338b2f1ab58SBarry Smith Incomplete Cholesky decomposition 339b2f1ab58SBarry Smith */ 340c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h> 341b2f1ab58SBarry Smith 342b2f1ab58SBarry Smith /* 343b2f1ab58SBarry Smith spbas_delete : de-allocate the arrays owned by this matrix 344b2f1ab58SBarry Smith */ 3459371c9d4SSatish Balay PetscErrorCode spbas_delete(spbas_matrix matrix) { 346b2f1ab58SBarry Smith PetscInt i; 3479ccc4a9bSBarry Smith 348b2f1ab58SBarry Smith PetscFunctionBegin; 3499ccc4a9bSBarry Smith if (matrix.block_data) { 3509566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.alloc_icol)); 3519566063dSJacob Faibussowitsch if (matrix.values) PetscCall(PetscFree(matrix.alloc_val)); 3529ccc4a9bSBarry Smith } else { 3539566063dSJacob Faibussowitsch for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i])); 3549566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.icols)); 3559ccc4a9bSBarry Smith if (matrix.values) { 3569566063dSJacob Faibussowitsch for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i])); 357b2f1ab58SBarry Smith } 358b2f1ab58SBarry Smith } 359b2f1ab58SBarry Smith 3609566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.row_nnz)); 3619566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.icols)); 3629566063dSJacob Faibussowitsch if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0)); 3639566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.values)); 364b2f1ab58SBarry Smith PetscFunctionReturn(0); 365b2f1ab58SBarry Smith } 366b2f1ab58SBarry Smith 367b2f1ab58SBarry Smith /* 368b2f1ab58SBarry Smith spbas_matrix_to_crs: 369b2f1ab58SBarry Smith Convert an spbas_matrix to compessed row storage 370b2f1ab58SBarry Smith */ 3719371c9d4SSatish Balay PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) { 372b2f1ab58SBarry Smith PetscInt nrows = matrix_A.nrows; 373b2f1ab58SBarry Smith PetscInt nnz = matrix_A.nnz; 374b2f1ab58SBarry Smith PetscInt i, j, r_nnz, i0; 375b2f1ab58SBarry Smith PetscInt *irow; 376b2f1ab58SBarry Smith PetscInt *icol; 377b2f1ab58SBarry Smith PetscInt *icol_A; 378b2f1ab58SBarry Smith MatScalar *val; 379b2f1ab58SBarry Smith PetscScalar *val_A; 380b2f1ab58SBarry Smith PetscInt col_idx_type = matrix_A.col_idx_type; 381ace3abfcSBarry Smith PetscBool do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE; 382b2f1ab58SBarry Smith 383b2f1ab58SBarry Smith PetscFunctionBegin; 3849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows + 1, &irow)); 3859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &icol)); 3869ccc4a9bSBarry Smith *icol_out = icol; 3879ccc4a9bSBarry Smith *irow_out = irow; 3889ccc4a9bSBarry Smith if (do_values) { 3899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &val)); 3909371c9d4SSatish Balay *val_out = val; 3919371c9d4SSatish Balay *icol_out = icol; 3929371c9d4SSatish Balay *irow_out = irow; 393b2f1ab58SBarry Smith } 394b2f1ab58SBarry Smith 395b2f1ab58SBarry Smith irow[0] = 0; 3969ccc4a9bSBarry Smith for (i = 0; i < nrows; i++) { 397b2f1ab58SBarry Smith r_nnz = matrix_A.row_nnz[i]; 398b2f1ab58SBarry Smith i0 = irow[i]; 399b2f1ab58SBarry Smith irow[i + 1] = i0 + r_nnz; 400b2f1ab58SBarry Smith icol_A = matrix_A.icols[i]; 401b2f1ab58SBarry Smith 4029ccc4a9bSBarry Smith if (do_values) { 403b2f1ab58SBarry Smith val_A = matrix_A.values[i]; 4049ccc4a9bSBarry Smith for (j = 0; j < r_nnz; j++) { 405b2f1ab58SBarry Smith icol[i0 + j] = icol_A[j]; 406b2f1ab58SBarry Smith val[i0 + j] = val_A[j]; 407b2f1ab58SBarry Smith } 4089ccc4a9bSBarry Smith } else { 4092205254eSKarl Rupp for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j]; 410b2f1ab58SBarry Smith } 411b2f1ab58SBarry Smith 4129ccc4a9bSBarry Smith if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 4132205254eSKarl Rupp for (j = 0; j < r_nnz; j++) icol[i0 + j] += i; 4142205254eSKarl Rupp } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 415b2f1ab58SBarry Smith i0 = matrix_A.icol0[i]; 4162205254eSKarl Rupp for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0; 417b2f1ab58SBarry Smith } 418b2f1ab58SBarry Smith } 419b2f1ab58SBarry Smith PetscFunctionReturn(0); 420b2f1ab58SBarry Smith } 421b2f1ab58SBarry Smith 422b2f1ab58SBarry Smith /* 423b2f1ab58SBarry Smith spbas_transpose 424b2f1ab58SBarry Smith return the transpose of a matrix 425b2f1ab58SBarry Smith */ 4269371c9d4SSatish Balay PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result) { 427b2f1ab58SBarry Smith PetscInt col_idx_type = in_matrix.col_idx_type; 428b2f1ab58SBarry Smith PetscInt nnz = in_matrix.nnz; 429b2f1ab58SBarry Smith PetscInt ncols = in_matrix.nrows; 430b2f1ab58SBarry Smith PetscInt nrows = in_matrix.ncols; 431b2f1ab58SBarry Smith PetscInt i, j, k; 432b2f1ab58SBarry Smith PetscInt r_nnz; 433b2f1ab58SBarry Smith PetscInt *irow; 4344efc9174SBarry Smith PetscInt icol0 = 0; 435b2f1ab58SBarry Smith PetscScalar *val; 4364e1ff37aSBarry Smith 437b2f1ab58SBarry Smith PetscFunctionBegin; 438c328eeadSBarry Smith /* Copy input values */ 439b2f1ab58SBarry Smith result->nrows = nrows; 440b2f1ab58SBarry Smith result->ncols = ncols; 441b2f1ab58SBarry Smith result->nnz = nnz; 442b2f1ab58SBarry Smith result->col_idx_type = SPBAS_COLUMN_NUMBERS; 443b2f1ab58SBarry Smith result->block_data = PETSC_TRUE; 444b2f1ab58SBarry Smith 445c328eeadSBarry Smith /* Allocate sparseness pattern */ 4469566063dSJacob Faibussowitsch PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE)); 447b2f1ab58SBarry Smith 448c328eeadSBarry Smith /* Count the number of nonzeros in each row */ 4492205254eSKarl Rupp for (i = 0; i < nrows; i++) result->row_nnz[i] = 0; 450b2f1ab58SBarry Smith 4519ccc4a9bSBarry Smith for (i = 0; i < ncols; i++) { 452b2f1ab58SBarry Smith r_nnz = in_matrix.row_nnz[i]; 453b2f1ab58SBarry Smith irow = in_matrix.icols[i]; 4549ccc4a9bSBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 4552205254eSKarl Rupp for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++; 4569ccc4a9bSBarry Smith } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 4572205254eSKarl Rupp for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++; 4589ccc4a9bSBarry Smith } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 459b2f1ab58SBarry Smith icol0 = in_matrix.icol0[i]; 4602205254eSKarl Rupp for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++; 461b2f1ab58SBarry Smith } 462b2f1ab58SBarry Smith } 463b2f1ab58SBarry Smith 464c328eeadSBarry Smith /* Set the pointers to the data */ 4659566063dSJacob Faibussowitsch PetscCall(spbas_allocate_data(result)); 466b2f1ab58SBarry Smith 467c328eeadSBarry Smith /* Reset the number of nonzeros in each row */ 4682205254eSKarl Rupp for (i = 0; i < nrows; i++) result->row_nnz[i] = 0; 469b2f1ab58SBarry Smith 470c328eeadSBarry Smith /* Fill the data arrays */ 4719ccc4a9bSBarry Smith if (in_matrix.values) { 4729ccc4a9bSBarry Smith for (i = 0; i < ncols; i++) { 473b2f1ab58SBarry Smith r_nnz = in_matrix.row_nnz[i]; 474b2f1ab58SBarry Smith irow = in_matrix.icols[i]; 475b2f1ab58SBarry Smith val = in_matrix.values[i]; 476b2f1ab58SBarry Smith 4772205254eSKarl Rupp if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0; 4782205254eSKarl Rupp else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i; 4792205254eSKarl Rupp else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i]; 4809ccc4a9bSBarry Smith for (j = 0; j < r_nnz; j++) { 481b2f1ab58SBarry Smith k = icol0 + irow[j]; 482b2f1ab58SBarry Smith result->icols[k][result->row_nnz[k]] = i; 483b2f1ab58SBarry Smith result->values[k][result->row_nnz[k]] = val[j]; 484b2f1ab58SBarry Smith result->row_nnz[k]++; 485b2f1ab58SBarry Smith } 486b2f1ab58SBarry Smith } 4879ccc4a9bSBarry Smith } else { 4889ccc4a9bSBarry Smith for (i = 0; i < ncols; i++) { 489b2f1ab58SBarry Smith r_nnz = in_matrix.row_nnz[i]; 490b2f1ab58SBarry Smith irow = in_matrix.icols[i]; 491b2f1ab58SBarry Smith 4922205254eSKarl Rupp if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0; 4932205254eSKarl Rupp else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i; 4942205254eSKarl Rupp else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i]; 495b2f1ab58SBarry Smith 4969ccc4a9bSBarry Smith for (j = 0; j < r_nnz; j++) { 497b2f1ab58SBarry Smith k = icol0 + irow[j]; 498b2f1ab58SBarry Smith result->icols[k][result->row_nnz[k]] = i; 499b2f1ab58SBarry Smith result->row_nnz[k]++; 500b2f1ab58SBarry Smith } 501b2f1ab58SBarry Smith } 502b2f1ab58SBarry Smith } 503b2f1ab58SBarry Smith PetscFunctionReturn(0); 504b2f1ab58SBarry Smith } 505b2f1ab58SBarry Smith 506b2f1ab58SBarry Smith /* 507b2f1ab58SBarry Smith spbas_mergesort 508b2f1ab58SBarry Smith 509bb42e86aSJed Brown mergesort for an array of integers and an array of associated 510b2f1ab58SBarry Smith reals 511b2f1ab58SBarry Smith 512b2f1ab58SBarry Smith on output, icol[0..nnz-1] is increasing; 513b2f1ab58SBarry Smith val[0..nnz-1] has undergone the same permutation as icol 514b2f1ab58SBarry Smith 5150298fd71SBarry Smith NB: val may be NULL: in that case, only the integers are sorted 516b2f1ab58SBarry Smith 517b2f1ab58SBarry Smith */ 5189371c9d4SSatish Balay PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) { 519c328eeadSBarry Smith PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 520c328eeadSBarry Smith PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 521c328eeadSBarry Smith PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 522c328eeadSBarry Smith PetscInt *ialloc; /* Allocated arrays */ 5230298fd71SBarry Smith PetscScalar *valloc = NULL; 524c328eeadSBarry Smith PetscInt *iswap; /* auxiliary pointers for swapping */ 525b2f1ab58SBarry Smith PetscScalar *vswap; 526c328eeadSBarry Smith PetscInt *ihlp1; /* Pointers to new version of arrays, */ 5270298fd71SBarry Smith PetscScalar *vhlp1 = NULL; /* (arrays under construction) */ 528c328eeadSBarry Smith PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 5290298fd71SBarry Smith PetscScalar *vhlp2 = NULL; 530b2f1ab58SBarry Smith 531362febeeSStefano Zampini PetscFunctionBegin; 5329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &ialloc)); 533b2f1ab58SBarry Smith ihlp1 = ialloc; 534b2f1ab58SBarry Smith ihlp2 = icol; 535b2f1ab58SBarry Smith 5369ccc4a9bSBarry Smith if (val) { 5379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &valloc)); 538b2f1ab58SBarry Smith vhlp1 = valloc; 539b2f1ab58SBarry Smith vhlp2 = val; 540b2f1ab58SBarry Smith } 541b2f1ab58SBarry Smith 542c328eeadSBarry Smith /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 5439ccc4a9bSBarry Smith for (istep = 1; istep < nnz; istep *= 2) { 544c328eeadSBarry Smith /* 545c328eeadSBarry Smith Combine sorted parts 546c328eeadSBarry Smith istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 547c328eeadSBarry Smith of ihlp2 and vhlp2 548c328eeadSBarry Smith 549c328eeadSBarry Smith into one sorted part 550c328eeadSBarry Smith istart:istart+2*istep-1 551c328eeadSBarry Smith of ihlp1 and vhlp1 552c328eeadSBarry Smith */ 5539ccc4a9bSBarry Smith for (istart = 0; istart < nnz; istart += 2 * istep) { 554c328eeadSBarry Smith /* Set counters and bound array part endings */ 5559371c9d4SSatish Balay i1 = istart; 5569371c9d4SSatish Balay i1end = i1 + istep; 5579371c9d4SSatish Balay if (i1end > nnz) i1end = nnz; 5589371c9d4SSatish Balay i2 = istart + istep; 5599371c9d4SSatish Balay i2end = i2 + istep; 5609371c9d4SSatish Balay if (i2end > nnz) i2end = nnz; 561b2f1ab58SBarry Smith 562c328eeadSBarry Smith /* Merge the two array parts */ 5639ccc4a9bSBarry Smith if (val) { 5649ccc4a9bSBarry Smith for (i = istart; i < i2end; i++) { 5659ccc4a9bSBarry Smith if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) { 566b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 567b2f1ab58SBarry Smith vhlp1[i] = vhlp2[i1]; 568b2f1ab58SBarry Smith i1++; 5699ccc4a9bSBarry Smith } else if (i2 < i2end) { 570b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i2]; 571b2f1ab58SBarry Smith vhlp1[i] = vhlp2[i2]; 572b2f1ab58SBarry Smith i2++; 5739ccc4a9bSBarry Smith } else { 574b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 575b2f1ab58SBarry Smith vhlp1[i] = vhlp2[i1]; 576b2f1ab58SBarry Smith i1++; 577b2f1ab58SBarry Smith } 578b2f1ab58SBarry Smith } 5799ccc4a9bSBarry Smith } else { 5806322f4bdSBarry Smith for (i = istart; i < i2end; i++) { 5816322f4bdSBarry Smith if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) { 582b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 583b2f1ab58SBarry Smith i1++; 5846322f4bdSBarry Smith } else if (i2 < i2end) { 585b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i2]; 586b2f1ab58SBarry Smith i2++; 5876322f4bdSBarry Smith } else { 588b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 589b2f1ab58SBarry Smith i1++; 590b2f1ab58SBarry Smith } 591b2f1ab58SBarry Smith } 592b2f1ab58SBarry Smith } 593b2f1ab58SBarry Smith } 594b2f1ab58SBarry Smith 595c328eeadSBarry Smith /* Swap the two array sets */ 5969371c9d4SSatish Balay iswap = ihlp2; 5979371c9d4SSatish Balay ihlp2 = ihlp1; 5989371c9d4SSatish Balay ihlp1 = iswap; 5999371c9d4SSatish Balay vswap = vhlp2; 6009371c9d4SSatish Balay vhlp2 = vhlp1; 6019371c9d4SSatish Balay vhlp1 = vswap; 602b2f1ab58SBarry Smith } 603b2f1ab58SBarry Smith 604c328eeadSBarry Smith /* Copy one more time in case the sorted arrays are the temporary ones */ 6056322f4bdSBarry Smith if (ihlp2 != icol) { 6062205254eSKarl Rupp for (i = 0; i < nnz; i++) icol[i] = ihlp2[i]; 6076322f4bdSBarry Smith if (val) { 6082205254eSKarl Rupp for (i = 0; i < nnz; i++) val[i] = vhlp2[i]; 609b2f1ab58SBarry Smith } 610b2f1ab58SBarry Smith } 611b2f1ab58SBarry Smith 6129566063dSJacob Faibussowitsch PetscCall(PetscFree(ialloc)); 6139566063dSJacob Faibussowitsch if (val) PetscCall(PetscFree(valloc)); 614b2f1ab58SBarry Smith PetscFunctionReturn(0); 615b2f1ab58SBarry Smith } 616b2f1ab58SBarry Smith 617b2f1ab58SBarry Smith /* 618b2f1ab58SBarry Smith spbas_apply_reordering_rows: 619b2f1ab58SBarry Smith apply the given reordering to the rows: matrix_A = matrix_A(perm,:); 620b2f1ab58SBarry Smith */ 6219371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) { 622b2f1ab58SBarry Smith PetscInt i, j, ip; 623b2f1ab58SBarry Smith PetscInt nrows = matrix_A->nrows; 624b2f1ab58SBarry Smith PetscInt *row_nnz; 625b2f1ab58SBarry Smith PetscInt **icols; 626ace3abfcSBarry Smith PetscBool do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 6270298fd71SBarry Smith PetscScalar **vals = NULL; 628b2f1ab58SBarry Smith 629b2f1ab58SBarry Smith PetscFunctionBegin; 63008401ef6SPierre Jolivet PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern"); 631b2f1ab58SBarry Smith 632*48a46eb9SPierre Jolivet if (do_values) PetscCall(PetscMalloc1(nrows, &vals)); 6339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &row_nnz)); 6349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &icols)); 635b2f1ab58SBarry Smith 6366322f4bdSBarry Smith for (i = 0; i < nrows; i++) { 637b2f1ab58SBarry Smith ip = permutation[i]; 6382205254eSKarl Rupp if (do_values) vals[i] = matrix_A->values[ip]; 639b2f1ab58SBarry Smith icols[i] = matrix_A->icols[ip]; 640b2f1ab58SBarry Smith row_nnz[i] = matrix_A->row_nnz[ip]; 6412205254eSKarl Rupp for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i; 642b2f1ab58SBarry Smith } 643b2f1ab58SBarry Smith 6449566063dSJacob Faibussowitsch if (do_values) PetscCall(PetscFree(matrix_A->values)); 6459566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix_A->icols)); 6469566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix_A->row_nnz)); 647b2f1ab58SBarry Smith 6482205254eSKarl Rupp if (do_values) matrix_A->values = vals; 649b2f1ab58SBarry Smith matrix_A->icols = icols; 650b2f1ab58SBarry Smith matrix_A->row_nnz = row_nnz; 651b2f1ab58SBarry Smith PetscFunctionReturn(0); 652b2f1ab58SBarry Smith } 653b2f1ab58SBarry Smith 654b2f1ab58SBarry Smith /* 655b2f1ab58SBarry Smith spbas_apply_reordering_cols: 656b2f1ab58SBarry Smith apply the given reordering to the columns: matrix_A(:,perm) = matrix_A; 657b2f1ab58SBarry Smith */ 6589371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation) { 659b2f1ab58SBarry Smith PetscInt i, j; 660b2f1ab58SBarry Smith PetscInt nrows = matrix_A->nrows; 661b2f1ab58SBarry Smith PetscInt row_nnz; 662b2f1ab58SBarry Smith PetscInt *icols; 663ace3abfcSBarry Smith PetscBool do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 6640298fd71SBarry Smith PetscScalar *vals = NULL; 665b2f1ab58SBarry Smith 666b2f1ab58SBarry Smith PetscFunctionBegin; 66708401ef6SPierre Jolivet PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern"); 668b2f1ab58SBarry Smith 6696322f4bdSBarry Smith for (i = 0; i < nrows; i++) { 670b2f1ab58SBarry Smith icols = matrix_A->icols[i]; 671b2f1ab58SBarry Smith row_nnz = matrix_A->row_nnz[i]; 6722205254eSKarl Rupp if (do_values) vals = matrix_A->values[i]; 673b2f1ab58SBarry Smith 6749371c9d4SSatish Balay for (j = 0; j < row_nnz; j++) { icols[j] = permutation[i + icols[j]] - i; } 6759566063dSJacob Faibussowitsch PetscCall(spbas_mergesort(row_nnz, icols, vals)); 676b2f1ab58SBarry Smith } 677b2f1ab58SBarry Smith PetscFunctionReturn(0); 678b2f1ab58SBarry Smith } 679b2f1ab58SBarry Smith 680b2f1ab58SBarry Smith /* 681b2f1ab58SBarry Smith spbas_apply_reordering: 682b2f1ab58SBarry Smith apply the given reordering: matrix_A(perm,perm) = matrix_A; 683b2f1ab58SBarry Smith */ 6849371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm) { 685b2f1ab58SBarry Smith PetscFunctionBegin; 6869566063dSJacob Faibussowitsch PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm)); 6879566063dSJacob Faibussowitsch PetscCall(spbas_apply_reordering_cols(matrix_A, permutation)); 688b2f1ab58SBarry Smith PetscFunctionReturn(0); 689b2f1ab58SBarry Smith } 690b2f1ab58SBarry Smith 6919371c9d4SSatish Balay PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result) { 692b2f1ab58SBarry Smith spbas_matrix retval; 693b2f1ab58SBarry Smith PetscInt i, j, i0, r_nnz; 694b2f1ab58SBarry Smith 695b2f1ab58SBarry Smith PetscFunctionBegin; 696c328eeadSBarry Smith /* Copy input values */ 697b2f1ab58SBarry Smith retval.nrows = nrows; 698b2f1ab58SBarry Smith retval.ncols = ncols; 699b2f1ab58SBarry Smith retval.nnz = ai[nrows]; 700b2f1ab58SBarry Smith 701b2f1ab58SBarry Smith retval.block_data = PETSC_TRUE; 702b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 703b2f1ab58SBarry Smith 704c328eeadSBarry Smith /* Allocate output matrix */ 7059566063dSJacob Faibussowitsch PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE)); 7062205254eSKarl Rupp for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i]; 7079566063dSJacob Faibussowitsch PetscCall(spbas_allocate_data(&retval)); 708c328eeadSBarry Smith /* Copy the structure */ 7096322f4bdSBarry Smith for (i = 0; i < retval.nrows; i++) { 710b2f1ab58SBarry Smith i0 = ai[i]; 711b2f1ab58SBarry Smith r_nnz = ai[i + 1] - i0; 712b2f1ab58SBarry Smith 7139371c9d4SSatish Balay for (j = 0; j < r_nnz; j++) { retval.icols[i][j] = aj[i0 + j] - i; } 714b2f1ab58SBarry Smith } 715b2f1ab58SBarry Smith *result = retval; 716b2f1ab58SBarry Smith PetscFunctionReturn(0); 717b2f1ab58SBarry Smith } 718b2f1ab58SBarry Smith 719b2f1ab58SBarry Smith /* 720b2f1ab58SBarry Smith spbas_mark_row_power: 721b2f1ab58SBarry Smith Mark the columns in row 'row' which are nonzero in 722b2f1ab58SBarry Smith matrix^2log(marker). 723b2f1ab58SBarry Smith */ 724be332245SKarl Rupp PetscErrorCode spbas_mark_row_power(PetscInt *iwork, /* marker-vector */ 725c328eeadSBarry Smith PetscInt row, /* row for which the columns are marked */ 726c328eeadSBarry Smith spbas_matrix *in_matrix, /* matrix for which the power is being calculated */ 727c328eeadSBarry Smith PetscInt marker, /* marker-value: 2^power */ 728c328eeadSBarry Smith PetscInt minmrk, /* lower bound for marked points */ 729c328eeadSBarry Smith PetscInt maxmrk) /* upper bound for marked points */ 730b2f1ab58SBarry Smith { 731b2f1ab58SBarry Smith PetscInt i, j, nnz; 732b2f1ab58SBarry Smith 733b2f1ab58SBarry Smith PetscFunctionBegin; 734b2f1ab58SBarry Smith nnz = in_matrix->row_nnz[row]; 735b2f1ab58SBarry Smith 736c328eeadSBarry Smith /* For higher powers, call this function recursively */ 7376322f4bdSBarry Smith if (marker > 1) { 7386322f4bdSBarry Smith for (i = 0; i < nnz; i++) { 739b2f1ab58SBarry Smith j = row + in_matrix->icols[row][i]; 7406322f4bdSBarry Smith if (minmrk <= j && j < maxmrk && iwork[j] < marker) { 7419566063dSJacob Faibussowitsch PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk)); 742b2f1ab58SBarry Smith iwork[j] |= marker; 743b2f1ab58SBarry Smith } 744b2f1ab58SBarry Smith } 7456322f4bdSBarry Smith } else { 746c328eeadSBarry Smith /* Mark the columns reached */ 7476322f4bdSBarry Smith for (i = 0; i < nnz; i++) { 748b2f1ab58SBarry Smith j = row + in_matrix->icols[row][i]; 7492205254eSKarl Rupp if (minmrk <= j && j < maxmrk) iwork[j] |= 1; 750b2f1ab58SBarry Smith } 751b2f1ab58SBarry Smith } 752b2f1ab58SBarry Smith PetscFunctionReturn(0); 753b2f1ab58SBarry Smith } 754b2f1ab58SBarry Smith 755b2f1ab58SBarry Smith /* 756b2f1ab58SBarry Smith spbas_power 757b2f1ab58SBarry Smith Calculate sparseness patterns for incomplete Cholesky decompositions 758b2f1ab58SBarry Smith of a given order: (almost) all nonzeros of the matrix^(order+1) which 759b2f1ab58SBarry Smith are inside the band width are found and stored in the output sparseness 760b2f1ab58SBarry Smith pattern. 761b2f1ab58SBarry Smith */ 7629371c9d4SSatish Balay PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result) { 763b2f1ab58SBarry Smith spbas_matrix retval; 764b2f1ab58SBarry Smith PetscInt nrows = in_matrix.nrows; 765b2f1ab58SBarry Smith PetscInt ncols = in_matrix.ncols; 766b2f1ab58SBarry Smith PetscInt i, j, kend; 767b2f1ab58SBarry Smith PetscInt nnz, inz; 768b2f1ab58SBarry Smith PetscInt *iwork; 769b2f1ab58SBarry Smith PetscInt marker; 770b2f1ab58SBarry Smith PetscInt maxmrk = 0; 771b2f1ab58SBarry Smith 772b2f1ab58SBarry Smith PetscFunctionBegin; 77308401ef6SPierre Jolivet PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern"); 77408401ef6SPierre Jolivet PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error"); 775aed4548fSBarry Smith PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)"); 77608401ef6SPierre Jolivet PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up"); 777b2f1ab58SBarry Smith 778c328eeadSBarry Smith /* Copy input values*/ 779b2f1ab58SBarry Smith retval.nrows = ncols; 780b2f1ab58SBarry Smith retval.ncols = nrows; 781b2f1ab58SBarry Smith retval.nnz = 0; 782b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 783b2f1ab58SBarry Smith retval.block_data = PETSC_FALSE; 784b2f1ab58SBarry Smith 785c328eeadSBarry Smith /* Allocate sparseness pattern */ 7869566063dSJacob Faibussowitsch PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE)); 787b2f1ab58SBarry Smith 788580bdb30SBarry Smith /* Allocate marker array: note sure the max needed so use the max of the two */ 7899566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork)); 790b2f1ab58SBarry Smith 791c328eeadSBarry Smith /* Calculate marker values */ 7929371c9d4SSatish Balay marker = 1; 7939371c9d4SSatish Balay for (i = 1; i < power; i++) marker *= 2; 794b2f1ab58SBarry Smith 7956322f4bdSBarry Smith for (i = 0; i < nrows; i++) { 796c328eeadSBarry Smith /* Calculate the pattern for each row */ 797b2f1ab58SBarry Smith 798b2f1ab58SBarry Smith nnz = in_matrix.row_nnz[i]; 799b2f1ab58SBarry Smith kend = i + in_matrix.icols[i][nnz - 1]; 8002205254eSKarl Rupp if (maxmrk <= kend) maxmrk = kend + 1; 8019566063dSJacob Faibussowitsch PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk)); 802b2f1ab58SBarry Smith 803c328eeadSBarry Smith /* Count the columns*/ 804b2f1ab58SBarry Smith nnz = 0; 8052205254eSKarl Rupp for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0); 806b2f1ab58SBarry Smith 807c328eeadSBarry Smith /* Allocate the column indices */ 808b2f1ab58SBarry Smith retval.row_nnz[i] = nnz; 8099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &retval.icols[i])); 810b2f1ab58SBarry Smith 811c328eeadSBarry Smith /* Administrate the column indices */ 812b2f1ab58SBarry Smith inz = 0; 8136322f4bdSBarry Smith for (j = i; j < maxmrk; j++) { 8146322f4bdSBarry Smith if (iwork[j]) { 815b2f1ab58SBarry Smith retval.icols[i][inz] = j - i; 816b2f1ab58SBarry Smith inz++; 817b2f1ab58SBarry Smith iwork[j] = 0; 818b2f1ab58SBarry Smith } 819b2f1ab58SBarry Smith } 820b2f1ab58SBarry Smith retval.nnz += nnz; 821b2f1ab58SBarry Smith }; 8229566063dSJacob Faibussowitsch PetscCall(PetscFree(iwork)); 823b2f1ab58SBarry Smith *result = retval; 824b2f1ab58SBarry Smith PetscFunctionReturn(0); 825b2f1ab58SBarry Smith } 826b2f1ab58SBarry Smith 827b2f1ab58SBarry Smith /* 828b2f1ab58SBarry Smith spbas_keep_upper: 829b2f1ab58SBarry Smith remove the lower part of the matrix: keep the upper part 830b2f1ab58SBarry Smith */ 8319371c9d4SSatish Balay PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix) { 832b2f1ab58SBarry Smith PetscInt i, j; 833b2f1ab58SBarry Smith PetscInt jstart; 834b2f1ab58SBarry Smith 835b2f1ab58SBarry Smith PetscFunctionBegin; 8365f80ce2aSJacob Faibussowitsch PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices"); 8376322f4bdSBarry Smith for (i = 0; i < inout_matrix->nrows; i++) { 8386322f4bdSBarry Smith for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { } 8396322f4bdSBarry Smith if (jstart > 0) { 8409371c9d4SSatish Balay for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) { inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart]; } 841b2f1ab58SBarry Smith 8426322f4bdSBarry Smith if (inout_matrix->values) { 8439371c9d4SSatish Balay for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) { inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart]; } 844b2f1ab58SBarry Smith } 845b2f1ab58SBarry Smith 846b2f1ab58SBarry Smith inout_matrix->row_nnz[i] -= jstart; 847b2f1ab58SBarry Smith 8486322f4bdSBarry Smith inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt)); 849b2f1ab58SBarry Smith 8509371c9d4SSatish Balay if (inout_matrix->values) { inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar)); } 851b2f1ab58SBarry Smith inout_matrix->nnz -= jstart; 852b2f1ab58SBarry Smith } 853b2f1ab58SBarry Smith } 854b2f1ab58SBarry Smith PetscFunctionReturn(0); 855b2f1ab58SBarry Smith } 856