1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/bas/spbas.h> 3b2f1ab58SBarry Smith 4845006b9SBarry Smith /*MC 52692d6eeSBarry Smith MATSOLVERBAS - Provides ICC(k) with drop tolerance 6845006b9SBarry Smith 7*11a5261eSBarry Smith Works with `MATAIJ` matrices 8845006b9SBarry Smith 9845006b9SBarry Smith Options Database Keys: 100053dfc8SBarry Smith + -pc_factor_levels <l> - number of levels of fill 110053dfc8SBarry Smith - -pc_factor_drop_tolerance - is not currently hooked up to do anything 12845006b9SBarry Smith 130053dfc8SBarry Smith Level: intermediate 14845006b9SBarry Smith 15845006b9SBarry Smith Contributed by: Bas van 't Hof 16845006b9SBarry Smith 17*11a5261eSBarry Smith Note: 1895452b02SPatrick Sanan Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher 190053dfc8SBarry Smith levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code 200053dfc8SBarry Smith we recommend not using this functionality. 210053dfc8SBarry Smith 22db781477SPatrick Sanan .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()` 23845006b9SBarry Smith M*/ 249ccc4a9bSBarry Smith 25b2f1ab58SBarry Smith /* 26b2f1ab58SBarry Smith spbas_memory_requirement: 27b2f1ab58SBarry Smith Calculate the number of bytes needed to store tha matrix 28b2f1ab58SBarry Smith */ 299371c9d4SSatish Balay size_t spbas_memory_requirement(spbas_matrix matrix) { 303dfa2556SBarry Smith size_t memreq = 6 * sizeof(PetscInt) + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */ 31ace3abfcSBarry Smith sizeof(PetscBool) + /* block_data */ 32c328eeadSBarry Smith sizeof(PetscScalar **) + /* values */ 33c328eeadSBarry Smith sizeof(PetscScalar *) + /* alloc_val */ 343dfa2556SBarry Smith 2 * sizeof(PetscInt **) + /* icols, icols0 */ 35c328eeadSBarry Smith 2 * sizeof(PetscInt *) + /* row_nnz, alloc_icol */ 36c328eeadSBarry Smith matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */ 37c328eeadSBarry Smith matrix.nrows * sizeof(PetscInt *); /* icols[*] */ 38b2f1ab58SBarry Smith 39c328eeadSBarry Smith /* icol0[*] */ 402205254eSKarl Rupp if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt); 41b2f1ab58SBarry Smith 42c328eeadSBarry Smith /* icols[*][*] */ 432205254eSKarl Rupp if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt); 442205254eSKarl Rupp else memreq += matrix.nnz * sizeof(PetscInt); 45b2f1ab58SBarry Smith 464e1ff37aSBarry Smith if (matrix.values) { 47c328eeadSBarry Smith memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */ 48c328eeadSBarry Smith /* values[*][*] */ 492205254eSKarl Rupp if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar); 502205254eSKarl Rupp else memreq += matrix.nnz * sizeof(PetscScalar); 51b2f1ab58SBarry Smith } 52b2f1ab58SBarry Smith return memreq; 53b2f1ab58SBarry Smith } 54b2f1ab58SBarry Smith 55b2f1ab58SBarry Smith /* 56b2f1ab58SBarry Smith spbas_allocate_pattern: 57b2f1ab58SBarry Smith allocate the pattern arrays row_nnz, icols and optionally values 58b2f1ab58SBarry Smith */ 599371c9d4SSatish Balay PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values) { 60b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 61b2f1ab58SBarry Smith PetscInt col_idx_type = result->col_idx_type; 62b2f1ab58SBarry Smith 63b2f1ab58SBarry Smith PetscFunctionBegin; 64c328eeadSBarry Smith /* Allocate sparseness pattern */ 659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &result->row_nnz)); 669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &result->icols)); 67b2f1ab58SBarry Smith 68c328eeadSBarry Smith /* If offsets are given wrt an array, create array */ 694e1ff37aSBarry Smith if (col_idx_type == SPBAS_OFFSET_ARRAY) { 709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &result->icol0)); 714e1ff37aSBarry Smith } else { 720298fd71SBarry Smith result->icol0 = NULL; 73b2f1ab58SBarry Smith } 74b2f1ab58SBarry Smith 75c328eeadSBarry Smith /* If values are given, allocate values array */ 764e1ff37aSBarry Smith if (do_values) { 779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &result->values)); 784e1ff37aSBarry Smith } else { 790298fd71SBarry Smith result->values = NULL; 80b2f1ab58SBarry Smith } 81b2f1ab58SBarry Smith PetscFunctionReturn(0); 82b2f1ab58SBarry Smith } 83b2f1ab58SBarry Smith 84b2f1ab58SBarry Smith /* 85b2f1ab58SBarry Smith spbas_allocate_data: 86b2f1ab58SBarry Smith in case of block_data: 87b2f1ab58SBarry Smith Allocate the data arrays alloc_icol and optionally alloc_val, 88b2f1ab58SBarry Smith set appropriate pointers from icols and values; 89b2f1ab58SBarry Smith in case of !block_data: 90b2f1ab58SBarry Smith Allocate the arrays icols[i] and optionally values[i] 91b2f1ab58SBarry Smith */ 929371c9d4SSatish Balay PetscErrorCode spbas_allocate_data(spbas_matrix *result) { 93b2f1ab58SBarry Smith PetscInt i; 94b2f1ab58SBarry Smith PetscInt nnz = result->nnz; 95b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 96b2f1ab58SBarry Smith PetscInt r_nnz; 976c4ed002SBarry Smith PetscBool do_values = (result->values) ? PETSC_TRUE : PETSC_FALSE; 98ace3abfcSBarry Smith PetscBool block_data = result->block_data; 99b2f1ab58SBarry Smith 100b2f1ab58SBarry Smith PetscFunctionBegin; 1014e1ff37aSBarry Smith if (block_data) { 102c328eeadSBarry Smith /* Allocate the column number array and point to it */ 103b2f1ab58SBarry Smith result->n_alloc_icol = nnz; 1042205254eSKarl Rupp 1059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &result->alloc_icol)); 1062205254eSKarl Rupp 107b2f1ab58SBarry Smith result->icols[0] = result->alloc_icol; 108ad540459SPierre Jolivet for (i = 1; i < nrows; i++) result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1]; 109b2f1ab58SBarry Smith 110c328eeadSBarry Smith /* Allocate the value array and point to it */ 1114e1ff37aSBarry Smith if (do_values) { 112b2f1ab58SBarry Smith result->n_alloc_val = nnz; 1132205254eSKarl Rupp 1149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &result->alloc_val)); 1152205254eSKarl Rupp 116b2f1ab58SBarry Smith result->values[0] = result->alloc_val; 117ad540459SPierre Jolivet for (i = 1; i < nrows; i++) result->values[i] = result->values[i - 1] + result->row_nnz[i - 1]; 118b2f1ab58SBarry Smith } 1194e1ff37aSBarry Smith } else { 1204e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 121b2f1ab58SBarry Smith r_nnz = result->row_nnz[i]; 1229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(r_nnz, &result->icols[i])); 123b2f1ab58SBarry Smith } 1244e1ff37aSBarry Smith if (do_values) { 1254e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 126b2f1ab58SBarry Smith r_nnz = result->row_nnz[i]; 1279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(r_nnz, &result->values[i])); 128b2f1ab58SBarry Smith } 129b2f1ab58SBarry Smith } 130b2f1ab58SBarry Smith } 131b2f1ab58SBarry Smith PetscFunctionReturn(0); 132b2f1ab58SBarry Smith } 133b2f1ab58SBarry Smith 134b2f1ab58SBarry Smith /* 135b2f1ab58SBarry Smith spbas_row_order_icol 136b2f1ab58SBarry Smith determine if row i1 should come 137b2f1ab58SBarry Smith + before row i2 in the sorted rows (return -1), 138b2f1ab58SBarry Smith + after (return 1) 139b2f1ab58SBarry Smith + is identical (return 0). 140b2f1ab58SBarry Smith */ 1419371c9d4SSatish Balay int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type) { 142b2f1ab58SBarry Smith PetscInt j; 143b2f1ab58SBarry Smith PetscInt nnz1 = irow_in[i1 + 1] - irow_in[i1]; 144b2f1ab58SBarry Smith PetscInt nnz2 = irow_in[i2 + 1] - irow_in[i2]; 145b2f1ab58SBarry Smith PetscInt *icol1 = &icol_in[irow_in[i1]]; 146b2f1ab58SBarry Smith PetscInt *icol2 = &icol_in[irow_in[i2]]; 147b2f1ab58SBarry Smith 1482205254eSKarl Rupp if (nnz1 < nnz2) return -1; 1492205254eSKarl Rupp if (nnz1 > nnz2) return 1; 150b2f1ab58SBarry Smith 1514e1ff37aSBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 1524e1ff37aSBarry Smith for (j = 0; j < nnz1; j++) { 1532205254eSKarl Rupp if (icol1[j] < icol2[j]) return -1; 1542205254eSKarl Rupp if (icol1[j] > icol2[j]) return 1; 155b2f1ab58SBarry Smith } 1564e1ff37aSBarry Smith } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 1574e1ff37aSBarry Smith for (j = 0; j < nnz1; j++) { 1582205254eSKarl Rupp if (icol1[j] - i1 < icol2[j] - i2) return -1; 1592205254eSKarl Rupp if (icol1[j] - i1 > icol2[j] - i2) return 1; 160b2f1ab58SBarry Smith } 1614e1ff37aSBarry Smith } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 1624e1ff37aSBarry Smith for (j = 1; j < nnz1; j++) { 1632205254eSKarl Rupp if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1; 1642205254eSKarl Rupp if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1; 165b2f1ab58SBarry Smith } 166b2f1ab58SBarry Smith } 167b2f1ab58SBarry Smith return 0; 168b2f1ab58SBarry Smith } 169b2f1ab58SBarry Smith 170b2f1ab58SBarry Smith /* 171b2f1ab58SBarry Smith spbas_mergesort_icols: 172b2f1ab58SBarry Smith return a sorting of the rows in which identical sparseness patterns are 173b2f1ab58SBarry Smith next to each other 174b2f1ab58SBarry Smith */ 1759371c9d4SSatish Balay PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort) { 176c328eeadSBarry Smith PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 177c328eeadSBarry Smith PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 178c328eeadSBarry Smith PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 179c328eeadSBarry Smith PetscInt *ialloc; /* Allocated arrays */ 180c328eeadSBarry Smith PetscInt *iswap; /* auxiliary pointers for swapping */ 181c328eeadSBarry Smith PetscInt *ihlp1; /* Pointers to new version of arrays, */ 182c328eeadSBarry Smith PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 183b2f1ab58SBarry Smith 184b2f1ab58SBarry Smith PetscFunctionBegin; 1859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &ialloc)); 186b2f1ab58SBarry Smith 187b2f1ab58SBarry Smith ihlp1 = ialloc; 188b2f1ab58SBarry Smith ihlp2 = isort; 189b2f1ab58SBarry Smith 190c328eeadSBarry Smith /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 1914e1ff37aSBarry Smith for (istep = 1; istep < nrows; istep *= 2) { 192c328eeadSBarry Smith /* 193c328eeadSBarry Smith Combine sorted parts 194c328eeadSBarry Smith istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 195c328eeadSBarry Smith of ihlp2 and vhlp2 196c328eeadSBarry Smith 197c328eeadSBarry Smith into one sorted part 198c328eeadSBarry Smith istart:istart+2*istep-1 199c328eeadSBarry Smith of ihlp1 and vhlp1 200c328eeadSBarry Smith */ 2014e1ff37aSBarry Smith for (istart = 0; istart < nrows; istart += 2 * istep) { 202c328eeadSBarry Smith /* Set counters and bound array part endings */ 2039371c9d4SSatish Balay i1 = istart; 2049371c9d4SSatish Balay i1end = i1 + istep; 2059371c9d4SSatish Balay if (i1end > nrows) i1end = nrows; 2069371c9d4SSatish Balay i2 = istart + istep; 2079371c9d4SSatish Balay i2end = i2 + istep; 2089371c9d4SSatish Balay if (i2end > nrows) i2end = nrows; 209b2f1ab58SBarry Smith 210c328eeadSBarry Smith /* Merge the two array parts */ 2114e1ff37aSBarry Smith for (i = istart; i < i2end; i++) { 2124e1ff37aSBarry Smith if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) { 213b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 214b2f1ab58SBarry Smith i1++; 2154e1ff37aSBarry Smith } else if (i2 < i2end) { 216b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i2]; 217b2f1ab58SBarry Smith i2++; 2184e1ff37aSBarry Smith } else { 219b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 220b2f1ab58SBarry Smith i1++; 221b2f1ab58SBarry Smith } 222b2f1ab58SBarry Smith } 223b2f1ab58SBarry Smith } 224b2f1ab58SBarry Smith 225c328eeadSBarry Smith /* Swap the two array sets */ 2269371c9d4SSatish Balay iswap = ihlp2; 2279371c9d4SSatish Balay ihlp2 = ihlp1; 2289371c9d4SSatish Balay ihlp1 = iswap; 229b2f1ab58SBarry Smith } 230b2f1ab58SBarry Smith 231c328eeadSBarry Smith /* Copy one more time in case the sorted arrays are the temporary ones */ 2324e1ff37aSBarry Smith if (ihlp2 != isort) { 2332205254eSKarl Rupp for (i = 0; i < nrows; i++) isort[i] = ihlp2[i]; 234b2f1ab58SBarry Smith } 2359566063dSJacob Faibussowitsch PetscCall(PetscFree(ialloc)); 236b2f1ab58SBarry Smith PetscFunctionReturn(0); 237b2f1ab58SBarry Smith } 238b2f1ab58SBarry Smith 239b2f1ab58SBarry Smith /* 240b2f1ab58SBarry Smith spbas_compress_pattern: 241b2f1ab58SBarry Smith calculate a compressed sparseness pattern for a sparseness pattern 242b2f1ab58SBarry Smith given in compressed row storage. The compressed sparseness pattern may 243b2f1ab58SBarry Smith require (much) less memory. 244b2f1ab58SBarry Smith */ 2459371c9d4SSatish Balay PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction) { 246b2f1ab58SBarry Smith PetscInt nnz = irow_in[nrows]; 2473dfa2556SBarry Smith size_t mem_orig = (nrows + nnz) * sizeof(PetscInt); 2483dfa2556SBarry Smith size_t mem_compressed; 249b2f1ab58SBarry Smith PetscInt *isort; 250b2f1ab58SBarry Smith PetscInt *icols; 251b2f1ab58SBarry Smith PetscInt row_nnz; 252b2f1ab58SBarry Smith PetscInt *ipoint; 253ace3abfcSBarry Smith PetscBool *used; 254b2f1ab58SBarry Smith PetscInt ptr; 255b2f1ab58SBarry Smith PetscInt i, j; 256ace3abfcSBarry Smith const PetscBool no_values = PETSC_FALSE; 257b2f1ab58SBarry Smith 258b2f1ab58SBarry Smith PetscFunctionBegin; 259c328eeadSBarry Smith /* Allocate the structure of the new matrix */ 260b2f1ab58SBarry Smith B->nrows = nrows; 261b2f1ab58SBarry Smith B->ncols = ncols; 262b2f1ab58SBarry Smith B->nnz = nnz; 263b2f1ab58SBarry Smith B->col_idx_type = col_idx_type; 264b2f1ab58SBarry Smith B->block_data = PETSC_TRUE; 2652205254eSKarl Rupp 2669566063dSJacob Faibussowitsch PetscCall(spbas_allocate_pattern(B, no_values)); 267b2f1ab58SBarry Smith 268c328eeadSBarry Smith /* When using an offset array, set it */ 2694e1ff37aSBarry Smith if (col_idx_type == SPBAS_OFFSET_ARRAY) { 2702205254eSKarl Rupp for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]]; 271b2f1ab58SBarry Smith } 272b2f1ab58SBarry Smith 273c328eeadSBarry Smith /* Allocate the ordering for the rows */ 2749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &isort)); 2759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &ipoint)); 2769566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nrows, &used)); 277b2f1ab58SBarry Smith 2784e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 279b2f1ab58SBarry Smith B->row_nnz[i] = irow_in[i + 1] - irow_in[i]; 280b2f1ab58SBarry Smith isort[i] = i; 281b2f1ab58SBarry Smith ipoint[i] = i; 282b2f1ab58SBarry Smith } 283b2f1ab58SBarry Smith 284c328eeadSBarry Smith /* Sort the rows so that identical columns will be next to each other */ 2859566063dSJacob Faibussowitsch PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort)); 2869566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n")); 287b2f1ab58SBarry Smith 288c328eeadSBarry Smith /* Replace identical rows with the first one in the list */ 2894e1ff37aSBarry Smith for (i = 1; i < nrows; i++) { 290ad540459SPierre Jolivet if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) ipoint[isort[i]] = ipoint[isort[i - 1]]; 291b2f1ab58SBarry Smith } 292b2f1ab58SBarry Smith 293c328eeadSBarry Smith /* Collect the rows which are used*/ 2942205254eSKarl Rupp for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE; 295b2f1ab58SBarry Smith 296c328eeadSBarry Smith /* Calculate needed memory */ 297b2f1ab58SBarry Smith B->n_alloc_icol = 0; 2984e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 2992205254eSKarl Rupp if (used[i]) B->n_alloc_icol += B->row_nnz[i]; 300b2f1ab58SBarry Smith } 3019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol)); 302b2f1ab58SBarry Smith 303c328eeadSBarry Smith /* Fill in the diagonal offsets for the rows which store their own data */ 304b2f1ab58SBarry Smith ptr = 0; 3054e1ff37aSBarry Smith for (i = 0; i < B->nrows; i++) { 3064e1ff37aSBarry Smith if (used[i]) { 307b2f1ab58SBarry Smith B->icols[i] = &B->alloc_icol[ptr]; 308b2f1ab58SBarry Smith icols = &icol_in[irow_in[i]]; 309b2f1ab58SBarry Smith row_nnz = B->row_nnz[i]; 3104e1ff37aSBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 311ad540459SPierre Jolivet for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j]; 3124e1ff37aSBarry Smith } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 313ad540459SPierre Jolivet for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - i; 3144e1ff37aSBarry Smith } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 315ad540459SPierre Jolivet for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - icols[0]; 316b2f1ab58SBarry Smith } 317b2f1ab58SBarry Smith ptr += B->row_nnz[i]; 318b2f1ab58SBarry Smith } 319b2f1ab58SBarry Smith } 320b2f1ab58SBarry Smith 321c328eeadSBarry Smith /* Point to the right places for all data */ 322ad540459SPierre Jolivet for (i = 0; i < nrows; i++) B->icols[i] = B->icols[ipoint[i]]; 3239566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n")); 3249566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, " (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows))); 325b2f1ab58SBarry Smith 3269566063dSJacob Faibussowitsch PetscCall(PetscFree(isort)); 3279566063dSJacob Faibussowitsch PetscCall(PetscFree(used)); 3289566063dSJacob Faibussowitsch PetscCall(PetscFree(ipoint)); 329b2f1ab58SBarry Smith 330b2f1ab58SBarry Smith mem_compressed = spbas_memory_requirement(*B); 3314e1ff37aSBarry Smith *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig; 332b2f1ab58SBarry Smith PetscFunctionReturn(0); 333b2f1ab58SBarry Smith } 334b2f1ab58SBarry Smith 335b2f1ab58SBarry Smith /* 336b2f1ab58SBarry Smith spbas_incomplete_cholesky 337b2f1ab58SBarry Smith Incomplete Cholesky decomposition 338b2f1ab58SBarry Smith */ 339c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h> 340b2f1ab58SBarry Smith 341b2f1ab58SBarry Smith /* 342b2f1ab58SBarry Smith spbas_delete : de-allocate the arrays owned by this matrix 343b2f1ab58SBarry Smith */ 3449371c9d4SSatish Balay PetscErrorCode spbas_delete(spbas_matrix matrix) { 345b2f1ab58SBarry Smith PetscInt i; 3469ccc4a9bSBarry Smith 347b2f1ab58SBarry Smith PetscFunctionBegin; 3489ccc4a9bSBarry Smith if (matrix.block_data) { 3499566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.alloc_icol)); 3509566063dSJacob Faibussowitsch if (matrix.values) PetscCall(PetscFree(matrix.alloc_val)); 3519ccc4a9bSBarry Smith } else { 3529566063dSJacob Faibussowitsch for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i])); 3539566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.icols)); 3549ccc4a9bSBarry Smith if (matrix.values) { 3559566063dSJacob Faibussowitsch for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i])); 356b2f1ab58SBarry Smith } 357b2f1ab58SBarry Smith } 358b2f1ab58SBarry Smith 3599566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.row_nnz)); 3609566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.icols)); 3619566063dSJacob Faibussowitsch if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0)); 3629566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix.values)); 363b2f1ab58SBarry Smith PetscFunctionReturn(0); 364b2f1ab58SBarry Smith } 365b2f1ab58SBarry Smith 366b2f1ab58SBarry Smith /* 367b2f1ab58SBarry Smith spbas_matrix_to_crs: 368b2f1ab58SBarry Smith Convert an spbas_matrix to compessed row storage 369b2f1ab58SBarry Smith */ 3709371c9d4SSatish Balay PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) { 371b2f1ab58SBarry Smith PetscInt nrows = matrix_A.nrows; 372b2f1ab58SBarry Smith PetscInt nnz = matrix_A.nnz; 373b2f1ab58SBarry Smith PetscInt i, j, r_nnz, i0; 374b2f1ab58SBarry Smith PetscInt *irow; 375b2f1ab58SBarry Smith PetscInt *icol; 376b2f1ab58SBarry Smith PetscInt *icol_A; 377b2f1ab58SBarry Smith MatScalar *val; 378b2f1ab58SBarry Smith PetscScalar *val_A; 379b2f1ab58SBarry Smith PetscInt col_idx_type = matrix_A.col_idx_type; 380ace3abfcSBarry Smith PetscBool do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE; 381b2f1ab58SBarry Smith 382b2f1ab58SBarry Smith PetscFunctionBegin; 3839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows + 1, &irow)); 3849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &icol)); 3859ccc4a9bSBarry Smith *icol_out = icol; 3869ccc4a9bSBarry Smith *irow_out = irow; 3879ccc4a9bSBarry Smith if (do_values) { 3889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &val)); 3899371c9d4SSatish Balay *val_out = val; 3909371c9d4SSatish Balay *icol_out = icol; 3919371c9d4SSatish Balay *irow_out = irow; 392b2f1ab58SBarry Smith } 393b2f1ab58SBarry Smith 394b2f1ab58SBarry Smith irow[0] = 0; 3959ccc4a9bSBarry Smith for (i = 0; i < nrows; i++) { 396b2f1ab58SBarry Smith r_nnz = matrix_A.row_nnz[i]; 397b2f1ab58SBarry Smith i0 = irow[i]; 398b2f1ab58SBarry Smith irow[i + 1] = i0 + r_nnz; 399b2f1ab58SBarry Smith icol_A = matrix_A.icols[i]; 400b2f1ab58SBarry Smith 4019ccc4a9bSBarry Smith if (do_values) { 402b2f1ab58SBarry Smith val_A = matrix_A.values[i]; 4039ccc4a9bSBarry Smith for (j = 0; j < r_nnz; j++) { 404b2f1ab58SBarry Smith icol[i0 + j] = icol_A[j]; 405b2f1ab58SBarry Smith val[i0 + j] = val_A[j]; 406b2f1ab58SBarry Smith } 4079ccc4a9bSBarry Smith } else { 4082205254eSKarl Rupp for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j]; 409b2f1ab58SBarry Smith } 410b2f1ab58SBarry Smith 4119ccc4a9bSBarry Smith if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 4122205254eSKarl Rupp for (j = 0; j < r_nnz; j++) icol[i0 + j] += i; 4132205254eSKarl Rupp } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 414b2f1ab58SBarry Smith i0 = matrix_A.icol0[i]; 4152205254eSKarl Rupp for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0; 416b2f1ab58SBarry Smith } 417b2f1ab58SBarry Smith } 418b2f1ab58SBarry Smith PetscFunctionReturn(0); 419b2f1ab58SBarry Smith } 420b2f1ab58SBarry Smith 421b2f1ab58SBarry Smith /* 422b2f1ab58SBarry Smith spbas_transpose 423b2f1ab58SBarry Smith return the transpose of a matrix 424b2f1ab58SBarry Smith */ 4259371c9d4SSatish Balay PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result) { 426b2f1ab58SBarry Smith PetscInt col_idx_type = in_matrix.col_idx_type; 427b2f1ab58SBarry Smith PetscInt nnz = in_matrix.nnz; 428b2f1ab58SBarry Smith PetscInt ncols = in_matrix.nrows; 429b2f1ab58SBarry Smith PetscInt nrows = in_matrix.ncols; 430b2f1ab58SBarry Smith PetscInt i, j, k; 431b2f1ab58SBarry Smith PetscInt r_nnz; 432b2f1ab58SBarry Smith PetscInt *irow; 4334efc9174SBarry Smith PetscInt icol0 = 0; 434b2f1ab58SBarry Smith PetscScalar *val; 4354e1ff37aSBarry Smith 436b2f1ab58SBarry Smith PetscFunctionBegin; 437c328eeadSBarry Smith /* Copy input values */ 438b2f1ab58SBarry Smith result->nrows = nrows; 439b2f1ab58SBarry Smith result->ncols = ncols; 440b2f1ab58SBarry Smith result->nnz = nnz; 441b2f1ab58SBarry Smith result->col_idx_type = SPBAS_COLUMN_NUMBERS; 442b2f1ab58SBarry Smith result->block_data = PETSC_TRUE; 443b2f1ab58SBarry Smith 444c328eeadSBarry Smith /* Allocate sparseness pattern */ 4459566063dSJacob Faibussowitsch PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE)); 446b2f1ab58SBarry Smith 447c328eeadSBarry Smith /* Count the number of nonzeros in each row */ 4482205254eSKarl Rupp for (i = 0; i < nrows; i++) result->row_nnz[i] = 0; 449b2f1ab58SBarry Smith 4509ccc4a9bSBarry Smith for (i = 0; i < ncols; i++) { 451b2f1ab58SBarry Smith r_nnz = in_matrix.row_nnz[i]; 452b2f1ab58SBarry Smith irow = in_matrix.icols[i]; 4539ccc4a9bSBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 4542205254eSKarl Rupp for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++; 4559ccc4a9bSBarry Smith } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 4562205254eSKarl Rupp for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++; 4579ccc4a9bSBarry Smith } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 458b2f1ab58SBarry Smith icol0 = in_matrix.icol0[i]; 4592205254eSKarl Rupp for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++; 460b2f1ab58SBarry Smith } 461b2f1ab58SBarry Smith } 462b2f1ab58SBarry Smith 463c328eeadSBarry Smith /* Set the pointers to the data */ 4649566063dSJacob Faibussowitsch PetscCall(spbas_allocate_data(result)); 465b2f1ab58SBarry Smith 466c328eeadSBarry Smith /* Reset the number of nonzeros in each row */ 4672205254eSKarl Rupp for (i = 0; i < nrows; i++) result->row_nnz[i] = 0; 468b2f1ab58SBarry Smith 469c328eeadSBarry Smith /* Fill the data arrays */ 4709ccc4a9bSBarry Smith if (in_matrix.values) { 4719ccc4a9bSBarry Smith for (i = 0; i < ncols; i++) { 472b2f1ab58SBarry Smith r_nnz = in_matrix.row_nnz[i]; 473b2f1ab58SBarry Smith irow = in_matrix.icols[i]; 474b2f1ab58SBarry Smith val = in_matrix.values[i]; 475b2f1ab58SBarry Smith 4762205254eSKarl Rupp if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0; 4772205254eSKarl Rupp else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i; 4782205254eSKarl Rupp else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i]; 4799ccc4a9bSBarry Smith for (j = 0; j < r_nnz; j++) { 480b2f1ab58SBarry Smith k = icol0 + irow[j]; 481b2f1ab58SBarry Smith result->icols[k][result->row_nnz[k]] = i; 482b2f1ab58SBarry Smith result->values[k][result->row_nnz[k]] = val[j]; 483b2f1ab58SBarry Smith result->row_nnz[k]++; 484b2f1ab58SBarry Smith } 485b2f1ab58SBarry Smith } 4869ccc4a9bSBarry Smith } else { 4879ccc4a9bSBarry Smith for (i = 0; i < ncols; i++) { 488b2f1ab58SBarry Smith r_nnz = in_matrix.row_nnz[i]; 489b2f1ab58SBarry Smith irow = in_matrix.icols[i]; 490b2f1ab58SBarry Smith 4912205254eSKarl Rupp if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0; 4922205254eSKarl Rupp else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i; 4932205254eSKarl Rupp else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i]; 494b2f1ab58SBarry Smith 4959ccc4a9bSBarry Smith for (j = 0; j < r_nnz; j++) { 496b2f1ab58SBarry Smith k = icol0 + irow[j]; 497b2f1ab58SBarry Smith result->icols[k][result->row_nnz[k]] = i; 498b2f1ab58SBarry Smith result->row_nnz[k]++; 499b2f1ab58SBarry Smith } 500b2f1ab58SBarry Smith } 501b2f1ab58SBarry Smith } 502b2f1ab58SBarry Smith PetscFunctionReturn(0); 503b2f1ab58SBarry Smith } 504b2f1ab58SBarry Smith 505b2f1ab58SBarry Smith /* 506b2f1ab58SBarry Smith spbas_mergesort 507b2f1ab58SBarry Smith 508bb42e86aSJed Brown mergesort for an array of integers and an array of associated 509b2f1ab58SBarry Smith reals 510b2f1ab58SBarry Smith 511b2f1ab58SBarry Smith on output, icol[0..nnz-1] is increasing; 512b2f1ab58SBarry Smith val[0..nnz-1] has undergone the same permutation as icol 513b2f1ab58SBarry Smith 5140298fd71SBarry Smith NB: val may be NULL: in that case, only the integers are sorted 515b2f1ab58SBarry Smith 516b2f1ab58SBarry Smith */ 5179371c9d4SSatish Balay PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) { 518c328eeadSBarry Smith PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 519c328eeadSBarry Smith PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 520c328eeadSBarry Smith PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 521c328eeadSBarry Smith PetscInt *ialloc; /* Allocated arrays */ 5220298fd71SBarry Smith PetscScalar *valloc = NULL; 523c328eeadSBarry Smith PetscInt *iswap; /* auxiliary pointers for swapping */ 524b2f1ab58SBarry Smith PetscScalar *vswap; 525c328eeadSBarry Smith PetscInt *ihlp1; /* Pointers to new version of arrays, */ 5260298fd71SBarry Smith PetscScalar *vhlp1 = NULL; /* (arrays under construction) */ 527c328eeadSBarry Smith PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 5280298fd71SBarry Smith PetscScalar *vhlp2 = NULL; 529b2f1ab58SBarry Smith 530362febeeSStefano Zampini PetscFunctionBegin; 5319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &ialloc)); 532b2f1ab58SBarry Smith ihlp1 = ialloc; 533b2f1ab58SBarry Smith ihlp2 = icol; 534b2f1ab58SBarry Smith 5359ccc4a9bSBarry Smith if (val) { 5369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &valloc)); 537b2f1ab58SBarry Smith vhlp1 = valloc; 538b2f1ab58SBarry Smith vhlp2 = val; 539b2f1ab58SBarry Smith } 540b2f1ab58SBarry Smith 541c328eeadSBarry Smith /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 5429ccc4a9bSBarry Smith for (istep = 1; istep < nnz; istep *= 2) { 543c328eeadSBarry Smith /* 544c328eeadSBarry Smith Combine sorted parts 545c328eeadSBarry Smith istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 546c328eeadSBarry Smith of ihlp2 and vhlp2 547c328eeadSBarry Smith 548c328eeadSBarry Smith into one sorted part 549c328eeadSBarry Smith istart:istart+2*istep-1 550c328eeadSBarry Smith of ihlp1 and vhlp1 551c328eeadSBarry Smith */ 5529ccc4a9bSBarry Smith for (istart = 0; istart < nnz; istart += 2 * istep) { 553c328eeadSBarry Smith /* Set counters and bound array part endings */ 5549371c9d4SSatish Balay i1 = istart; 5559371c9d4SSatish Balay i1end = i1 + istep; 5569371c9d4SSatish Balay if (i1end > nnz) i1end = nnz; 5579371c9d4SSatish Balay i2 = istart + istep; 5589371c9d4SSatish Balay i2end = i2 + istep; 5599371c9d4SSatish Balay if (i2end > nnz) i2end = nnz; 560b2f1ab58SBarry Smith 561c328eeadSBarry Smith /* Merge the two array parts */ 5629ccc4a9bSBarry Smith if (val) { 5639ccc4a9bSBarry Smith for (i = istart; i < i2end; i++) { 5649ccc4a9bSBarry Smith if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) { 565b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 566b2f1ab58SBarry Smith vhlp1[i] = vhlp2[i1]; 567b2f1ab58SBarry Smith i1++; 5689ccc4a9bSBarry Smith } else if (i2 < i2end) { 569b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i2]; 570b2f1ab58SBarry Smith vhlp1[i] = vhlp2[i2]; 571b2f1ab58SBarry Smith i2++; 5729ccc4a9bSBarry Smith } else { 573b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 574b2f1ab58SBarry Smith vhlp1[i] = vhlp2[i1]; 575b2f1ab58SBarry Smith i1++; 576b2f1ab58SBarry Smith } 577b2f1ab58SBarry Smith } 5789ccc4a9bSBarry Smith } else { 5796322f4bdSBarry Smith for (i = istart; i < i2end; i++) { 5806322f4bdSBarry Smith if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) { 581b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 582b2f1ab58SBarry Smith i1++; 5836322f4bdSBarry Smith } else if (i2 < i2end) { 584b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i2]; 585b2f1ab58SBarry Smith i2++; 5866322f4bdSBarry Smith } else { 587b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 588b2f1ab58SBarry Smith i1++; 589b2f1ab58SBarry Smith } 590b2f1ab58SBarry Smith } 591b2f1ab58SBarry Smith } 592b2f1ab58SBarry Smith } 593b2f1ab58SBarry Smith 594c328eeadSBarry Smith /* Swap the two array sets */ 5959371c9d4SSatish Balay iswap = ihlp2; 5969371c9d4SSatish Balay ihlp2 = ihlp1; 5979371c9d4SSatish Balay ihlp1 = iswap; 5989371c9d4SSatish Balay vswap = vhlp2; 5999371c9d4SSatish Balay vhlp2 = vhlp1; 6009371c9d4SSatish Balay vhlp1 = vswap; 601b2f1ab58SBarry Smith } 602b2f1ab58SBarry Smith 603c328eeadSBarry Smith /* Copy one more time in case the sorted arrays are the temporary ones */ 6046322f4bdSBarry Smith if (ihlp2 != icol) { 6052205254eSKarl Rupp for (i = 0; i < nnz; i++) icol[i] = ihlp2[i]; 6066322f4bdSBarry Smith if (val) { 6072205254eSKarl Rupp for (i = 0; i < nnz; i++) val[i] = vhlp2[i]; 608b2f1ab58SBarry Smith } 609b2f1ab58SBarry Smith } 610b2f1ab58SBarry Smith 6119566063dSJacob Faibussowitsch PetscCall(PetscFree(ialloc)); 6129566063dSJacob Faibussowitsch if (val) PetscCall(PetscFree(valloc)); 613b2f1ab58SBarry Smith PetscFunctionReturn(0); 614b2f1ab58SBarry Smith } 615b2f1ab58SBarry Smith 616b2f1ab58SBarry Smith /* 617b2f1ab58SBarry Smith spbas_apply_reordering_rows: 618b2f1ab58SBarry Smith apply the given reordering to the rows: matrix_A = matrix_A(perm,:); 619b2f1ab58SBarry Smith */ 6209371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) { 621b2f1ab58SBarry Smith PetscInt i, j, ip; 622b2f1ab58SBarry Smith PetscInt nrows = matrix_A->nrows; 623b2f1ab58SBarry Smith PetscInt *row_nnz; 624b2f1ab58SBarry Smith PetscInt **icols; 625ace3abfcSBarry Smith PetscBool do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 6260298fd71SBarry Smith PetscScalar **vals = NULL; 627b2f1ab58SBarry Smith 628b2f1ab58SBarry Smith PetscFunctionBegin; 62908401ef6SPierre Jolivet PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern"); 630b2f1ab58SBarry Smith 63148a46eb9SPierre Jolivet if (do_values) PetscCall(PetscMalloc1(nrows, &vals)); 6329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &row_nnz)); 6339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &icols)); 634b2f1ab58SBarry Smith 6356322f4bdSBarry Smith for (i = 0; i < nrows; i++) { 636b2f1ab58SBarry Smith ip = permutation[i]; 6372205254eSKarl Rupp if (do_values) vals[i] = matrix_A->values[ip]; 638b2f1ab58SBarry Smith icols[i] = matrix_A->icols[ip]; 639b2f1ab58SBarry Smith row_nnz[i] = matrix_A->row_nnz[ip]; 6402205254eSKarl Rupp for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i; 641b2f1ab58SBarry Smith } 642b2f1ab58SBarry Smith 6439566063dSJacob Faibussowitsch if (do_values) PetscCall(PetscFree(matrix_A->values)); 6449566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix_A->icols)); 6459566063dSJacob Faibussowitsch PetscCall(PetscFree(matrix_A->row_nnz)); 646b2f1ab58SBarry Smith 6472205254eSKarl Rupp if (do_values) matrix_A->values = vals; 648b2f1ab58SBarry Smith matrix_A->icols = icols; 649b2f1ab58SBarry Smith matrix_A->row_nnz = row_nnz; 650b2f1ab58SBarry Smith PetscFunctionReturn(0); 651b2f1ab58SBarry Smith } 652b2f1ab58SBarry Smith 653b2f1ab58SBarry Smith /* 654b2f1ab58SBarry Smith spbas_apply_reordering_cols: 655b2f1ab58SBarry Smith apply the given reordering to the columns: matrix_A(:,perm) = matrix_A; 656b2f1ab58SBarry Smith */ 6579371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation) { 658b2f1ab58SBarry Smith PetscInt i, j; 659b2f1ab58SBarry Smith PetscInt nrows = matrix_A->nrows; 660b2f1ab58SBarry Smith PetscInt row_nnz; 661b2f1ab58SBarry Smith PetscInt *icols; 662ace3abfcSBarry Smith PetscBool do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 6630298fd71SBarry Smith PetscScalar *vals = NULL; 664b2f1ab58SBarry Smith 665b2f1ab58SBarry Smith PetscFunctionBegin; 66608401ef6SPierre Jolivet PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern"); 667b2f1ab58SBarry Smith 6686322f4bdSBarry Smith for (i = 0; i < nrows; i++) { 669b2f1ab58SBarry Smith icols = matrix_A->icols[i]; 670b2f1ab58SBarry Smith row_nnz = matrix_A->row_nnz[i]; 6712205254eSKarl Rupp if (do_values) vals = matrix_A->values[i]; 672b2f1ab58SBarry Smith 673ad540459SPierre Jolivet for (j = 0; j < row_nnz; j++) icols[j] = permutation[i + icols[j]] - i; 6749566063dSJacob Faibussowitsch PetscCall(spbas_mergesort(row_nnz, icols, vals)); 675b2f1ab58SBarry Smith } 676b2f1ab58SBarry Smith PetscFunctionReturn(0); 677b2f1ab58SBarry Smith } 678b2f1ab58SBarry Smith 679b2f1ab58SBarry Smith /* 680b2f1ab58SBarry Smith spbas_apply_reordering: 681b2f1ab58SBarry Smith apply the given reordering: matrix_A(perm,perm) = matrix_A; 682b2f1ab58SBarry Smith */ 6839371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm) { 684b2f1ab58SBarry Smith PetscFunctionBegin; 6859566063dSJacob Faibussowitsch PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm)); 6869566063dSJacob Faibussowitsch PetscCall(spbas_apply_reordering_cols(matrix_A, permutation)); 687b2f1ab58SBarry Smith PetscFunctionReturn(0); 688b2f1ab58SBarry Smith } 689b2f1ab58SBarry Smith 6909371c9d4SSatish Balay PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result) { 691b2f1ab58SBarry Smith spbas_matrix retval; 692b2f1ab58SBarry Smith PetscInt i, j, i0, r_nnz; 693b2f1ab58SBarry Smith 694b2f1ab58SBarry Smith PetscFunctionBegin; 695c328eeadSBarry Smith /* Copy input values */ 696b2f1ab58SBarry Smith retval.nrows = nrows; 697b2f1ab58SBarry Smith retval.ncols = ncols; 698b2f1ab58SBarry Smith retval.nnz = ai[nrows]; 699b2f1ab58SBarry Smith 700b2f1ab58SBarry Smith retval.block_data = PETSC_TRUE; 701b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 702b2f1ab58SBarry Smith 703c328eeadSBarry Smith /* Allocate output matrix */ 7049566063dSJacob Faibussowitsch PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE)); 7052205254eSKarl Rupp for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i]; 7069566063dSJacob Faibussowitsch PetscCall(spbas_allocate_data(&retval)); 707c328eeadSBarry Smith /* Copy the structure */ 7086322f4bdSBarry Smith for (i = 0; i < retval.nrows; i++) { 709b2f1ab58SBarry Smith i0 = ai[i]; 710b2f1ab58SBarry Smith r_nnz = ai[i + 1] - i0; 711b2f1ab58SBarry Smith 712ad540459SPierre Jolivet for (j = 0; j < r_nnz; j++) retval.icols[i][j] = aj[i0 + j] - i; 713b2f1ab58SBarry Smith } 714b2f1ab58SBarry Smith *result = retval; 715b2f1ab58SBarry Smith PetscFunctionReturn(0); 716b2f1ab58SBarry Smith } 717b2f1ab58SBarry Smith 718b2f1ab58SBarry Smith /* 719b2f1ab58SBarry Smith spbas_mark_row_power: 720b2f1ab58SBarry Smith Mark the columns in row 'row' which are nonzero in 721b2f1ab58SBarry Smith matrix^2log(marker). 722b2f1ab58SBarry Smith */ 723be332245SKarl Rupp PetscErrorCode spbas_mark_row_power(PetscInt *iwork, /* marker-vector */ 724c328eeadSBarry Smith PetscInt row, /* row for which the columns are marked */ 725c328eeadSBarry Smith spbas_matrix *in_matrix, /* matrix for which the power is being calculated */ 726c328eeadSBarry Smith PetscInt marker, /* marker-value: 2^power */ 727c328eeadSBarry Smith PetscInt minmrk, /* lower bound for marked points */ 728c328eeadSBarry Smith PetscInt maxmrk) /* upper bound for marked points */ 729b2f1ab58SBarry Smith { 730b2f1ab58SBarry Smith PetscInt i, j, nnz; 731b2f1ab58SBarry Smith 732b2f1ab58SBarry Smith PetscFunctionBegin; 733b2f1ab58SBarry Smith nnz = in_matrix->row_nnz[row]; 734b2f1ab58SBarry Smith 735c328eeadSBarry Smith /* For higher powers, call this function recursively */ 7366322f4bdSBarry Smith if (marker > 1) { 7376322f4bdSBarry Smith for (i = 0; i < nnz; i++) { 738b2f1ab58SBarry Smith j = row + in_matrix->icols[row][i]; 7396322f4bdSBarry Smith if (minmrk <= j && j < maxmrk && iwork[j] < marker) { 7409566063dSJacob Faibussowitsch PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk)); 741b2f1ab58SBarry Smith iwork[j] |= marker; 742b2f1ab58SBarry Smith } 743b2f1ab58SBarry Smith } 7446322f4bdSBarry Smith } else { 745c328eeadSBarry Smith /* Mark the columns reached */ 7466322f4bdSBarry Smith for (i = 0; i < nnz; i++) { 747b2f1ab58SBarry Smith j = row + in_matrix->icols[row][i]; 7482205254eSKarl Rupp if (minmrk <= j && j < maxmrk) iwork[j] |= 1; 749b2f1ab58SBarry Smith } 750b2f1ab58SBarry Smith } 751b2f1ab58SBarry Smith PetscFunctionReturn(0); 752b2f1ab58SBarry Smith } 753b2f1ab58SBarry Smith 754b2f1ab58SBarry Smith /* 755b2f1ab58SBarry Smith spbas_power 756b2f1ab58SBarry Smith Calculate sparseness patterns for incomplete Cholesky decompositions 757b2f1ab58SBarry Smith of a given order: (almost) all nonzeros of the matrix^(order+1) which 758b2f1ab58SBarry Smith are inside the band width are found and stored in the output sparseness 759b2f1ab58SBarry Smith pattern. 760b2f1ab58SBarry Smith */ 7619371c9d4SSatish Balay PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result) { 762b2f1ab58SBarry Smith spbas_matrix retval; 763b2f1ab58SBarry Smith PetscInt nrows = in_matrix.nrows; 764b2f1ab58SBarry Smith PetscInt ncols = in_matrix.ncols; 765b2f1ab58SBarry Smith PetscInt i, j, kend; 766b2f1ab58SBarry Smith PetscInt nnz, inz; 767b2f1ab58SBarry Smith PetscInt *iwork; 768b2f1ab58SBarry Smith PetscInt marker; 769b2f1ab58SBarry Smith PetscInt maxmrk = 0; 770b2f1ab58SBarry Smith 771b2f1ab58SBarry Smith PetscFunctionBegin; 77208401ef6SPierre Jolivet PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern"); 77308401ef6SPierre Jolivet PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error"); 774aed4548fSBarry Smith PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)"); 77508401ef6SPierre Jolivet PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up"); 776b2f1ab58SBarry Smith 777c328eeadSBarry Smith /* Copy input values*/ 778b2f1ab58SBarry Smith retval.nrows = ncols; 779b2f1ab58SBarry Smith retval.ncols = nrows; 780b2f1ab58SBarry Smith retval.nnz = 0; 781b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 782b2f1ab58SBarry Smith retval.block_data = PETSC_FALSE; 783b2f1ab58SBarry Smith 784c328eeadSBarry Smith /* Allocate sparseness pattern */ 7859566063dSJacob Faibussowitsch PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE)); 786b2f1ab58SBarry Smith 787580bdb30SBarry Smith /* Allocate marker array: note sure the max needed so use the max of the two */ 7889566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork)); 789b2f1ab58SBarry Smith 790c328eeadSBarry Smith /* Calculate marker values */ 7919371c9d4SSatish Balay marker = 1; 7929371c9d4SSatish Balay for (i = 1; i < power; i++) marker *= 2; 793b2f1ab58SBarry Smith 7946322f4bdSBarry Smith for (i = 0; i < nrows; i++) { 795c328eeadSBarry Smith /* Calculate the pattern for each row */ 796b2f1ab58SBarry Smith 797b2f1ab58SBarry Smith nnz = in_matrix.row_nnz[i]; 798b2f1ab58SBarry Smith kend = i + in_matrix.icols[i][nnz - 1]; 7992205254eSKarl Rupp if (maxmrk <= kend) maxmrk = kend + 1; 8009566063dSJacob Faibussowitsch PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk)); 801b2f1ab58SBarry Smith 802c328eeadSBarry Smith /* Count the columns*/ 803b2f1ab58SBarry Smith nnz = 0; 8042205254eSKarl Rupp for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0); 805b2f1ab58SBarry Smith 806c328eeadSBarry Smith /* Allocate the column indices */ 807b2f1ab58SBarry Smith retval.row_nnz[i] = nnz; 8089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &retval.icols[i])); 809b2f1ab58SBarry Smith 810c328eeadSBarry Smith /* Administrate the column indices */ 811b2f1ab58SBarry Smith inz = 0; 8126322f4bdSBarry Smith for (j = i; j < maxmrk; j++) { 8136322f4bdSBarry Smith if (iwork[j]) { 814b2f1ab58SBarry Smith retval.icols[i][inz] = j - i; 815b2f1ab58SBarry Smith inz++; 816b2f1ab58SBarry Smith iwork[j] = 0; 817b2f1ab58SBarry Smith } 818b2f1ab58SBarry Smith } 819b2f1ab58SBarry Smith retval.nnz += nnz; 820b2f1ab58SBarry Smith }; 8219566063dSJacob Faibussowitsch PetscCall(PetscFree(iwork)); 822b2f1ab58SBarry Smith *result = retval; 823b2f1ab58SBarry Smith PetscFunctionReturn(0); 824b2f1ab58SBarry Smith } 825b2f1ab58SBarry Smith 826b2f1ab58SBarry Smith /* 827b2f1ab58SBarry Smith spbas_keep_upper: 828b2f1ab58SBarry Smith remove the lower part of the matrix: keep the upper part 829b2f1ab58SBarry Smith */ 8309371c9d4SSatish Balay PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix) { 831b2f1ab58SBarry Smith PetscInt i, j; 832b2f1ab58SBarry Smith PetscInt jstart; 833b2f1ab58SBarry Smith 834b2f1ab58SBarry Smith PetscFunctionBegin; 8355f80ce2aSJacob Faibussowitsch PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices"); 8366322f4bdSBarry Smith for (i = 0; i < inout_matrix->nrows; i++) { 8376322f4bdSBarry Smith for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { } 8386322f4bdSBarry Smith if (jstart > 0) { 839ad540459SPierre Jolivet for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart]; 840b2f1ab58SBarry Smith 8416322f4bdSBarry Smith if (inout_matrix->values) { 842ad540459SPierre Jolivet for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart]; 843b2f1ab58SBarry Smith } 844b2f1ab58SBarry Smith 845b2f1ab58SBarry Smith inout_matrix->row_nnz[i] -= jstart; 846b2f1ab58SBarry Smith 8476322f4bdSBarry Smith inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt)); 848b2f1ab58SBarry Smith 849ad540459SPierre Jolivet if (inout_matrix->values) inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar)); 850b2f1ab58SBarry Smith inout_matrix->nnz -= jstart; 851b2f1ab58SBarry Smith } 852b2f1ab58SBarry Smith } 853b2f1ab58SBarry Smith PetscFunctionReturn(0); 854b2f1ab58SBarry Smith } 855