xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/bas/spbas.h>
3b2f1ab58SBarry Smith 
4845006b9SBarry Smith /*MC
52692d6eeSBarry Smith   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6845006b9SBarry Smith 
7845006b9SBarry Smith   Works with MATAIJ  matrices
8845006b9SBarry Smith 
9845006b9SBarry Smith   Options Database Keys:
100053dfc8SBarry Smith + -pc_factor_levels <l> - number of levels of fill
110053dfc8SBarry Smith - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12845006b9SBarry Smith 
130053dfc8SBarry Smith   Level: intermediate
14845006b9SBarry Smith 
15845006b9SBarry Smith    Contributed by: Bas van 't Hof
16845006b9SBarry Smith 
1795452b02SPatrick Sanan    Notes:
1895452b02SPatrick Sanan     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
190053dfc8SBarry Smith      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
200053dfc8SBarry Smith      we recommend not using this functionality.
210053dfc8SBarry Smith 
22db781477SPatrick Sanan .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()`
23845006b9SBarry Smith 
24845006b9SBarry Smith M*/
259ccc4a9bSBarry Smith 
26b2f1ab58SBarry Smith /*
27b2f1ab58SBarry Smith   spbas_memory_requirement:
28b2f1ab58SBarry Smith     Calculate the number of bytes needed to store tha matrix
29b2f1ab58SBarry Smith */
309371c9d4SSatish Balay size_t spbas_memory_requirement(spbas_matrix matrix) {
313dfa2556SBarry Smith   size_t memreq = 6 * sizeof(PetscInt) +             /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
32ace3abfcSBarry Smith                   sizeof(PetscBool) +                /* block_data */
33c328eeadSBarry Smith                   sizeof(PetscScalar **) +           /* values */
34c328eeadSBarry Smith                   sizeof(PetscScalar *) +            /* alloc_val */
353dfa2556SBarry Smith                   2 * sizeof(PetscInt **) +          /* icols, icols0 */
36c328eeadSBarry Smith                   2 * sizeof(PetscInt *) +           /* row_nnz, alloc_icol */
37c328eeadSBarry Smith                   matrix.nrows * sizeof(PetscInt) +  /* row_nnz[*] */
38c328eeadSBarry Smith                   matrix.nrows * sizeof(PetscInt *); /* icols[*] */
39b2f1ab58SBarry Smith 
40c328eeadSBarry Smith   /* icol0[*] */
412205254eSKarl Rupp   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
42b2f1ab58SBarry Smith 
43c328eeadSBarry Smith   /* icols[*][*] */
442205254eSKarl Rupp   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
452205254eSKarl Rupp   else memreq += matrix.nnz * sizeof(PetscInt);
46b2f1ab58SBarry Smith 
474e1ff37aSBarry Smith   if (matrix.values) {
48c328eeadSBarry Smith     memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */
49c328eeadSBarry Smith     /* values[*][*] */
502205254eSKarl Rupp     if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar);
512205254eSKarl Rupp     else memreq += matrix.nnz * sizeof(PetscScalar);
52b2f1ab58SBarry Smith   }
53b2f1ab58SBarry Smith   return memreq;
54b2f1ab58SBarry Smith }
55b2f1ab58SBarry Smith 
56b2f1ab58SBarry Smith /*
57b2f1ab58SBarry Smith   spbas_allocate_pattern:
58b2f1ab58SBarry Smith     allocate the pattern arrays row_nnz, icols and optionally values
59b2f1ab58SBarry Smith */
609371c9d4SSatish Balay PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values) {
61b2f1ab58SBarry Smith   PetscInt nrows        = result->nrows;
62b2f1ab58SBarry Smith   PetscInt col_idx_type = result->col_idx_type;
63b2f1ab58SBarry Smith 
64b2f1ab58SBarry Smith   PetscFunctionBegin;
65c328eeadSBarry Smith   /* Allocate sparseness pattern */
669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &result->row_nnz));
679566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &result->icols));
68b2f1ab58SBarry Smith 
69c328eeadSBarry Smith   /* If offsets are given wrt an array, create array */
704e1ff37aSBarry Smith   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nrows, &result->icol0));
724e1ff37aSBarry Smith   } else {
730298fd71SBarry Smith     result->icol0 = NULL;
74b2f1ab58SBarry Smith   }
75b2f1ab58SBarry Smith 
76c328eeadSBarry Smith   /* If values are given, allocate values array */
774e1ff37aSBarry Smith   if (do_values) {
789566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nrows, &result->values));
794e1ff37aSBarry Smith   } else {
800298fd71SBarry Smith     result->values = NULL;
81b2f1ab58SBarry Smith   }
82b2f1ab58SBarry Smith   PetscFunctionReturn(0);
83b2f1ab58SBarry Smith }
84b2f1ab58SBarry Smith 
85b2f1ab58SBarry Smith /*
86b2f1ab58SBarry Smith spbas_allocate_data:
87b2f1ab58SBarry Smith    in case of block_data:
88b2f1ab58SBarry Smith        Allocate the data arrays alloc_icol and optionally alloc_val,
89b2f1ab58SBarry Smith        set appropriate pointers from icols and values;
90b2f1ab58SBarry Smith    in case of !block_data:
91b2f1ab58SBarry Smith        Allocate the arrays icols[i] and optionally values[i]
92b2f1ab58SBarry Smith */
939371c9d4SSatish Balay PetscErrorCode spbas_allocate_data(spbas_matrix *result) {
94b2f1ab58SBarry Smith   PetscInt  i;
95b2f1ab58SBarry Smith   PetscInt  nnz   = result->nnz;
96b2f1ab58SBarry Smith   PetscInt  nrows = result->nrows;
97b2f1ab58SBarry Smith   PetscInt  r_nnz;
986c4ed002SBarry Smith   PetscBool do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
99ace3abfcSBarry Smith   PetscBool block_data = result->block_data;
100b2f1ab58SBarry Smith 
101b2f1ab58SBarry Smith   PetscFunctionBegin;
1024e1ff37aSBarry Smith   if (block_data) {
103c328eeadSBarry Smith     /* Allocate the column number array and point to it */
104b2f1ab58SBarry Smith     result->n_alloc_icol = nnz;
1052205254eSKarl Rupp 
1069566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nnz, &result->alloc_icol));
1072205254eSKarl Rupp 
108b2f1ab58SBarry Smith     result->icols[0] = result->alloc_icol;
1099371c9d4SSatish Balay     for (i = 1; i < nrows; i++) { result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1]; }
110b2f1ab58SBarry Smith 
111c328eeadSBarry Smith     /* Allocate the value array and point to it */
1124e1ff37aSBarry Smith     if (do_values) {
113b2f1ab58SBarry Smith       result->n_alloc_val = nnz;
1142205254eSKarl Rupp 
1159566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nnz, &result->alloc_val));
1162205254eSKarl Rupp 
117b2f1ab58SBarry Smith       result->values[0] = result->alloc_val;
1189371c9d4SSatish Balay       for (i = 1; i < nrows; i++) { result->values[i] = result->values[i - 1] + result->row_nnz[i - 1]; }
119b2f1ab58SBarry Smith     }
1204e1ff37aSBarry Smith   } else {
1214e1ff37aSBarry Smith     for (i = 0; i < nrows; i++) {
122b2f1ab58SBarry Smith       r_nnz = result->row_nnz[i];
1239566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(r_nnz, &result->icols[i]));
124b2f1ab58SBarry Smith     }
1254e1ff37aSBarry Smith     if (do_values) {
1264e1ff37aSBarry Smith       for (i = 0; i < nrows; i++) {
127b2f1ab58SBarry Smith         r_nnz = result->row_nnz[i];
1289566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(r_nnz, &result->values[i]));
129b2f1ab58SBarry Smith       }
130b2f1ab58SBarry Smith     }
131b2f1ab58SBarry Smith   }
132b2f1ab58SBarry Smith   PetscFunctionReturn(0);
133b2f1ab58SBarry Smith }
134b2f1ab58SBarry Smith 
135b2f1ab58SBarry Smith /*
136b2f1ab58SBarry Smith   spbas_row_order_icol
137b2f1ab58SBarry Smith      determine if row i1 should come
138b2f1ab58SBarry Smith        + before row i2 in the sorted rows (return -1),
139b2f1ab58SBarry Smith        + after (return 1)
140b2f1ab58SBarry Smith        + is identical (return 0).
141b2f1ab58SBarry Smith */
1429371c9d4SSatish Balay int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type) {
143b2f1ab58SBarry Smith   PetscInt  j;
144b2f1ab58SBarry Smith   PetscInt  nnz1  = irow_in[i1 + 1] - irow_in[i1];
145b2f1ab58SBarry Smith   PetscInt  nnz2  = irow_in[i2 + 1] - irow_in[i2];
146b2f1ab58SBarry Smith   PetscInt *icol1 = &icol_in[irow_in[i1]];
147b2f1ab58SBarry Smith   PetscInt *icol2 = &icol_in[irow_in[i2]];
148b2f1ab58SBarry Smith 
1492205254eSKarl Rupp   if (nnz1 < nnz2) return -1;
1502205254eSKarl Rupp   if (nnz1 > nnz2) return 1;
151b2f1ab58SBarry Smith 
1524e1ff37aSBarry Smith   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
1534e1ff37aSBarry Smith     for (j = 0; j < nnz1; j++) {
1542205254eSKarl Rupp       if (icol1[j] < icol2[j]) return -1;
1552205254eSKarl Rupp       if (icol1[j] > icol2[j]) return 1;
156b2f1ab58SBarry Smith     }
1574e1ff37aSBarry Smith   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
1584e1ff37aSBarry Smith     for (j = 0; j < nnz1; j++) {
1592205254eSKarl Rupp       if (icol1[j] - i1 < icol2[j] - i2) return -1;
1602205254eSKarl Rupp       if (icol1[j] - i1 > icol2[j] - i2) return 1;
161b2f1ab58SBarry Smith     }
1624e1ff37aSBarry Smith   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
1634e1ff37aSBarry Smith     for (j = 1; j < nnz1; j++) {
1642205254eSKarl Rupp       if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1;
1652205254eSKarl Rupp       if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1;
166b2f1ab58SBarry Smith     }
167b2f1ab58SBarry Smith   }
168b2f1ab58SBarry Smith   return 0;
169b2f1ab58SBarry Smith }
170b2f1ab58SBarry Smith 
171b2f1ab58SBarry Smith /*
172b2f1ab58SBarry Smith   spbas_mergesort_icols:
173b2f1ab58SBarry Smith     return a sorting of the rows in which identical sparseness patterns are
174b2f1ab58SBarry Smith     next to each other
175b2f1ab58SBarry Smith */
1769371c9d4SSatish Balay PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort) {
177c328eeadSBarry Smith   PetscInt  istep;                /* Chunk-sizes of already sorted parts of arrays */
178c328eeadSBarry Smith   PetscInt  i, i1, i2;            /* Loop counters for (partly) sorted arrays */
179c328eeadSBarry Smith   PetscInt  istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
180c328eeadSBarry Smith   PetscInt *ialloc;               /* Allocated arrays */
181c328eeadSBarry Smith   PetscInt *iswap;                /* auxiliary pointers for swapping */
182c328eeadSBarry Smith   PetscInt *ihlp1;                /* Pointers to new version of arrays, */
183c328eeadSBarry Smith   PetscInt *ihlp2;                /* Pointers to previous version of arrays, */
184b2f1ab58SBarry Smith 
185b2f1ab58SBarry Smith   PetscFunctionBegin;
1869566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &ialloc));
187b2f1ab58SBarry Smith 
188b2f1ab58SBarry Smith   ihlp1 = ialloc;
189b2f1ab58SBarry Smith   ihlp2 = isort;
190b2f1ab58SBarry Smith 
191c328eeadSBarry Smith   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
1924e1ff37aSBarry Smith   for (istep = 1; istep < nrows; istep *= 2) {
193c328eeadSBarry Smith     /*
194c328eeadSBarry Smith       Combine sorted parts
195c328eeadSBarry Smith           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
196c328eeadSBarry Smith       of ihlp2 and vhlp2
197c328eeadSBarry Smith 
198c328eeadSBarry Smith       into one sorted part
199c328eeadSBarry Smith           istart:istart+2*istep-1
200c328eeadSBarry Smith       of ihlp1 and vhlp1
201c328eeadSBarry Smith     */
2024e1ff37aSBarry Smith     for (istart = 0; istart < nrows; istart += 2 * istep) {
203c328eeadSBarry Smith       /* Set counters and bound array part endings */
2049371c9d4SSatish Balay       i1    = istart;
2059371c9d4SSatish Balay       i1end = i1 + istep;
2069371c9d4SSatish Balay       if (i1end > nrows) i1end = nrows;
2079371c9d4SSatish Balay       i2    = istart + istep;
2089371c9d4SSatish Balay       i2end = i2 + istep;
2099371c9d4SSatish Balay       if (i2end > nrows) i2end = nrows;
210b2f1ab58SBarry Smith 
211c328eeadSBarry Smith       /* Merge the two array parts */
2124e1ff37aSBarry Smith       for (i = istart; i < i2end; i++) {
2134e1ff37aSBarry Smith         if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
214b2f1ab58SBarry Smith           ihlp1[i] = ihlp2[i1];
215b2f1ab58SBarry Smith           i1++;
2164e1ff37aSBarry Smith         } else if (i2 < i2end) {
217b2f1ab58SBarry Smith           ihlp1[i] = ihlp2[i2];
218b2f1ab58SBarry Smith           i2++;
2194e1ff37aSBarry Smith         } else {
220b2f1ab58SBarry Smith           ihlp1[i] = ihlp2[i1];
221b2f1ab58SBarry Smith           i1++;
222b2f1ab58SBarry Smith         }
223b2f1ab58SBarry Smith       }
224b2f1ab58SBarry Smith     }
225b2f1ab58SBarry Smith 
226c328eeadSBarry Smith     /* Swap the two array sets */
2279371c9d4SSatish Balay     iswap = ihlp2;
2289371c9d4SSatish Balay     ihlp2 = ihlp1;
2299371c9d4SSatish Balay     ihlp1 = iswap;
230b2f1ab58SBarry Smith   }
231b2f1ab58SBarry Smith 
232c328eeadSBarry Smith   /* Copy one more time in case the sorted arrays are the temporary ones */
2334e1ff37aSBarry Smith   if (ihlp2 != isort) {
2342205254eSKarl Rupp     for (i = 0; i < nrows; i++) isort[i] = ihlp2[i];
235b2f1ab58SBarry Smith   }
2369566063dSJacob Faibussowitsch   PetscCall(PetscFree(ialloc));
237b2f1ab58SBarry Smith   PetscFunctionReturn(0);
238b2f1ab58SBarry Smith }
239b2f1ab58SBarry Smith 
240b2f1ab58SBarry Smith /*
241b2f1ab58SBarry Smith   spbas_compress_pattern:
242b2f1ab58SBarry Smith      calculate a compressed sparseness pattern for a sparseness pattern
243b2f1ab58SBarry Smith      given in compressed row storage. The compressed sparseness pattern may
244b2f1ab58SBarry Smith      require (much) less memory.
245b2f1ab58SBarry Smith */
2469371c9d4SSatish Balay PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction) {
247b2f1ab58SBarry Smith   PetscInt        nnz      = irow_in[nrows];
2483dfa2556SBarry Smith   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
2493dfa2556SBarry Smith   size_t          mem_compressed;
250b2f1ab58SBarry Smith   PetscInt       *isort;
251b2f1ab58SBarry Smith   PetscInt       *icols;
252b2f1ab58SBarry Smith   PetscInt        row_nnz;
253b2f1ab58SBarry Smith   PetscInt       *ipoint;
254ace3abfcSBarry Smith   PetscBool      *used;
255b2f1ab58SBarry Smith   PetscInt        ptr;
256b2f1ab58SBarry Smith   PetscInt        i, j;
257ace3abfcSBarry Smith   const PetscBool no_values = PETSC_FALSE;
258b2f1ab58SBarry Smith 
259b2f1ab58SBarry Smith   PetscFunctionBegin;
260c328eeadSBarry Smith   /* Allocate the structure of the new matrix */
261b2f1ab58SBarry Smith   B->nrows        = nrows;
262b2f1ab58SBarry Smith   B->ncols        = ncols;
263b2f1ab58SBarry Smith   B->nnz          = nnz;
264b2f1ab58SBarry Smith   B->col_idx_type = col_idx_type;
265b2f1ab58SBarry Smith   B->block_data   = PETSC_TRUE;
2662205254eSKarl Rupp 
2679566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_pattern(B, no_values));
268b2f1ab58SBarry Smith 
269c328eeadSBarry Smith   /* When using an offset array, set it */
2704e1ff37aSBarry Smith   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
2712205254eSKarl Rupp     for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
272b2f1ab58SBarry Smith   }
273b2f1ab58SBarry Smith 
274c328eeadSBarry Smith   /* Allocate the ordering for the rows */
2759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &isort));
2769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &ipoint));
2779566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(nrows, &used));
278b2f1ab58SBarry Smith 
2794e1ff37aSBarry Smith   for (i = 0; i < nrows; i++) {
280b2f1ab58SBarry Smith     B->row_nnz[i] = irow_in[i + 1] - irow_in[i];
281b2f1ab58SBarry Smith     isort[i]      = i;
282b2f1ab58SBarry Smith     ipoint[i]     = i;
283b2f1ab58SBarry Smith   }
284b2f1ab58SBarry Smith 
285c328eeadSBarry Smith   /* Sort the rows so that identical columns will be next to each other */
2869566063dSJacob Faibussowitsch   PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort));
2879566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n"));
288b2f1ab58SBarry Smith 
289c328eeadSBarry Smith   /* Replace identical rows with the first one in the list */
2904e1ff37aSBarry Smith   for (i = 1; i < nrows; i++) {
2919371c9d4SSatish Balay     if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) { ipoint[isort[i]] = ipoint[isort[i - 1]]; }
292b2f1ab58SBarry Smith   }
293b2f1ab58SBarry Smith 
294c328eeadSBarry Smith   /* Collect the rows which are used*/
2952205254eSKarl Rupp   for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE;
296b2f1ab58SBarry Smith 
297c328eeadSBarry Smith   /* Calculate needed memory */
298b2f1ab58SBarry Smith   B->n_alloc_icol = 0;
2994e1ff37aSBarry Smith   for (i = 0; i < nrows; i++) {
3002205254eSKarl Rupp     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
301b2f1ab58SBarry Smith   }
3029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol));
303b2f1ab58SBarry Smith 
304c328eeadSBarry Smith   /* Fill in the diagonal offsets for the rows which store their own data */
305b2f1ab58SBarry Smith   ptr = 0;
3064e1ff37aSBarry Smith   for (i = 0; i < B->nrows; i++) {
3074e1ff37aSBarry Smith     if (used[i]) {
308b2f1ab58SBarry Smith       B->icols[i] = &B->alloc_icol[ptr];
309b2f1ab58SBarry Smith       icols       = &icol_in[irow_in[i]];
310b2f1ab58SBarry Smith       row_nnz     = B->row_nnz[i];
3114e1ff37aSBarry Smith       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
3129371c9d4SSatish Balay         for (j = 0; j < row_nnz; j++) { B->icols[i][j] = icols[j]; }
3134e1ff37aSBarry Smith       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
3149371c9d4SSatish Balay         for (j = 0; j < row_nnz; j++) { B->icols[i][j] = icols[j] - i; }
3154e1ff37aSBarry Smith       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
3169371c9d4SSatish Balay         for (j = 0; j < row_nnz; j++) { B->icols[i][j] = icols[j] - icols[0]; }
317b2f1ab58SBarry Smith       }
318b2f1ab58SBarry Smith       ptr += B->row_nnz[i];
319b2f1ab58SBarry Smith     }
320b2f1ab58SBarry Smith   }
321b2f1ab58SBarry Smith 
322c328eeadSBarry Smith   /* Point to the right places for all data */
3239371c9d4SSatish Balay   for (i = 0; i < nrows; i++) { B->icols[i] = B->icols[ipoint[i]]; }
3249566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n"));
3259566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "         (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows)));
326b2f1ab58SBarry Smith 
3279566063dSJacob Faibussowitsch   PetscCall(PetscFree(isort));
3289566063dSJacob Faibussowitsch   PetscCall(PetscFree(used));
3299566063dSJacob Faibussowitsch   PetscCall(PetscFree(ipoint));
330b2f1ab58SBarry Smith 
331b2f1ab58SBarry Smith   mem_compressed = spbas_memory_requirement(*B);
3324e1ff37aSBarry Smith   *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig;
333b2f1ab58SBarry Smith   PetscFunctionReturn(0);
334b2f1ab58SBarry Smith }
335b2f1ab58SBarry Smith 
336b2f1ab58SBarry Smith /*
337b2f1ab58SBarry Smith    spbas_incomplete_cholesky
338b2f1ab58SBarry Smith        Incomplete Cholesky decomposition
339b2f1ab58SBarry Smith */
340c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
341b2f1ab58SBarry Smith 
342b2f1ab58SBarry Smith /*
343b2f1ab58SBarry Smith   spbas_delete : de-allocate the arrays owned by this matrix
344b2f1ab58SBarry Smith */
3459371c9d4SSatish Balay PetscErrorCode spbas_delete(spbas_matrix matrix) {
346b2f1ab58SBarry Smith   PetscInt i;
3479ccc4a9bSBarry Smith 
348b2f1ab58SBarry Smith   PetscFunctionBegin;
3499ccc4a9bSBarry Smith   if (matrix.block_data) {
3509566063dSJacob Faibussowitsch     PetscCall(PetscFree(matrix.alloc_icol));
3519566063dSJacob Faibussowitsch     if (matrix.values) PetscCall(PetscFree(matrix.alloc_val));
3529ccc4a9bSBarry Smith   } else {
3539566063dSJacob Faibussowitsch     for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i]));
3549566063dSJacob Faibussowitsch     PetscCall(PetscFree(matrix.icols));
3559ccc4a9bSBarry Smith     if (matrix.values) {
3569566063dSJacob Faibussowitsch       for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i]));
357b2f1ab58SBarry Smith     }
358b2f1ab58SBarry Smith   }
359b2f1ab58SBarry Smith 
3609566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix.row_nnz));
3619566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix.icols));
3629566063dSJacob Faibussowitsch   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0));
3639566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix.values));
364b2f1ab58SBarry Smith   PetscFunctionReturn(0);
365b2f1ab58SBarry Smith }
366b2f1ab58SBarry Smith 
367b2f1ab58SBarry Smith /*
368b2f1ab58SBarry Smith spbas_matrix_to_crs:
369b2f1ab58SBarry Smith    Convert an spbas_matrix to compessed row storage
370b2f1ab58SBarry Smith */
3719371c9d4SSatish Balay PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) {
372b2f1ab58SBarry Smith   PetscInt     nrows = matrix_A.nrows;
373b2f1ab58SBarry Smith   PetscInt     nnz   = matrix_A.nnz;
374b2f1ab58SBarry Smith   PetscInt     i, j, r_nnz, i0;
375b2f1ab58SBarry Smith   PetscInt    *irow;
376b2f1ab58SBarry Smith   PetscInt    *icol;
377b2f1ab58SBarry Smith   PetscInt    *icol_A;
378b2f1ab58SBarry Smith   MatScalar   *val;
379b2f1ab58SBarry Smith   PetscScalar *val_A;
380b2f1ab58SBarry Smith   PetscInt     col_idx_type = matrix_A.col_idx_type;
381ace3abfcSBarry Smith   PetscBool    do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
382b2f1ab58SBarry Smith 
383b2f1ab58SBarry Smith   PetscFunctionBegin;
3849566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows + 1, &irow));
3859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz, &icol));
3869ccc4a9bSBarry Smith   *icol_out = icol;
3879ccc4a9bSBarry Smith   *irow_out = irow;
3889ccc4a9bSBarry Smith   if (do_values) {
3899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nnz, &val));
3909371c9d4SSatish Balay     *val_out  = val;
3919371c9d4SSatish Balay     *icol_out = icol;
3929371c9d4SSatish Balay     *irow_out = irow;
393b2f1ab58SBarry Smith   }
394b2f1ab58SBarry Smith 
395b2f1ab58SBarry Smith   irow[0] = 0;
3969ccc4a9bSBarry Smith   for (i = 0; i < nrows; i++) {
397b2f1ab58SBarry Smith     r_nnz       = matrix_A.row_nnz[i];
398b2f1ab58SBarry Smith     i0          = irow[i];
399b2f1ab58SBarry Smith     irow[i + 1] = i0 + r_nnz;
400b2f1ab58SBarry Smith     icol_A      = matrix_A.icols[i];
401b2f1ab58SBarry Smith 
4029ccc4a9bSBarry Smith     if (do_values) {
403b2f1ab58SBarry Smith       val_A = matrix_A.values[i];
4049ccc4a9bSBarry Smith       for (j = 0; j < r_nnz; j++) {
405b2f1ab58SBarry Smith         icol[i0 + j] = icol_A[j];
406b2f1ab58SBarry Smith         val[i0 + j]  = val_A[j];
407b2f1ab58SBarry Smith       }
4089ccc4a9bSBarry Smith     } else {
4092205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j];
410b2f1ab58SBarry Smith     }
411b2f1ab58SBarry Smith 
4129ccc4a9bSBarry Smith     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
4132205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i;
4142205254eSKarl Rupp     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
415b2f1ab58SBarry Smith       i0 = matrix_A.icol0[i];
4162205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0;
417b2f1ab58SBarry Smith     }
418b2f1ab58SBarry Smith   }
419b2f1ab58SBarry Smith   PetscFunctionReturn(0);
420b2f1ab58SBarry Smith }
421b2f1ab58SBarry Smith 
422b2f1ab58SBarry Smith /*
423b2f1ab58SBarry Smith     spbas_transpose
424b2f1ab58SBarry Smith        return the transpose of a matrix
425b2f1ab58SBarry Smith */
4269371c9d4SSatish Balay PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result) {
427b2f1ab58SBarry Smith   PetscInt     col_idx_type = in_matrix.col_idx_type;
428b2f1ab58SBarry Smith   PetscInt     nnz          = in_matrix.nnz;
429b2f1ab58SBarry Smith   PetscInt     ncols        = in_matrix.nrows;
430b2f1ab58SBarry Smith   PetscInt     nrows        = in_matrix.ncols;
431b2f1ab58SBarry Smith   PetscInt     i, j, k;
432b2f1ab58SBarry Smith   PetscInt     r_nnz;
433b2f1ab58SBarry Smith   PetscInt    *irow;
4344efc9174SBarry Smith   PetscInt     icol0 = 0;
435b2f1ab58SBarry Smith   PetscScalar *val;
4364e1ff37aSBarry Smith 
437b2f1ab58SBarry Smith   PetscFunctionBegin;
438c328eeadSBarry Smith   /* Copy input values */
439b2f1ab58SBarry Smith   result->nrows        = nrows;
440b2f1ab58SBarry Smith   result->ncols        = ncols;
441b2f1ab58SBarry Smith   result->nnz          = nnz;
442b2f1ab58SBarry Smith   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
443b2f1ab58SBarry Smith   result->block_data   = PETSC_TRUE;
444b2f1ab58SBarry Smith 
445c328eeadSBarry Smith   /* Allocate sparseness pattern */
4469566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
447b2f1ab58SBarry Smith 
448c328eeadSBarry Smith   /*  Count the number of nonzeros in each row */
4492205254eSKarl Rupp   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
450b2f1ab58SBarry Smith 
4519ccc4a9bSBarry Smith   for (i = 0; i < ncols; i++) {
452b2f1ab58SBarry Smith     r_nnz = in_matrix.row_nnz[i];
453b2f1ab58SBarry Smith     irow  = in_matrix.icols[i];
4549ccc4a9bSBarry Smith     if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
4552205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++;
4569ccc4a9bSBarry Smith     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
4572205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++;
4589ccc4a9bSBarry Smith     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
459b2f1ab58SBarry Smith       icol0 = in_matrix.icol0[i];
4602205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++;
461b2f1ab58SBarry Smith     }
462b2f1ab58SBarry Smith   }
463b2f1ab58SBarry Smith 
464c328eeadSBarry Smith   /* Set the pointers to the data */
4659566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_data(result));
466b2f1ab58SBarry Smith 
467c328eeadSBarry Smith   /* Reset the number of nonzeros in each row */
4682205254eSKarl Rupp   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
469b2f1ab58SBarry Smith 
470c328eeadSBarry Smith   /* Fill the data arrays */
4719ccc4a9bSBarry Smith   if (in_matrix.values) {
4729ccc4a9bSBarry Smith     for (i = 0; i < ncols; i++) {
473b2f1ab58SBarry Smith       r_nnz = in_matrix.row_nnz[i];
474b2f1ab58SBarry Smith       irow  = in_matrix.icols[i];
475b2f1ab58SBarry Smith       val   = in_matrix.values[i];
476b2f1ab58SBarry Smith 
4772205254eSKarl Rupp       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
4782205254eSKarl Rupp       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
4792205254eSKarl Rupp       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
4809ccc4a9bSBarry Smith       for (j = 0; j < r_nnz; j++) {
481b2f1ab58SBarry Smith         k                                     = icol0 + irow[j];
482b2f1ab58SBarry Smith         result->icols[k][result->row_nnz[k]]  = i;
483b2f1ab58SBarry Smith         result->values[k][result->row_nnz[k]] = val[j];
484b2f1ab58SBarry Smith         result->row_nnz[k]++;
485b2f1ab58SBarry Smith       }
486b2f1ab58SBarry Smith     }
4879ccc4a9bSBarry Smith   } else {
4889ccc4a9bSBarry Smith     for (i = 0; i < ncols; i++) {
489b2f1ab58SBarry Smith       r_nnz = in_matrix.row_nnz[i];
490b2f1ab58SBarry Smith       irow  = in_matrix.icols[i];
491b2f1ab58SBarry Smith 
4922205254eSKarl Rupp       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
4932205254eSKarl Rupp       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
4942205254eSKarl Rupp       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
495b2f1ab58SBarry Smith 
4969ccc4a9bSBarry Smith       for (j = 0; j < r_nnz; j++) {
497b2f1ab58SBarry Smith         k                                    = icol0 + irow[j];
498b2f1ab58SBarry Smith         result->icols[k][result->row_nnz[k]] = i;
499b2f1ab58SBarry Smith         result->row_nnz[k]++;
500b2f1ab58SBarry Smith       }
501b2f1ab58SBarry Smith     }
502b2f1ab58SBarry Smith   }
503b2f1ab58SBarry Smith   PetscFunctionReturn(0);
504b2f1ab58SBarry Smith }
505b2f1ab58SBarry Smith 
506b2f1ab58SBarry Smith /*
507b2f1ab58SBarry Smith    spbas_mergesort
508b2f1ab58SBarry Smith 
509bb42e86aSJed Brown       mergesort for an array of integers and an array of associated
510b2f1ab58SBarry Smith       reals
511b2f1ab58SBarry Smith 
512b2f1ab58SBarry Smith       on output, icol[0..nnz-1] is increasing;
513b2f1ab58SBarry Smith                   val[0..nnz-1] has undergone the same permutation as icol
514b2f1ab58SBarry Smith 
5150298fd71SBarry Smith       NB: val may be NULL: in that case, only the integers are sorted
516b2f1ab58SBarry Smith 
517b2f1ab58SBarry Smith */
5189371c9d4SSatish Balay PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) {
519c328eeadSBarry Smith   PetscInt     istep;                /* Chunk-sizes of already sorted parts of arrays */
520c328eeadSBarry Smith   PetscInt     i, i1, i2;            /* Loop counters for (partly) sorted arrays */
521c328eeadSBarry Smith   PetscInt     istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
522c328eeadSBarry Smith   PetscInt    *ialloc;               /* Allocated arrays */
5230298fd71SBarry Smith   PetscScalar *valloc = NULL;
524c328eeadSBarry Smith   PetscInt    *iswap; /* auxiliary pointers for swapping */
525b2f1ab58SBarry Smith   PetscScalar *vswap;
526c328eeadSBarry Smith   PetscInt    *ihlp1;        /* Pointers to new version of arrays, */
5270298fd71SBarry Smith   PetscScalar *vhlp1 = NULL; /* (arrays under construction) */
528c328eeadSBarry Smith   PetscInt    *ihlp2;        /* Pointers to previous version of arrays, */
5290298fd71SBarry Smith   PetscScalar *vhlp2 = NULL;
530b2f1ab58SBarry Smith 
531362febeeSStefano Zampini   PetscFunctionBegin;
5329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz, &ialloc));
533b2f1ab58SBarry Smith   ihlp1 = ialloc;
534b2f1ab58SBarry Smith   ihlp2 = icol;
535b2f1ab58SBarry Smith 
5369ccc4a9bSBarry Smith   if (val) {
5379566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nnz, &valloc));
538b2f1ab58SBarry Smith     vhlp1 = valloc;
539b2f1ab58SBarry Smith     vhlp2 = val;
540b2f1ab58SBarry Smith   }
541b2f1ab58SBarry Smith 
542c328eeadSBarry Smith   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
5439ccc4a9bSBarry Smith   for (istep = 1; istep < nnz; istep *= 2) {
544c328eeadSBarry Smith     /*
545c328eeadSBarry Smith       Combine sorted parts
546c328eeadSBarry Smith           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
547c328eeadSBarry Smith       of ihlp2 and vhlp2
548c328eeadSBarry Smith 
549c328eeadSBarry Smith       into one sorted part
550c328eeadSBarry Smith           istart:istart+2*istep-1
551c328eeadSBarry Smith       of ihlp1 and vhlp1
552c328eeadSBarry Smith     */
5539ccc4a9bSBarry Smith     for (istart = 0; istart < nnz; istart += 2 * istep) {
554c328eeadSBarry Smith       /* Set counters and bound array part endings */
5559371c9d4SSatish Balay       i1    = istart;
5569371c9d4SSatish Balay       i1end = i1 + istep;
5579371c9d4SSatish Balay       if (i1end > nnz) i1end = nnz;
5589371c9d4SSatish Balay       i2    = istart + istep;
5599371c9d4SSatish Balay       i2end = i2 + istep;
5609371c9d4SSatish Balay       if (i2end > nnz) i2end = nnz;
561b2f1ab58SBarry Smith 
562c328eeadSBarry Smith       /* Merge the two array parts */
5639ccc4a9bSBarry Smith       if (val) {
5649ccc4a9bSBarry Smith         for (i = istart; i < i2end; i++) {
5659ccc4a9bSBarry Smith           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
566b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i1];
567b2f1ab58SBarry Smith             vhlp1[i] = vhlp2[i1];
568b2f1ab58SBarry Smith             i1++;
5699ccc4a9bSBarry Smith           } else if (i2 < i2end) {
570b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i2];
571b2f1ab58SBarry Smith             vhlp1[i] = vhlp2[i2];
572b2f1ab58SBarry Smith             i2++;
5739ccc4a9bSBarry Smith           } else {
574b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i1];
575b2f1ab58SBarry Smith             vhlp1[i] = vhlp2[i1];
576b2f1ab58SBarry Smith             i1++;
577b2f1ab58SBarry Smith           }
578b2f1ab58SBarry Smith         }
5799ccc4a9bSBarry Smith       } else {
5806322f4bdSBarry Smith         for (i = istart; i < i2end; i++) {
5816322f4bdSBarry Smith           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
582b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i1];
583b2f1ab58SBarry Smith             i1++;
5846322f4bdSBarry Smith           } else if (i2 < i2end) {
585b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i2];
586b2f1ab58SBarry Smith             i2++;
5876322f4bdSBarry Smith           } else {
588b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i1];
589b2f1ab58SBarry Smith             i1++;
590b2f1ab58SBarry Smith           }
591b2f1ab58SBarry Smith         }
592b2f1ab58SBarry Smith       }
593b2f1ab58SBarry Smith     }
594b2f1ab58SBarry Smith 
595c328eeadSBarry Smith     /* Swap the two array sets */
5969371c9d4SSatish Balay     iswap = ihlp2;
5979371c9d4SSatish Balay     ihlp2 = ihlp1;
5989371c9d4SSatish Balay     ihlp1 = iswap;
5999371c9d4SSatish Balay     vswap = vhlp2;
6009371c9d4SSatish Balay     vhlp2 = vhlp1;
6019371c9d4SSatish Balay     vhlp1 = vswap;
602b2f1ab58SBarry Smith   }
603b2f1ab58SBarry Smith 
604c328eeadSBarry Smith   /* Copy one more time in case the sorted arrays are the temporary ones */
6056322f4bdSBarry Smith   if (ihlp2 != icol) {
6062205254eSKarl Rupp     for (i = 0; i < nnz; i++) icol[i] = ihlp2[i];
6076322f4bdSBarry Smith     if (val) {
6082205254eSKarl Rupp       for (i = 0; i < nnz; i++) val[i] = vhlp2[i];
609b2f1ab58SBarry Smith     }
610b2f1ab58SBarry Smith   }
611b2f1ab58SBarry Smith 
6129566063dSJacob Faibussowitsch   PetscCall(PetscFree(ialloc));
6139566063dSJacob Faibussowitsch   if (val) PetscCall(PetscFree(valloc));
614b2f1ab58SBarry Smith   PetscFunctionReturn(0);
615b2f1ab58SBarry Smith }
616b2f1ab58SBarry Smith 
617b2f1ab58SBarry Smith /*
618b2f1ab58SBarry Smith   spbas_apply_reordering_rows:
619b2f1ab58SBarry Smith     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
620b2f1ab58SBarry Smith */
6219371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) {
622b2f1ab58SBarry Smith   PetscInt      i, j, ip;
623b2f1ab58SBarry Smith   PetscInt      nrows = matrix_A->nrows;
624b2f1ab58SBarry Smith   PetscInt     *row_nnz;
625b2f1ab58SBarry Smith   PetscInt    **icols;
626ace3abfcSBarry Smith   PetscBool     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
6270298fd71SBarry Smith   PetscScalar **vals      = NULL;
628b2f1ab58SBarry Smith 
629b2f1ab58SBarry Smith   PetscFunctionBegin;
63008401ef6SPierre Jolivet   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
631b2f1ab58SBarry Smith 
632*48a46eb9SPierre Jolivet   if (do_values) PetscCall(PetscMalloc1(nrows, &vals));
6339566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &row_nnz));
6349566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &icols));
635b2f1ab58SBarry Smith 
6366322f4bdSBarry Smith   for (i = 0; i < nrows; i++) {
637b2f1ab58SBarry Smith     ip = permutation[i];
6382205254eSKarl Rupp     if (do_values) vals[i] = matrix_A->values[ip];
639b2f1ab58SBarry Smith     icols[i]   = matrix_A->icols[ip];
640b2f1ab58SBarry Smith     row_nnz[i] = matrix_A->row_nnz[ip];
6412205254eSKarl Rupp     for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i;
642b2f1ab58SBarry Smith   }
643b2f1ab58SBarry Smith 
6449566063dSJacob Faibussowitsch   if (do_values) PetscCall(PetscFree(matrix_A->values));
6459566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix_A->icols));
6469566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix_A->row_nnz));
647b2f1ab58SBarry Smith 
6482205254eSKarl Rupp   if (do_values) matrix_A->values = vals;
649b2f1ab58SBarry Smith   matrix_A->icols   = icols;
650b2f1ab58SBarry Smith   matrix_A->row_nnz = row_nnz;
651b2f1ab58SBarry Smith   PetscFunctionReturn(0);
652b2f1ab58SBarry Smith }
653b2f1ab58SBarry Smith 
654b2f1ab58SBarry Smith /*
655b2f1ab58SBarry Smith   spbas_apply_reordering_cols:
656b2f1ab58SBarry Smith     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
657b2f1ab58SBarry Smith */
6589371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation) {
659b2f1ab58SBarry Smith   PetscInt     i, j;
660b2f1ab58SBarry Smith   PetscInt     nrows = matrix_A->nrows;
661b2f1ab58SBarry Smith   PetscInt     row_nnz;
662b2f1ab58SBarry Smith   PetscInt    *icols;
663ace3abfcSBarry Smith   PetscBool    do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
6640298fd71SBarry Smith   PetscScalar *vals      = NULL;
665b2f1ab58SBarry Smith 
666b2f1ab58SBarry Smith   PetscFunctionBegin;
66708401ef6SPierre Jolivet   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
668b2f1ab58SBarry Smith 
6696322f4bdSBarry Smith   for (i = 0; i < nrows; i++) {
670b2f1ab58SBarry Smith     icols   = matrix_A->icols[i];
671b2f1ab58SBarry Smith     row_nnz = matrix_A->row_nnz[i];
6722205254eSKarl Rupp     if (do_values) vals = matrix_A->values[i];
673b2f1ab58SBarry Smith 
6749371c9d4SSatish Balay     for (j = 0; j < row_nnz; j++) { icols[j] = permutation[i + icols[j]] - i; }
6759566063dSJacob Faibussowitsch     PetscCall(spbas_mergesort(row_nnz, icols, vals));
676b2f1ab58SBarry Smith   }
677b2f1ab58SBarry Smith   PetscFunctionReturn(0);
678b2f1ab58SBarry Smith }
679b2f1ab58SBarry Smith 
680b2f1ab58SBarry Smith /*
681b2f1ab58SBarry Smith   spbas_apply_reordering:
682b2f1ab58SBarry Smith     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
683b2f1ab58SBarry Smith */
6849371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm) {
685b2f1ab58SBarry Smith   PetscFunctionBegin;
6869566063dSJacob Faibussowitsch   PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm));
6879566063dSJacob Faibussowitsch   PetscCall(spbas_apply_reordering_cols(matrix_A, permutation));
688b2f1ab58SBarry Smith   PetscFunctionReturn(0);
689b2f1ab58SBarry Smith }
690b2f1ab58SBarry Smith 
6919371c9d4SSatish Balay PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result) {
692b2f1ab58SBarry Smith   spbas_matrix retval;
693b2f1ab58SBarry Smith   PetscInt     i, j, i0, r_nnz;
694b2f1ab58SBarry Smith 
695b2f1ab58SBarry Smith   PetscFunctionBegin;
696c328eeadSBarry Smith   /* Copy input values */
697b2f1ab58SBarry Smith   retval.nrows = nrows;
698b2f1ab58SBarry Smith   retval.ncols = ncols;
699b2f1ab58SBarry Smith   retval.nnz   = ai[nrows];
700b2f1ab58SBarry Smith 
701b2f1ab58SBarry Smith   retval.block_data   = PETSC_TRUE;
702b2f1ab58SBarry Smith   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
703b2f1ab58SBarry Smith 
704c328eeadSBarry Smith   /* Allocate output matrix */
7059566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE));
7062205254eSKarl Rupp   for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i];
7079566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_data(&retval));
708c328eeadSBarry Smith   /* Copy the structure */
7096322f4bdSBarry Smith   for (i = 0; i < retval.nrows; i++) {
710b2f1ab58SBarry Smith     i0    = ai[i];
711b2f1ab58SBarry Smith     r_nnz = ai[i + 1] - i0;
712b2f1ab58SBarry Smith 
7139371c9d4SSatish Balay     for (j = 0; j < r_nnz; j++) { retval.icols[i][j] = aj[i0 + j] - i; }
714b2f1ab58SBarry Smith   }
715b2f1ab58SBarry Smith   *result = retval;
716b2f1ab58SBarry Smith   PetscFunctionReturn(0);
717b2f1ab58SBarry Smith }
718b2f1ab58SBarry Smith 
719b2f1ab58SBarry Smith /*
720b2f1ab58SBarry Smith    spbas_mark_row_power:
721b2f1ab58SBarry Smith       Mark the columns in row 'row' which are nonzero in
722b2f1ab58SBarry Smith           matrix^2log(marker).
723b2f1ab58SBarry Smith */
724be332245SKarl Rupp PetscErrorCode spbas_mark_row_power(PetscInt     *iwork,     /* marker-vector */
725c328eeadSBarry Smith                                     PetscInt      row,       /* row for which the columns are marked */
726c328eeadSBarry Smith                                     spbas_matrix *in_matrix, /* matrix for which the power is being  calculated */
727c328eeadSBarry Smith                                     PetscInt      marker,    /* marker-value: 2^power */
728c328eeadSBarry Smith                                     PetscInt      minmrk,    /* lower bound for marked points */
729c328eeadSBarry Smith                                     PetscInt      maxmrk)         /* upper bound for marked points */
730b2f1ab58SBarry Smith {
731b2f1ab58SBarry Smith   PetscInt i, j, nnz;
732b2f1ab58SBarry Smith 
733b2f1ab58SBarry Smith   PetscFunctionBegin;
734b2f1ab58SBarry Smith   nnz = in_matrix->row_nnz[row];
735b2f1ab58SBarry Smith 
736c328eeadSBarry Smith   /* For higher powers, call this function recursively */
7376322f4bdSBarry Smith   if (marker > 1) {
7386322f4bdSBarry Smith     for (i = 0; i < nnz; i++) {
739b2f1ab58SBarry Smith       j = row + in_matrix->icols[row][i];
7406322f4bdSBarry Smith       if (minmrk <= j && j < maxmrk && iwork[j] < marker) {
7419566063dSJacob Faibussowitsch         PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk));
742b2f1ab58SBarry Smith         iwork[j] |= marker;
743b2f1ab58SBarry Smith       }
744b2f1ab58SBarry Smith     }
7456322f4bdSBarry Smith   } else {
746c328eeadSBarry Smith     /*  Mark the columns reached */
7476322f4bdSBarry Smith     for (i = 0; i < nnz; i++) {
748b2f1ab58SBarry Smith       j = row + in_matrix->icols[row][i];
7492205254eSKarl Rupp       if (minmrk <= j && j < maxmrk) iwork[j] |= 1;
750b2f1ab58SBarry Smith     }
751b2f1ab58SBarry Smith   }
752b2f1ab58SBarry Smith   PetscFunctionReturn(0);
753b2f1ab58SBarry Smith }
754b2f1ab58SBarry Smith 
755b2f1ab58SBarry Smith /*
756b2f1ab58SBarry Smith    spbas_power
757b2f1ab58SBarry Smith       Calculate sparseness patterns for incomplete Cholesky decompositions
758b2f1ab58SBarry Smith       of a given order: (almost) all nonzeros of the matrix^(order+1) which
759b2f1ab58SBarry Smith       are inside the band width are found and stored in the output sparseness
760b2f1ab58SBarry Smith       pattern.
761b2f1ab58SBarry Smith */
7629371c9d4SSatish Balay PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result) {
763b2f1ab58SBarry Smith   spbas_matrix retval;
764b2f1ab58SBarry Smith   PetscInt     nrows = in_matrix.nrows;
765b2f1ab58SBarry Smith   PetscInt     ncols = in_matrix.ncols;
766b2f1ab58SBarry Smith   PetscInt     i, j, kend;
767b2f1ab58SBarry Smith   PetscInt     nnz, inz;
768b2f1ab58SBarry Smith   PetscInt    *iwork;
769b2f1ab58SBarry Smith   PetscInt     marker;
770b2f1ab58SBarry Smith   PetscInt     maxmrk = 0;
771b2f1ab58SBarry Smith 
772b2f1ab58SBarry Smith   PetscFunctionBegin;
77308401ef6SPierre Jolivet   PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
77408401ef6SPierre Jolivet   PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error");
775aed4548fSBarry Smith   PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
77608401ef6SPierre Jolivet   PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
777b2f1ab58SBarry Smith 
778c328eeadSBarry Smith   /* Copy input values*/
779b2f1ab58SBarry Smith   retval.nrows        = ncols;
780b2f1ab58SBarry Smith   retval.ncols        = nrows;
781b2f1ab58SBarry Smith   retval.nnz          = 0;
782b2f1ab58SBarry Smith   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
783b2f1ab58SBarry Smith   retval.block_data   = PETSC_FALSE;
784b2f1ab58SBarry Smith 
785c328eeadSBarry Smith   /* Allocate sparseness pattern */
7869566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
787b2f1ab58SBarry Smith 
788580bdb30SBarry Smith   /* Allocate marker array: note sure the max needed so use the max of the two */
7899566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork));
790b2f1ab58SBarry Smith 
791c328eeadSBarry Smith   /* Calculate marker values */
7929371c9d4SSatish Balay   marker = 1;
7939371c9d4SSatish Balay   for (i = 1; i < power; i++) marker *= 2;
794b2f1ab58SBarry Smith 
7956322f4bdSBarry Smith   for (i = 0; i < nrows; i++) {
796c328eeadSBarry Smith     /* Calculate the pattern for each row */
797b2f1ab58SBarry Smith 
798b2f1ab58SBarry Smith     nnz  = in_matrix.row_nnz[i];
799b2f1ab58SBarry Smith     kend = i + in_matrix.icols[i][nnz - 1];
8002205254eSKarl Rupp     if (maxmrk <= kend) maxmrk = kend + 1;
8019566063dSJacob Faibussowitsch     PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk));
802b2f1ab58SBarry Smith 
803c328eeadSBarry Smith     /* Count the columns*/
804b2f1ab58SBarry Smith     nnz = 0;
8052205254eSKarl Rupp     for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0);
806b2f1ab58SBarry Smith 
807c328eeadSBarry Smith     /* Allocate the column indices */
808b2f1ab58SBarry Smith     retval.row_nnz[i] = nnz;
8099566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nnz, &retval.icols[i]));
810b2f1ab58SBarry Smith 
811c328eeadSBarry Smith     /* Administrate the column indices */
812b2f1ab58SBarry Smith     inz = 0;
8136322f4bdSBarry Smith     for (j = i; j < maxmrk; j++) {
8146322f4bdSBarry Smith       if (iwork[j]) {
815b2f1ab58SBarry Smith         retval.icols[i][inz] = j - i;
816b2f1ab58SBarry Smith         inz++;
817b2f1ab58SBarry Smith         iwork[j] = 0;
818b2f1ab58SBarry Smith       }
819b2f1ab58SBarry Smith     }
820b2f1ab58SBarry Smith     retval.nnz += nnz;
821b2f1ab58SBarry Smith   };
8229566063dSJacob Faibussowitsch   PetscCall(PetscFree(iwork));
823b2f1ab58SBarry Smith   *result = retval;
824b2f1ab58SBarry Smith   PetscFunctionReturn(0);
825b2f1ab58SBarry Smith }
826b2f1ab58SBarry Smith 
827b2f1ab58SBarry Smith /*
828b2f1ab58SBarry Smith    spbas_keep_upper:
829b2f1ab58SBarry Smith       remove the lower part of the matrix: keep the upper part
830b2f1ab58SBarry Smith */
8319371c9d4SSatish Balay PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix) {
832b2f1ab58SBarry Smith   PetscInt i, j;
833b2f1ab58SBarry Smith   PetscInt jstart;
834b2f1ab58SBarry Smith 
835b2f1ab58SBarry Smith   PetscFunctionBegin;
8365f80ce2aSJacob Faibussowitsch   PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices");
8376322f4bdSBarry Smith   for (i = 0; i < inout_matrix->nrows; i++) {
8386322f4bdSBarry Smith     for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { }
8396322f4bdSBarry Smith     if (jstart > 0) {
8409371c9d4SSatish Balay       for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) { inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart]; }
841b2f1ab58SBarry Smith 
8426322f4bdSBarry Smith       if (inout_matrix->values) {
8439371c9d4SSatish Balay         for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) { inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart]; }
844b2f1ab58SBarry Smith       }
845b2f1ab58SBarry Smith 
846b2f1ab58SBarry Smith       inout_matrix->row_nnz[i] -= jstart;
847b2f1ab58SBarry Smith 
8486322f4bdSBarry Smith       inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt));
849b2f1ab58SBarry Smith 
8509371c9d4SSatish Balay       if (inout_matrix->values) { inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar)); }
851b2f1ab58SBarry Smith       inout_matrix->nnz -= jstart;
852b2f1ab58SBarry Smith     }
853b2f1ab58SBarry Smith   }
854b2f1ab58SBarry Smith   PetscFunctionReturn(0);
855b2f1ab58SBarry Smith }
856