xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 11a5261e40035b7c793f2783a2ba6c7cd4f3b077)
1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/bas/spbas.h>
3b2f1ab58SBarry Smith 
4845006b9SBarry Smith /*MC
52692d6eeSBarry Smith   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6845006b9SBarry Smith 
7*11a5261eSBarry Smith   Works with `MATAIJ`  matrices
8845006b9SBarry Smith 
9845006b9SBarry Smith   Options Database Keys:
100053dfc8SBarry Smith + -pc_factor_levels <l> - number of levels of fill
110053dfc8SBarry Smith - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12845006b9SBarry Smith 
130053dfc8SBarry Smith   Level: intermediate
14845006b9SBarry Smith 
15845006b9SBarry Smith    Contributed by: Bas van 't Hof
16845006b9SBarry Smith 
17*11a5261eSBarry Smith    Note:
1895452b02SPatrick Sanan     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
190053dfc8SBarry Smith      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
200053dfc8SBarry Smith      we recommend not using this functionality.
210053dfc8SBarry Smith 
22db781477SPatrick Sanan .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()`
23845006b9SBarry Smith M*/
249ccc4a9bSBarry Smith 
25b2f1ab58SBarry Smith /*
26b2f1ab58SBarry Smith   spbas_memory_requirement:
27b2f1ab58SBarry Smith     Calculate the number of bytes needed to store tha matrix
28b2f1ab58SBarry Smith */
299371c9d4SSatish Balay size_t spbas_memory_requirement(spbas_matrix matrix) {
303dfa2556SBarry Smith   size_t memreq = 6 * sizeof(PetscInt) +             /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
31ace3abfcSBarry Smith                   sizeof(PetscBool) +                /* block_data */
32c328eeadSBarry Smith                   sizeof(PetscScalar **) +           /* values */
33c328eeadSBarry Smith                   sizeof(PetscScalar *) +            /* alloc_val */
343dfa2556SBarry Smith                   2 * sizeof(PetscInt **) +          /* icols, icols0 */
35c328eeadSBarry Smith                   2 * sizeof(PetscInt *) +           /* row_nnz, alloc_icol */
36c328eeadSBarry Smith                   matrix.nrows * sizeof(PetscInt) +  /* row_nnz[*] */
37c328eeadSBarry Smith                   matrix.nrows * sizeof(PetscInt *); /* icols[*] */
38b2f1ab58SBarry Smith 
39c328eeadSBarry Smith   /* icol0[*] */
402205254eSKarl Rupp   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
41b2f1ab58SBarry Smith 
42c328eeadSBarry Smith   /* icols[*][*] */
432205254eSKarl Rupp   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
442205254eSKarl Rupp   else memreq += matrix.nnz * sizeof(PetscInt);
45b2f1ab58SBarry Smith 
464e1ff37aSBarry Smith   if (matrix.values) {
47c328eeadSBarry Smith     memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */
48c328eeadSBarry Smith     /* values[*][*] */
492205254eSKarl Rupp     if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar);
502205254eSKarl Rupp     else memreq += matrix.nnz * sizeof(PetscScalar);
51b2f1ab58SBarry Smith   }
52b2f1ab58SBarry Smith   return memreq;
53b2f1ab58SBarry Smith }
54b2f1ab58SBarry Smith 
55b2f1ab58SBarry Smith /*
56b2f1ab58SBarry Smith   spbas_allocate_pattern:
57b2f1ab58SBarry Smith     allocate the pattern arrays row_nnz, icols and optionally values
58b2f1ab58SBarry Smith */
599371c9d4SSatish Balay PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values) {
60b2f1ab58SBarry Smith   PetscInt nrows        = result->nrows;
61b2f1ab58SBarry Smith   PetscInt col_idx_type = result->col_idx_type;
62b2f1ab58SBarry Smith 
63b2f1ab58SBarry Smith   PetscFunctionBegin;
64c328eeadSBarry Smith   /* Allocate sparseness pattern */
659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &result->row_nnz));
669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &result->icols));
67b2f1ab58SBarry Smith 
68c328eeadSBarry Smith   /* If offsets are given wrt an array, create array */
694e1ff37aSBarry Smith   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
709566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nrows, &result->icol0));
714e1ff37aSBarry Smith   } else {
720298fd71SBarry Smith     result->icol0 = NULL;
73b2f1ab58SBarry Smith   }
74b2f1ab58SBarry Smith 
75c328eeadSBarry Smith   /* If values are given, allocate values array */
764e1ff37aSBarry Smith   if (do_values) {
779566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nrows, &result->values));
784e1ff37aSBarry Smith   } else {
790298fd71SBarry Smith     result->values = NULL;
80b2f1ab58SBarry Smith   }
81b2f1ab58SBarry Smith   PetscFunctionReturn(0);
82b2f1ab58SBarry Smith }
83b2f1ab58SBarry Smith 
84b2f1ab58SBarry Smith /*
85b2f1ab58SBarry Smith spbas_allocate_data:
86b2f1ab58SBarry Smith    in case of block_data:
87b2f1ab58SBarry Smith        Allocate the data arrays alloc_icol and optionally alloc_val,
88b2f1ab58SBarry Smith        set appropriate pointers from icols and values;
89b2f1ab58SBarry Smith    in case of !block_data:
90b2f1ab58SBarry Smith        Allocate the arrays icols[i] and optionally values[i]
91b2f1ab58SBarry Smith */
929371c9d4SSatish Balay PetscErrorCode spbas_allocate_data(spbas_matrix *result) {
93b2f1ab58SBarry Smith   PetscInt  i;
94b2f1ab58SBarry Smith   PetscInt  nnz   = result->nnz;
95b2f1ab58SBarry Smith   PetscInt  nrows = result->nrows;
96b2f1ab58SBarry Smith   PetscInt  r_nnz;
976c4ed002SBarry Smith   PetscBool do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
98ace3abfcSBarry Smith   PetscBool block_data = result->block_data;
99b2f1ab58SBarry Smith 
100b2f1ab58SBarry Smith   PetscFunctionBegin;
1014e1ff37aSBarry Smith   if (block_data) {
102c328eeadSBarry Smith     /* Allocate the column number array and point to it */
103b2f1ab58SBarry Smith     result->n_alloc_icol = nnz;
1042205254eSKarl Rupp 
1059566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nnz, &result->alloc_icol));
1062205254eSKarl Rupp 
107b2f1ab58SBarry Smith     result->icols[0] = result->alloc_icol;
108ad540459SPierre Jolivet     for (i = 1; i < nrows; i++) result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1];
109b2f1ab58SBarry Smith 
110c328eeadSBarry Smith     /* Allocate the value array and point to it */
1114e1ff37aSBarry Smith     if (do_values) {
112b2f1ab58SBarry Smith       result->n_alloc_val = nnz;
1132205254eSKarl Rupp 
1149566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nnz, &result->alloc_val));
1152205254eSKarl Rupp 
116b2f1ab58SBarry Smith       result->values[0] = result->alloc_val;
117ad540459SPierre Jolivet       for (i = 1; i < nrows; i++) result->values[i] = result->values[i - 1] + result->row_nnz[i - 1];
118b2f1ab58SBarry Smith     }
1194e1ff37aSBarry Smith   } else {
1204e1ff37aSBarry Smith     for (i = 0; i < nrows; i++) {
121b2f1ab58SBarry Smith       r_nnz = result->row_nnz[i];
1229566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(r_nnz, &result->icols[i]));
123b2f1ab58SBarry Smith     }
1244e1ff37aSBarry Smith     if (do_values) {
1254e1ff37aSBarry Smith       for (i = 0; i < nrows; i++) {
126b2f1ab58SBarry Smith         r_nnz = result->row_nnz[i];
1279566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(r_nnz, &result->values[i]));
128b2f1ab58SBarry Smith       }
129b2f1ab58SBarry Smith     }
130b2f1ab58SBarry Smith   }
131b2f1ab58SBarry Smith   PetscFunctionReturn(0);
132b2f1ab58SBarry Smith }
133b2f1ab58SBarry Smith 
134b2f1ab58SBarry Smith /*
135b2f1ab58SBarry Smith   spbas_row_order_icol
136b2f1ab58SBarry Smith      determine if row i1 should come
137b2f1ab58SBarry Smith        + before row i2 in the sorted rows (return -1),
138b2f1ab58SBarry Smith        + after (return 1)
139b2f1ab58SBarry Smith        + is identical (return 0).
140b2f1ab58SBarry Smith */
1419371c9d4SSatish Balay int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type) {
142b2f1ab58SBarry Smith   PetscInt  j;
143b2f1ab58SBarry Smith   PetscInt  nnz1  = irow_in[i1 + 1] - irow_in[i1];
144b2f1ab58SBarry Smith   PetscInt  nnz2  = irow_in[i2 + 1] - irow_in[i2];
145b2f1ab58SBarry Smith   PetscInt *icol1 = &icol_in[irow_in[i1]];
146b2f1ab58SBarry Smith   PetscInt *icol2 = &icol_in[irow_in[i2]];
147b2f1ab58SBarry Smith 
1482205254eSKarl Rupp   if (nnz1 < nnz2) return -1;
1492205254eSKarl Rupp   if (nnz1 > nnz2) return 1;
150b2f1ab58SBarry Smith 
1514e1ff37aSBarry Smith   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
1524e1ff37aSBarry Smith     for (j = 0; j < nnz1; j++) {
1532205254eSKarl Rupp       if (icol1[j] < icol2[j]) return -1;
1542205254eSKarl Rupp       if (icol1[j] > icol2[j]) return 1;
155b2f1ab58SBarry Smith     }
1564e1ff37aSBarry Smith   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
1574e1ff37aSBarry Smith     for (j = 0; j < nnz1; j++) {
1582205254eSKarl Rupp       if (icol1[j] - i1 < icol2[j] - i2) return -1;
1592205254eSKarl Rupp       if (icol1[j] - i1 > icol2[j] - i2) return 1;
160b2f1ab58SBarry Smith     }
1614e1ff37aSBarry Smith   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
1624e1ff37aSBarry Smith     for (j = 1; j < nnz1; j++) {
1632205254eSKarl Rupp       if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1;
1642205254eSKarl Rupp       if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1;
165b2f1ab58SBarry Smith     }
166b2f1ab58SBarry Smith   }
167b2f1ab58SBarry Smith   return 0;
168b2f1ab58SBarry Smith }
169b2f1ab58SBarry Smith 
170b2f1ab58SBarry Smith /*
171b2f1ab58SBarry Smith   spbas_mergesort_icols:
172b2f1ab58SBarry Smith     return a sorting of the rows in which identical sparseness patterns are
173b2f1ab58SBarry Smith     next to each other
174b2f1ab58SBarry Smith */
1759371c9d4SSatish Balay PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort) {
176c328eeadSBarry Smith   PetscInt  istep;                /* Chunk-sizes of already sorted parts of arrays */
177c328eeadSBarry Smith   PetscInt  i, i1, i2;            /* Loop counters for (partly) sorted arrays */
178c328eeadSBarry Smith   PetscInt  istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
179c328eeadSBarry Smith   PetscInt *ialloc;               /* Allocated arrays */
180c328eeadSBarry Smith   PetscInt *iswap;                /* auxiliary pointers for swapping */
181c328eeadSBarry Smith   PetscInt *ihlp1;                /* Pointers to new version of arrays, */
182c328eeadSBarry Smith   PetscInt *ihlp2;                /* Pointers to previous version of arrays, */
183b2f1ab58SBarry Smith 
184b2f1ab58SBarry Smith   PetscFunctionBegin;
1859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &ialloc));
186b2f1ab58SBarry Smith 
187b2f1ab58SBarry Smith   ihlp1 = ialloc;
188b2f1ab58SBarry Smith   ihlp2 = isort;
189b2f1ab58SBarry Smith 
190c328eeadSBarry Smith   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
1914e1ff37aSBarry Smith   for (istep = 1; istep < nrows; istep *= 2) {
192c328eeadSBarry Smith     /*
193c328eeadSBarry Smith       Combine sorted parts
194c328eeadSBarry Smith           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
195c328eeadSBarry Smith       of ihlp2 and vhlp2
196c328eeadSBarry Smith 
197c328eeadSBarry Smith       into one sorted part
198c328eeadSBarry Smith           istart:istart+2*istep-1
199c328eeadSBarry Smith       of ihlp1 and vhlp1
200c328eeadSBarry Smith     */
2014e1ff37aSBarry Smith     for (istart = 0; istart < nrows; istart += 2 * istep) {
202c328eeadSBarry Smith       /* Set counters and bound array part endings */
2039371c9d4SSatish Balay       i1    = istart;
2049371c9d4SSatish Balay       i1end = i1 + istep;
2059371c9d4SSatish Balay       if (i1end > nrows) i1end = nrows;
2069371c9d4SSatish Balay       i2    = istart + istep;
2079371c9d4SSatish Balay       i2end = i2 + istep;
2089371c9d4SSatish Balay       if (i2end > nrows) i2end = nrows;
209b2f1ab58SBarry Smith 
210c328eeadSBarry Smith       /* Merge the two array parts */
2114e1ff37aSBarry Smith       for (i = istart; i < i2end; i++) {
2124e1ff37aSBarry Smith         if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
213b2f1ab58SBarry Smith           ihlp1[i] = ihlp2[i1];
214b2f1ab58SBarry Smith           i1++;
2154e1ff37aSBarry Smith         } else if (i2 < i2end) {
216b2f1ab58SBarry Smith           ihlp1[i] = ihlp2[i2];
217b2f1ab58SBarry Smith           i2++;
2184e1ff37aSBarry Smith         } else {
219b2f1ab58SBarry Smith           ihlp1[i] = ihlp2[i1];
220b2f1ab58SBarry Smith           i1++;
221b2f1ab58SBarry Smith         }
222b2f1ab58SBarry Smith       }
223b2f1ab58SBarry Smith     }
224b2f1ab58SBarry Smith 
225c328eeadSBarry Smith     /* Swap the two array sets */
2269371c9d4SSatish Balay     iswap = ihlp2;
2279371c9d4SSatish Balay     ihlp2 = ihlp1;
2289371c9d4SSatish Balay     ihlp1 = iswap;
229b2f1ab58SBarry Smith   }
230b2f1ab58SBarry Smith 
231c328eeadSBarry Smith   /* Copy one more time in case the sorted arrays are the temporary ones */
2324e1ff37aSBarry Smith   if (ihlp2 != isort) {
2332205254eSKarl Rupp     for (i = 0; i < nrows; i++) isort[i] = ihlp2[i];
234b2f1ab58SBarry Smith   }
2359566063dSJacob Faibussowitsch   PetscCall(PetscFree(ialloc));
236b2f1ab58SBarry Smith   PetscFunctionReturn(0);
237b2f1ab58SBarry Smith }
238b2f1ab58SBarry Smith 
239b2f1ab58SBarry Smith /*
240b2f1ab58SBarry Smith   spbas_compress_pattern:
241b2f1ab58SBarry Smith      calculate a compressed sparseness pattern for a sparseness pattern
242b2f1ab58SBarry Smith      given in compressed row storage. The compressed sparseness pattern may
243b2f1ab58SBarry Smith      require (much) less memory.
244b2f1ab58SBarry Smith */
2459371c9d4SSatish Balay PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction) {
246b2f1ab58SBarry Smith   PetscInt        nnz      = irow_in[nrows];
2473dfa2556SBarry Smith   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
2483dfa2556SBarry Smith   size_t          mem_compressed;
249b2f1ab58SBarry Smith   PetscInt       *isort;
250b2f1ab58SBarry Smith   PetscInt       *icols;
251b2f1ab58SBarry Smith   PetscInt        row_nnz;
252b2f1ab58SBarry Smith   PetscInt       *ipoint;
253ace3abfcSBarry Smith   PetscBool      *used;
254b2f1ab58SBarry Smith   PetscInt        ptr;
255b2f1ab58SBarry Smith   PetscInt        i, j;
256ace3abfcSBarry Smith   const PetscBool no_values = PETSC_FALSE;
257b2f1ab58SBarry Smith 
258b2f1ab58SBarry Smith   PetscFunctionBegin;
259c328eeadSBarry Smith   /* Allocate the structure of the new matrix */
260b2f1ab58SBarry Smith   B->nrows        = nrows;
261b2f1ab58SBarry Smith   B->ncols        = ncols;
262b2f1ab58SBarry Smith   B->nnz          = nnz;
263b2f1ab58SBarry Smith   B->col_idx_type = col_idx_type;
264b2f1ab58SBarry Smith   B->block_data   = PETSC_TRUE;
2652205254eSKarl Rupp 
2669566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_pattern(B, no_values));
267b2f1ab58SBarry Smith 
268c328eeadSBarry Smith   /* When using an offset array, set it */
2694e1ff37aSBarry Smith   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
2702205254eSKarl Rupp     for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
271b2f1ab58SBarry Smith   }
272b2f1ab58SBarry Smith 
273c328eeadSBarry Smith   /* Allocate the ordering for the rows */
2749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &isort));
2759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &ipoint));
2769566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(nrows, &used));
277b2f1ab58SBarry Smith 
2784e1ff37aSBarry Smith   for (i = 0; i < nrows; i++) {
279b2f1ab58SBarry Smith     B->row_nnz[i] = irow_in[i + 1] - irow_in[i];
280b2f1ab58SBarry Smith     isort[i]      = i;
281b2f1ab58SBarry Smith     ipoint[i]     = i;
282b2f1ab58SBarry Smith   }
283b2f1ab58SBarry Smith 
284c328eeadSBarry Smith   /* Sort the rows so that identical columns will be next to each other */
2859566063dSJacob Faibussowitsch   PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort));
2869566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n"));
287b2f1ab58SBarry Smith 
288c328eeadSBarry Smith   /* Replace identical rows with the first one in the list */
2894e1ff37aSBarry Smith   for (i = 1; i < nrows; i++) {
290ad540459SPierre Jolivet     if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) ipoint[isort[i]] = ipoint[isort[i - 1]];
291b2f1ab58SBarry Smith   }
292b2f1ab58SBarry Smith 
293c328eeadSBarry Smith   /* Collect the rows which are used*/
2942205254eSKarl Rupp   for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE;
295b2f1ab58SBarry Smith 
296c328eeadSBarry Smith   /* Calculate needed memory */
297b2f1ab58SBarry Smith   B->n_alloc_icol = 0;
2984e1ff37aSBarry Smith   for (i = 0; i < nrows; i++) {
2992205254eSKarl Rupp     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
300b2f1ab58SBarry Smith   }
3019566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol));
302b2f1ab58SBarry Smith 
303c328eeadSBarry Smith   /* Fill in the diagonal offsets for the rows which store their own data */
304b2f1ab58SBarry Smith   ptr = 0;
3054e1ff37aSBarry Smith   for (i = 0; i < B->nrows; i++) {
3064e1ff37aSBarry Smith     if (used[i]) {
307b2f1ab58SBarry Smith       B->icols[i] = &B->alloc_icol[ptr];
308b2f1ab58SBarry Smith       icols       = &icol_in[irow_in[i]];
309b2f1ab58SBarry Smith       row_nnz     = B->row_nnz[i];
3104e1ff37aSBarry Smith       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
311ad540459SPierre Jolivet         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j];
3124e1ff37aSBarry Smith       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
313ad540459SPierre Jolivet         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - i;
3144e1ff37aSBarry Smith       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
315ad540459SPierre Jolivet         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - icols[0];
316b2f1ab58SBarry Smith       }
317b2f1ab58SBarry Smith       ptr += B->row_nnz[i];
318b2f1ab58SBarry Smith     }
319b2f1ab58SBarry Smith   }
320b2f1ab58SBarry Smith 
321c328eeadSBarry Smith   /* Point to the right places for all data */
322ad540459SPierre Jolivet   for (i = 0; i < nrows; i++) B->icols[i] = B->icols[ipoint[i]];
3239566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n"));
3249566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "         (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows)));
325b2f1ab58SBarry Smith 
3269566063dSJacob Faibussowitsch   PetscCall(PetscFree(isort));
3279566063dSJacob Faibussowitsch   PetscCall(PetscFree(used));
3289566063dSJacob Faibussowitsch   PetscCall(PetscFree(ipoint));
329b2f1ab58SBarry Smith 
330b2f1ab58SBarry Smith   mem_compressed = spbas_memory_requirement(*B);
3314e1ff37aSBarry Smith   *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig;
332b2f1ab58SBarry Smith   PetscFunctionReturn(0);
333b2f1ab58SBarry Smith }
334b2f1ab58SBarry Smith 
335b2f1ab58SBarry Smith /*
336b2f1ab58SBarry Smith    spbas_incomplete_cholesky
337b2f1ab58SBarry Smith        Incomplete Cholesky decomposition
338b2f1ab58SBarry Smith */
339c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
340b2f1ab58SBarry Smith 
341b2f1ab58SBarry Smith /*
342b2f1ab58SBarry Smith   spbas_delete : de-allocate the arrays owned by this matrix
343b2f1ab58SBarry Smith */
3449371c9d4SSatish Balay PetscErrorCode spbas_delete(spbas_matrix matrix) {
345b2f1ab58SBarry Smith   PetscInt i;
3469ccc4a9bSBarry Smith 
347b2f1ab58SBarry Smith   PetscFunctionBegin;
3489ccc4a9bSBarry Smith   if (matrix.block_data) {
3499566063dSJacob Faibussowitsch     PetscCall(PetscFree(matrix.alloc_icol));
3509566063dSJacob Faibussowitsch     if (matrix.values) PetscCall(PetscFree(matrix.alloc_val));
3519ccc4a9bSBarry Smith   } else {
3529566063dSJacob Faibussowitsch     for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i]));
3539566063dSJacob Faibussowitsch     PetscCall(PetscFree(matrix.icols));
3549ccc4a9bSBarry Smith     if (matrix.values) {
3559566063dSJacob Faibussowitsch       for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i]));
356b2f1ab58SBarry Smith     }
357b2f1ab58SBarry Smith   }
358b2f1ab58SBarry Smith 
3599566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix.row_nnz));
3609566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix.icols));
3619566063dSJacob Faibussowitsch   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0));
3629566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix.values));
363b2f1ab58SBarry Smith   PetscFunctionReturn(0);
364b2f1ab58SBarry Smith }
365b2f1ab58SBarry Smith 
366b2f1ab58SBarry Smith /*
367b2f1ab58SBarry Smith spbas_matrix_to_crs:
368b2f1ab58SBarry Smith    Convert an spbas_matrix to compessed row storage
369b2f1ab58SBarry Smith */
3709371c9d4SSatish Balay PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) {
371b2f1ab58SBarry Smith   PetscInt     nrows = matrix_A.nrows;
372b2f1ab58SBarry Smith   PetscInt     nnz   = matrix_A.nnz;
373b2f1ab58SBarry Smith   PetscInt     i, j, r_nnz, i0;
374b2f1ab58SBarry Smith   PetscInt    *irow;
375b2f1ab58SBarry Smith   PetscInt    *icol;
376b2f1ab58SBarry Smith   PetscInt    *icol_A;
377b2f1ab58SBarry Smith   MatScalar   *val;
378b2f1ab58SBarry Smith   PetscScalar *val_A;
379b2f1ab58SBarry Smith   PetscInt     col_idx_type = matrix_A.col_idx_type;
380ace3abfcSBarry Smith   PetscBool    do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
381b2f1ab58SBarry Smith 
382b2f1ab58SBarry Smith   PetscFunctionBegin;
3839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows + 1, &irow));
3849566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz, &icol));
3859ccc4a9bSBarry Smith   *icol_out = icol;
3869ccc4a9bSBarry Smith   *irow_out = irow;
3879ccc4a9bSBarry Smith   if (do_values) {
3889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nnz, &val));
3899371c9d4SSatish Balay     *val_out  = val;
3909371c9d4SSatish Balay     *icol_out = icol;
3919371c9d4SSatish Balay     *irow_out = irow;
392b2f1ab58SBarry Smith   }
393b2f1ab58SBarry Smith 
394b2f1ab58SBarry Smith   irow[0] = 0;
3959ccc4a9bSBarry Smith   for (i = 0; i < nrows; i++) {
396b2f1ab58SBarry Smith     r_nnz       = matrix_A.row_nnz[i];
397b2f1ab58SBarry Smith     i0          = irow[i];
398b2f1ab58SBarry Smith     irow[i + 1] = i0 + r_nnz;
399b2f1ab58SBarry Smith     icol_A      = matrix_A.icols[i];
400b2f1ab58SBarry Smith 
4019ccc4a9bSBarry Smith     if (do_values) {
402b2f1ab58SBarry Smith       val_A = matrix_A.values[i];
4039ccc4a9bSBarry Smith       for (j = 0; j < r_nnz; j++) {
404b2f1ab58SBarry Smith         icol[i0 + j] = icol_A[j];
405b2f1ab58SBarry Smith         val[i0 + j]  = val_A[j];
406b2f1ab58SBarry Smith       }
4079ccc4a9bSBarry Smith     } else {
4082205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j];
409b2f1ab58SBarry Smith     }
410b2f1ab58SBarry Smith 
4119ccc4a9bSBarry Smith     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
4122205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i;
4132205254eSKarl Rupp     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
414b2f1ab58SBarry Smith       i0 = matrix_A.icol0[i];
4152205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0;
416b2f1ab58SBarry Smith     }
417b2f1ab58SBarry Smith   }
418b2f1ab58SBarry Smith   PetscFunctionReturn(0);
419b2f1ab58SBarry Smith }
420b2f1ab58SBarry Smith 
421b2f1ab58SBarry Smith /*
422b2f1ab58SBarry Smith     spbas_transpose
423b2f1ab58SBarry Smith        return the transpose of a matrix
424b2f1ab58SBarry Smith */
4259371c9d4SSatish Balay PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result) {
426b2f1ab58SBarry Smith   PetscInt     col_idx_type = in_matrix.col_idx_type;
427b2f1ab58SBarry Smith   PetscInt     nnz          = in_matrix.nnz;
428b2f1ab58SBarry Smith   PetscInt     ncols        = in_matrix.nrows;
429b2f1ab58SBarry Smith   PetscInt     nrows        = in_matrix.ncols;
430b2f1ab58SBarry Smith   PetscInt     i, j, k;
431b2f1ab58SBarry Smith   PetscInt     r_nnz;
432b2f1ab58SBarry Smith   PetscInt    *irow;
4334efc9174SBarry Smith   PetscInt     icol0 = 0;
434b2f1ab58SBarry Smith   PetscScalar *val;
4354e1ff37aSBarry Smith 
436b2f1ab58SBarry Smith   PetscFunctionBegin;
437c328eeadSBarry Smith   /* Copy input values */
438b2f1ab58SBarry Smith   result->nrows        = nrows;
439b2f1ab58SBarry Smith   result->ncols        = ncols;
440b2f1ab58SBarry Smith   result->nnz          = nnz;
441b2f1ab58SBarry Smith   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
442b2f1ab58SBarry Smith   result->block_data   = PETSC_TRUE;
443b2f1ab58SBarry Smith 
444c328eeadSBarry Smith   /* Allocate sparseness pattern */
4459566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
446b2f1ab58SBarry Smith 
447c328eeadSBarry Smith   /*  Count the number of nonzeros in each row */
4482205254eSKarl Rupp   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
449b2f1ab58SBarry Smith 
4509ccc4a9bSBarry Smith   for (i = 0; i < ncols; i++) {
451b2f1ab58SBarry Smith     r_nnz = in_matrix.row_nnz[i];
452b2f1ab58SBarry Smith     irow  = in_matrix.icols[i];
4539ccc4a9bSBarry Smith     if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
4542205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++;
4559ccc4a9bSBarry Smith     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
4562205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++;
4579ccc4a9bSBarry Smith     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
458b2f1ab58SBarry Smith       icol0 = in_matrix.icol0[i];
4592205254eSKarl Rupp       for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++;
460b2f1ab58SBarry Smith     }
461b2f1ab58SBarry Smith   }
462b2f1ab58SBarry Smith 
463c328eeadSBarry Smith   /* Set the pointers to the data */
4649566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_data(result));
465b2f1ab58SBarry Smith 
466c328eeadSBarry Smith   /* Reset the number of nonzeros in each row */
4672205254eSKarl Rupp   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
468b2f1ab58SBarry Smith 
469c328eeadSBarry Smith   /* Fill the data arrays */
4709ccc4a9bSBarry Smith   if (in_matrix.values) {
4719ccc4a9bSBarry Smith     for (i = 0; i < ncols; i++) {
472b2f1ab58SBarry Smith       r_nnz = in_matrix.row_nnz[i];
473b2f1ab58SBarry Smith       irow  = in_matrix.icols[i];
474b2f1ab58SBarry Smith       val   = in_matrix.values[i];
475b2f1ab58SBarry Smith 
4762205254eSKarl Rupp       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
4772205254eSKarl Rupp       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
4782205254eSKarl Rupp       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
4799ccc4a9bSBarry Smith       for (j = 0; j < r_nnz; j++) {
480b2f1ab58SBarry Smith         k                                     = icol0 + irow[j];
481b2f1ab58SBarry Smith         result->icols[k][result->row_nnz[k]]  = i;
482b2f1ab58SBarry Smith         result->values[k][result->row_nnz[k]] = val[j];
483b2f1ab58SBarry Smith         result->row_nnz[k]++;
484b2f1ab58SBarry Smith       }
485b2f1ab58SBarry Smith     }
4869ccc4a9bSBarry Smith   } else {
4879ccc4a9bSBarry Smith     for (i = 0; i < ncols; i++) {
488b2f1ab58SBarry Smith       r_nnz = in_matrix.row_nnz[i];
489b2f1ab58SBarry Smith       irow  = in_matrix.icols[i];
490b2f1ab58SBarry Smith 
4912205254eSKarl Rupp       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
4922205254eSKarl Rupp       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
4932205254eSKarl Rupp       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
494b2f1ab58SBarry Smith 
4959ccc4a9bSBarry Smith       for (j = 0; j < r_nnz; j++) {
496b2f1ab58SBarry Smith         k                                    = icol0 + irow[j];
497b2f1ab58SBarry Smith         result->icols[k][result->row_nnz[k]] = i;
498b2f1ab58SBarry Smith         result->row_nnz[k]++;
499b2f1ab58SBarry Smith       }
500b2f1ab58SBarry Smith     }
501b2f1ab58SBarry Smith   }
502b2f1ab58SBarry Smith   PetscFunctionReturn(0);
503b2f1ab58SBarry Smith }
504b2f1ab58SBarry Smith 
505b2f1ab58SBarry Smith /*
506b2f1ab58SBarry Smith    spbas_mergesort
507b2f1ab58SBarry Smith 
508bb42e86aSJed Brown       mergesort for an array of integers and an array of associated
509b2f1ab58SBarry Smith       reals
510b2f1ab58SBarry Smith 
511b2f1ab58SBarry Smith       on output, icol[0..nnz-1] is increasing;
512b2f1ab58SBarry Smith                   val[0..nnz-1] has undergone the same permutation as icol
513b2f1ab58SBarry Smith 
5140298fd71SBarry Smith       NB: val may be NULL: in that case, only the integers are sorted
515b2f1ab58SBarry Smith 
516b2f1ab58SBarry Smith */
5179371c9d4SSatish Balay PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) {
518c328eeadSBarry Smith   PetscInt     istep;                /* Chunk-sizes of already sorted parts of arrays */
519c328eeadSBarry Smith   PetscInt     i, i1, i2;            /* Loop counters for (partly) sorted arrays */
520c328eeadSBarry Smith   PetscInt     istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
521c328eeadSBarry Smith   PetscInt    *ialloc;               /* Allocated arrays */
5220298fd71SBarry Smith   PetscScalar *valloc = NULL;
523c328eeadSBarry Smith   PetscInt    *iswap; /* auxiliary pointers for swapping */
524b2f1ab58SBarry Smith   PetscScalar *vswap;
525c328eeadSBarry Smith   PetscInt    *ihlp1;        /* Pointers to new version of arrays, */
5260298fd71SBarry Smith   PetscScalar *vhlp1 = NULL; /* (arrays under construction) */
527c328eeadSBarry Smith   PetscInt    *ihlp2;        /* Pointers to previous version of arrays, */
5280298fd71SBarry Smith   PetscScalar *vhlp2 = NULL;
529b2f1ab58SBarry Smith 
530362febeeSStefano Zampini   PetscFunctionBegin;
5319566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz, &ialloc));
532b2f1ab58SBarry Smith   ihlp1 = ialloc;
533b2f1ab58SBarry Smith   ihlp2 = icol;
534b2f1ab58SBarry Smith 
5359ccc4a9bSBarry Smith   if (val) {
5369566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nnz, &valloc));
537b2f1ab58SBarry Smith     vhlp1 = valloc;
538b2f1ab58SBarry Smith     vhlp2 = val;
539b2f1ab58SBarry Smith   }
540b2f1ab58SBarry Smith 
541c328eeadSBarry Smith   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
5429ccc4a9bSBarry Smith   for (istep = 1; istep < nnz; istep *= 2) {
543c328eeadSBarry Smith     /*
544c328eeadSBarry Smith       Combine sorted parts
545c328eeadSBarry Smith           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
546c328eeadSBarry Smith       of ihlp2 and vhlp2
547c328eeadSBarry Smith 
548c328eeadSBarry Smith       into one sorted part
549c328eeadSBarry Smith           istart:istart+2*istep-1
550c328eeadSBarry Smith       of ihlp1 and vhlp1
551c328eeadSBarry Smith     */
5529ccc4a9bSBarry Smith     for (istart = 0; istart < nnz; istart += 2 * istep) {
553c328eeadSBarry Smith       /* Set counters and bound array part endings */
5549371c9d4SSatish Balay       i1    = istart;
5559371c9d4SSatish Balay       i1end = i1 + istep;
5569371c9d4SSatish Balay       if (i1end > nnz) i1end = nnz;
5579371c9d4SSatish Balay       i2    = istart + istep;
5589371c9d4SSatish Balay       i2end = i2 + istep;
5599371c9d4SSatish Balay       if (i2end > nnz) i2end = nnz;
560b2f1ab58SBarry Smith 
561c328eeadSBarry Smith       /* Merge the two array parts */
5629ccc4a9bSBarry Smith       if (val) {
5639ccc4a9bSBarry Smith         for (i = istart; i < i2end; i++) {
5649ccc4a9bSBarry Smith           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
565b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i1];
566b2f1ab58SBarry Smith             vhlp1[i] = vhlp2[i1];
567b2f1ab58SBarry Smith             i1++;
5689ccc4a9bSBarry Smith           } else if (i2 < i2end) {
569b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i2];
570b2f1ab58SBarry Smith             vhlp1[i] = vhlp2[i2];
571b2f1ab58SBarry Smith             i2++;
5729ccc4a9bSBarry Smith           } else {
573b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i1];
574b2f1ab58SBarry Smith             vhlp1[i] = vhlp2[i1];
575b2f1ab58SBarry Smith             i1++;
576b2f1ab58SBarry Smith           }
577b2f1ab58SBarry Smith         }
5789ccc4a9bSBarry Smith       } else {
5796322f4bdSBarry Smith         for (i = istart; i < i2end; i++) {
5806322f4bdSBarry Smith           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
581b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i1];
582b2f1ab58SBarry Smith             i1++;
5836322f4bdSBarry Smith           } else if (i2 < i2end) {
584b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i2];
585b2f1ab58SBarry Smith             i2++;
5866322f4bdSBarry Smith           } else {
587b2f1ab58SBarry Smith             ihlp1[i] = ihlp2[i1];
588b2f1ab58SBarry Smith             i1++;
589b2f1ab58SBarry Smith           }
590b2f1ab58SBarry Smith         }
591b2f1ab58SBarry Smith       }
592b2f1ab58SBarry Smith     }
593b2f1ab58SBarry Smith 
594c328eeadSBarry Smith     /* Swap the two array sets */
5959371c9d4SSatish Balay     iswap = ihlp2;
5969371c9d4SSatish Balay     ihlp2 = ihlp1;
5979371c9d4SSatish Balay     ihlp1 = iswap;
5989371c9d4SSatish Balay     vswap = vhlp2;
5999371c9d4SSatish Balay     vhlp2 = vhlp1;
6009371c9d4SSatish Balay     vhlp1 = vswap;
601b2f1ab58SBarry Smith   }
602b2f1ab58SBarry Smith 
603c328eeadSBarry Smith   /* Copy one more time in case the sorted arrays are the temporary ones */
6046322f4bdSBarry Smith   if (ihlp2 != icol) {
6052205254eSKarl Rupp     for (i = 0; i < nnz; i++) icol[i] = ihlp2[i];
6066322f4bdSBarry Smith     if (val) {
6072205254eSKarl Rupp       for (i = 0; i < nnz; i++) val[i] = vhlp2[i];
608b2f1ab58SBarry Smith     }
609b2f1ab58SBarry Smith   }
610b2f1ab58SBarry Smith 
6119566063dSJacob Faibussowitsch   PetscCall(PetscFree(ialloc));
6129566063dSJacob Faibussowitsch   if (val) PetscCall(PetscFree(valloc));
613b2f1ab58SBarry Smith   PetscFunctionReturn(0);
614b2f1ab58SBarry Smith }
615b2f1ab58SBarry Smith 
616b2f1ab58SBarry Smith /*
617b2f1ab58SBarry Smith   spbas_apply_reordering_rows:
618b2f1ab58SBarry Smith     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
619b2f1ab58SBarry Smith */
6209371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) {
621b2f1ab58SBarry Smith   PetscInt      i, j, ip;
622b2f1ab58SBarry Smith   PetscInt      nrows = matrix_A->nrows;
623b2f1ab58SBarry Smith   PetscInt     *row_nnz;
624b2f1ab58SBarry Smith   PetscInt    **icols;
625ace3abfcSBarry Smith   PetscBool     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
6260298fd71SBarry Smith   PetscScalar **vals      = NULL;
627b2f1ab58SBarry Smith 
628b2f1ab58SBarry Smith   PetscFunctionBegin;
62908401ef6SPierre Jolivet   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
630b2f1ab58SBarry Smith 
63148a46eb9SPierre Jolivet   if (do_values) PetscCall(PetscMalloc1(nrows, &vals));
6329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &row_nnz));
6339566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrows, &icols));
634b2f1ab58SBarry Smith 
6356322f4bdSBarry Smith   for (i = 0; i < nrows; i++) {
636b2f1ab58SBarry Smith     ip = permutation[i];
6372205254eSKarl Rupp     if (do_values) vals[i] = matrix_A->values[ip];
638b2f1ab58SBarry Smith     icols[i]   = matrix_A->icols[ip];
639b2f1ab58SBarry Smith     row_nnz[i] = matrix_A->row_nnz[ip];
6402205254eSKarl Rupp     for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i;
641b2f1ab58SBarry Smith   }
642b2f1ab58SBarry Smith 
6439566063dSJacob Faibussowitsch   if (do_values) PetscCall(PetscFree(matrix_A->values));
6449566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix_A->icols));
6459566063dSJacob Faibussowitsch   PetscCall(PetscFree(matrix_A->row_nnz));
646b2f1ab58SBarry Smith 
6472205254eSKarl Rupp   if (do_values) matrix_A->values = vals;
648b2f1ab58SBarry Smith   matrix_A->icols   = icols;
649b2f1ab58SBarry Smith   matrix_A->row_nnz = row_nnz;
650b2f1ab58SBarry Smith   PetscFunctionReturn(0);
651b2f1ab58SBarry Smith }
652b2f1ab58SBarry Smith 
653b2f1ab58SBarry Smith /*
654b2f1ab58SBarry Smith   spbas_apply_reordering_cols:
655b2f1ab58SBarry Smith     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
656b2f1ab58SBarry Smith */
6579371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation) {
658b2f1ab58SBarry Smith   PetscInt     i, j;
659b2f1ab58SBarry Smith   PetscInt     nrows = matrix_A->nrows;
660b2f1ab58SBarry Smith   PetscInt     row_nnz;
661b2f1ab58SBarry Smith   PetscInt    *icols;
662ace3abfcSBarry Smith   PetscBool    do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
6630298fd71SBarry Smith   PetscScalar *vals      = NULL;
664b2f1ab58SBarry Smith 
665b2f1ab58SBarry Smith   PetscFunctionBegin;
66608401ef6SPierre Jolivet   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
667b2f1ab58SBarry Smith 
6686322f4bdSBarry Smith   for (i = 0; i < nrows; i++) {
669b2f1ab58SBarry Smith     icols   = matrix_A->icols[i];
670b2f1ab58SBarry Smith     row_nnz = matrix_A->row_nnz[i];
6712205254eSKarl Rupp     if (do_values) vals = matrix_A->values[i];
672b2f1ab58SBarry Smith 
673ad540459SPierre Jolivet     for (j = 0; j < row_nnz; j++) icols[j] = permutation[i + icols[j]] - i;
6749566063dSJacob Faibussowitsch     PetscCall(spbas_mergesort(row_nnz, icols, vals));
675b2f1ab58SBarry Smith   }
676b2f1ab58SBarry Smith   PetscFunctionReturn(0);
677b2f1ab58SBarry Smith }
678b2f1ab58SBarry Smith 
679b2f1ab58SBarry Smith /*
680b2f1ab58SBarry Smith   spbas_apply_reordering:
681b2f1ab58SBarry Smith     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
682b2f1ab58SBarry Smith */
6839371c9d4SSatish Balay PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm) {
684b2f1ab58SBarry Smith   PetscFunctionBegin;
6859566063dSJacob Faibussowitsch   PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm));
6869566063dSJacob Faibussowitsch   PetscCall(spbas_apply_reordering_cols(matrix_A, permutation));
687b2f1ab58SBarry Smith   PetscFunctionReturn(0);
688b2f1ab58SBarry Smith }
689b2f1ab58SBarry Smith 
6909371c9d4SSatish Balay PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result) {
691b2f1ab58SBarry Smith   spbas_matrix retval;
692b2f1ab58SBarry Smith   PetscInt     i, j, i0, r_nnz;
693b2f1ab58SBarry Smith 
694b2f1ab58SBarry Smith   PetscFunctionBegin;
695c328eeadSBarry Smith   /* Copy input values */
696b2f1ab58SBarry Smith   retval.nrows = nrows;
697b2f1ab58SBarry Smith   retval.ncols = ncols;
698b2f1ab58SBarry Smith   retval.nnz   = ai[nrows];
699b2f1ab58SBarry Smith 
700b2f1ab58SBarry Smith   retval.block_data   = PETSC_TRUE;
701b2f1ab58SBarry Smith   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
702b2f1ab58SBarry Smith 
703c328eeadSBarry Smith   /* Allocate output matrix */
7049566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE));
7052205254eSKarl Rupp   for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i];
7069566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_data(&retval));
707c328eeadSBarry Smith   /* Copy the structure */
7086322f4bdSBarry Smith   for (i = 0; i < retval.nrows; i++) {
709b2f1ab58SBarry Smith     i0    = ai[i];
710b2f1ab58SBarry Smith     r_nnz = ai[i + 1] - i0;
711b2f1ab58SBarry Smith 
712ad540459SPierre Jolivet     for (j = 0; j < r_nnz; j++) retval.icols[i][j] = aj[i0 + j] - i;
713b2f1ab58SBarry Smith   }
714b2f1ab58SBarry Smith   *result = retval;
715b2f1ab58SBarry Smith   PetscFunctionReturn(0);
716b2f1ab58SBarry Smith }
717b2f1ab58SBarry Smith 
718b2f1ab58SBarry Smith /*
719b2f1ab58SBarry Smith    spbas_mark_row_power:
720b2f1ab58SBarry Smith       Mark the columns in row 'row' which are nonzero in
721b2f1ab58SBarry Smith           matrix^2log(marker).
722b2f1ab58SBarry Smith */
723be332245SKarl Rupp PetscErrorCode spbas_mark_row_power(PetscInt     *iwork,     /* marker-vector */
724c328eeadSBarry Smith                                     PetscInt      row,       /* row for which the columns are marked */
725c328eeadSBarry Smith                                     spbas_matrix *in_matrix, /* matrix for which the power is being  calculated */
726c328eeadSBarry Smith                                     PetscInt      marker,    /* marker-value: 2^power */
727c328eeadSBarry Smith                                     PetscInt      minmrk,    /* lower bound for marked points */
728c328eeadSBarry Smith                                     PetscInt      maxmrk)         /* upper bound for marked points */
729b2f1ab58SBarry Smith {
730b2f1ab58SBarry Smith   PetscInt i, j, nnz;
731b2f1ab58SBarry Smith 
732b2f1ab58SBarry Smith   PetscFunctionBegin;
733b2f1ab58SBarry Smith   nnz = in_matrix->row_nnz[row];
734b2f1ab58SBarry Smith 
735c328eeadSBarry Smith   /* For higher powers, call this function recursively */
7366322f4bdSBarry Smith   if (marker > 1) {
7376322f4bdSBarry Smith     for (i = 0; i < nnz; i++) {
738b2f1ab58SBarry Smith       j = row + in_matrix->icols[row][i];
7396322f4bdSBarry Smith       if (minmrk <= j && j < maxmrk && iwork[j] < marker) {
7409566063dSJacob Faibussowitsch         PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk));
741b2f1ab58SBarry Smith         iwork[j] |= marker;
742b2f1ab58SBarry Smith       }
743b2f1ab58SBarry Smith     }
7446322f4bdSBarry Smith   } else {
745c328eeadSBarry Smith     /*  Mark the columns reached */
7466322f4bdSBarry Smith     for (i = 0; i < nnz; i++) {
747b2f1ab58SBarry Smith       j = row + in_matrix->icols[row][i];
7482205254eSKarl Rupp       if (minmrk <= j && j < maxmrk) iwork[j] |= 1;
749b2f1ab58SBarry Smith     }
750b2f1ab58SBarry Smith   }
751b2f1ab58SBarry Smith   PetscFunctionReturn(0);
752b2f1ab58SBarry Smith }
753b2f1ab58SBarry Smith 
754b2f1ab58SBarry Smith /*
755b2f1ab58SBarry Smith    spbas_power
756b2f1ab58SBarry Smith       Calculate sparseness patterns for incomplete Cholesky decompositions
757b2f1ab58SBarry Smith       of a given order: (almost) all nonzeros of the matrix^(order+1) which
758b2f1ab58SBarry Smith       are inside the band width are found and stored in the output sparseness
759b2f1ab58SBarry Smith       pattern.
760b2f1ab58SBarry Smith */
7619371c9d4SSatish Balay PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result) {
762b2f1ab58SBarry Smith   spbas_matrix retval;
763b2f1ab58SBarry Smith   PetscInt     nrows = in_matrix.nrows;
764b2f1ab58SBarry Smith   PetscInt     ncols = in_matrix.ncols;
765b2f1ab58SBarry Smith   PetscInt     i, j, kend;
766b2f1ab58SBarry Smith   PetscInt     nnz, inz;
767b2f1ab58SBarry Smith   PetscInt    *iwork;
768b2f1ab58SBarry Smith   PetscInt     marker;
769b2f1ab58SBarry Smith   PetscInt     maxmrk = 0;
770b2f1ab58SBarry Smith 
771b2f1ab58SBarry Smith   PetscFunctionBegin;
77208401ef6SPierre Jolivet   PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
77308401ef6SPierre Jolivet   PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error");
774aed4548fSBarry Smith   PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
77508401ef6SPierre Jolivet   PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
776b2f1ab58SBarry Smith 
777c328eeadSBarry Smith   /* Copy input values*/
778b2f1ab58SBarry Smith   retval.nrows        = ncols;
779b2f1ab58SBarry Smith   retval.ncols        = nrows;
780b2f1ab58SBarry Smith   retval.nnz          = 0;
781b2f1ab58SBarry Smith   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
782b2f1ab58SBarry Smith   retval.block_data   = PETSC_FALSE;
783b2f1ab58SBarry Smith 
784c328eeadSBarry Smith   /* Allocate sparseness pattern */
7859566063dSJacob Faibussowitsch   PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
786b2f1ab58SBarry Smith 
787580bdb30SBarry Smith   /* Allocate marker array: note sure the max needed so use the max of the two */
7889566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork));
789b2f1ab58SBarry Smith 
790c328eeadSBarry Smith   /* Calculate marker values */
7919371c9d4SSatish Balay   marker = 1;
7929371c9d4SSatish Balay   for (i = 1; i < power; i++) marker *= 2;
793b2f1ab58SBarry Smith 
7946322f4bdSBarry Smith   for (i = 0; i < nrows; i++) {
795c328eeadSBarry Smith     /* Calculate the pattern for each row */
796b2f1ab58SBarry Smith 
797b2f1ab58SBarry Smith     nnz  = in_matrix.row_nnz[i];
798b2f1ab58SBarry Smith     kend = i + in_matrix.icols[i][nnz - 1];
7992205254eSKarl Rupp     if (maxmrk <= kend) maxmrk = kend + 1;
8009566063dSJacob Faibussowitsch     PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk));
801b2f1ab58SBarry Smith 
802c328eeadSBarry Smith     /* Count the columns*/
803b2f1ab58SBarry Smith     nnz = 0;
8042205254eSKarl Rupp     for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0);
805b2f1ab58SBarry Smith 
806c328eeadSBarry Smith     /* Allocate the column indices */
807b2f1ab58SBarry Smith     retval.row_nnz[i] = nnz;
8089566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nnz, &retval.icols[i]));
809b2f1ab58SBarry Smith 
810c328eeadSBarry Smith     /* Administrate the column indices */
811b2f1ab58SBarry Smith     inz = 0;
8126322f4bdSBarry Smith     for (j = i; j < maxmrk; j++) {
8136322f4bdSBarry Smith       if (iwork[j]) {
814b2f1ab58SBarry Smith         retval.icols[i][inz] = j - i;
815b2f1ab58SBarry Smith         inz++;
816b2f1ab58SBarry Smith         iwork[j] = 0;
817b2f1ab58SBarry Smith       }
818b2f1ab58SBarry Smith     }
819b2f1ab58SBarry Smith     retval.nnz += nnz;
820b2f1ab58SBarry Smith   };
8219566063dSJacob Faibussowitsch   PetscCall(PetscFree(iwork));
822b2f1ab58SBarry Smith   *result = retval;
823b2f1ab58SBarry Smith   PetscFunctionReturn(0);
824b2f1ab58SBarry Smith }
825b2f1ab58SBarry Smith 
826b2f1ab58SBarry Smith /*
827b2f1ab58SBarry Smith    spbas_keep_upper:
828b2f1ab58SBarry Smith       remove the lower part of the matrix: keep the upper part
829b2f1ab58SBarry Smith */
8309371c9d4SSatish Balay PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix) {
831b2f1ab58SBarry Smith   PetscInt i, j;
832b2f1ab58SBarry Smith   PetscInt jstart;
833b2f1ab58SBarry Smith 
834b2f1ab58SBarry Smith   PetscFunctionBegin;
8355f80ce2aSJacob Faibussowitsch   PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices");
8366322f4bdSBarry Smith   for (i = 0; i < inout_matrix->nrows; i++) {
8376322f4bdSBarry Smith     for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { }
8386322f4bdSBarry Smith     if (jstart > 0) {
839ad540459SPierre Jolivet       for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart];
840b2f1ab58SBarry Smith 
8416322f4bdSBarry Smith       if (inout_matrix->values) {
842ad540459SPierre Jolivet         for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart];
843b2f1ab58SBarry Smith       }
844b2f1ab58SBarry Smith 
845b2f1ab58SBarry Smith       inout_matrix->row_nnz[i] -= jstart;
846b2f1ab58SBarry Smith 
8476322f4bdSBarry Smith       inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt));
848b2f1ab58SBarry Smith 
849ad540459SPierre Jolivet       if (inout_matrix->values) inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar));
850b2f1ab58SBarry Smith       inout_matrix->nnz -= jstart;
851b2f1ab58SBarry Smith     }
852b2f1ab58SBarry Smith   }
853b2f1ab58SBarry Smith   PetscFunctionReturn(0);
854b2f1ab58SBarry Smith }
855