xref: /petsc/src/mat/impls/sell/seq/sell.h (revision 07425a8d4172ec73b7b53c5ce4d6ba1b92fe45cf)
1 #pragma once
2 
3 #include <petsc/private/matimpl.h>
4 #include <petsc/private/hashmapi.h>
5 
6 /*
7  For NVIDIA GPUs each slice should be padded to the boundary of 16 elements for best performance.
8  The optimal memory alignment in device memory is 128 bytes, 64 bytes, 32 bytes for double precision, single precision and half precision.
9 */
10 #if defined(PETSC_HAVE_DEVICE)
11   #define DEVICE_MEM_ALIGN 16
12 #endif
13 
14 /*
15  Struct header for SeqSELL matrix format
16 */
17 #define SEQSELLHEADER(datatype) \
18   PetscBool        roworiented;        /* if true, row-oriented input, default */ \
19   PetscInt         nonew;              /* 1 don't add new nonzeros, -1 generate error on new */ \
20   PetscInt         nounused;           /* -1 generate error on unused space */ \
21   PetscBool        singlemalloc;       /* if true a, i, and j have been obtained with one big malloc */ \
22   PetscInt         maxallocmat;        /* max allocated space for the matrix */ \
23   PetscInt         maxallocrow;        /* max allocated space for each row */ \
24   PetscInt         nz;                 /* actual nonzeros */ \
25   PetscInt         rlenmax;            /* max actual row length, rmax cannot exceed maxallocrow */ \
26   PetscInt        *rlen;               /* actual length of each row (padding zeros excluded) */ \
27   PetscBool        free_rlen;          /* free rlen array ? */ \
28   PetscInt         reallocs;           /* number of mallocs done during MatSetValues() \
29 as more values are set than were prealloced */ \
30   PetscBool        keepnonzeropattern; /* keeps matrix nonzero structure the same in calls to MatZeroRows()*/ \
31   PetscBool        ignorezeroentries; \
32   PetscBool        free_colidx;      /* free the column indices colidx when the matrix is destroyed */ \
33   PetscBool        free_val;         /* free the numerical values when matrix is destroy */ \
34   PetscInt        *colidx;           /* column index */ \
35   PetscInt        *diag;             /* pointers to diagonal elements */ \
36   PetscObjectState diagNonzeroState; /* nonzero state of the matrix when diag was obtained */ \
37   PetscBool        diagDense;        /* matrix contains all the diagonal entries */ \
38   PetscInt         nonzerorowcnt;    /* how many rows have nonzero entries */ \
39   PetscBool        free_diag;        /* free diag ? */ \
40   datatype        *val;              /* elements including nonzeros and padding zeros */ \
41   PetscScalar     *solve_work;       /* work space used in MatSolve */ \
42   IS               row, col, icol;   /* index sets, used for reorderings */ \
43   PetscBool        pivotinblocks;    /* pivot inside factorization of each diagonal block */ \
44   Mat              parent;           /* set if this matrix was formed with MatDuplicate(...,MAT_SHARE_NONZERO_PATTERN,....);
45 means that this shares some data structures with the parent including diag, ilen, imax, i, j */ \
46   PetscInt        *sliidx;           /* slice index */ \
47   PetscInt         totalslices;      /* total number of slices */ \
48   PetscInt         sliceheight;      /* slice height */ \
49   PetscReal        fillratio;        /* ratio of number of padded zeros over total number of elements  */ \
50   PetscReal        avgslicewidth;    /* average slice width */ \
51   PetscInt         maxslicewidth;    /* maximum slice width */ \
52   PetscReal        varslicesize;     /* variance of slice size */ \
53   PetscInt        *sliperm;          /* slice permutation array, CUDA only */ \
54   PetscInt         totalblocks;      /* total number of blocks, CUDA only */ \
55   PetscInt        *blockidx;         /* block index, CUDA only */ \
56   PetscInt        *block_row_map;    /* starting row of the current block, CUDA only */ \
57   PetscInt         chunksize;        /* chunk size, CUDA only */ \
58   PetscInt         totalchunks;      /* total number of chunks, CUDA only */ \
59   PetscInt        *chunk_slice_map;  /* starting slice of the current chunk, CUDA only */ \
60   PetscInt        *getrowcols;       /* workarray for MatGetRow_SeqSELL */ \
61   PetscScalar     *getrowvals        /* workarray for MatGetRow_SeqSELL */
62 
63 typedef struct {
64   SEQSELLHEADER(MatScalar);
65   MatScalar *saved_values; /* location for stashing nonzero values of matrix */
66 
67   /* data needed for MatSOR_SeqAIJ() */
68   PetscScalar     *mdiag, *idiag; /* diagonal values, inverse of diagonal entries */
69   PetscScalar     *ssor_work;     /* workspace for Eisenstat trick */
70   PetscObjectState idiagState;    /* state of the matrix when mdiag and idiag was obtained */
71   PetscScalar      fshift, omega; /* last used omega and fshift */
72 
73   ISColoring coloring; /* set with MatADSetColoring() used by MatADSetValues() */
74 } Mat_SeqSELL;
75 
76 /*
77  Frees the arrays from the XSELLPACK matrix type
78  */
79 static inline PetscErrorCode MatSeqXSELLFreeSELL(Mat AA, MatScalar **val, PetscInt **colidx)
80 {
81   Mat_SeqSELL *A = (Mat_SeqSELL *)AA->data;
82   if (A->singlemalloc) {
83     PetscCall(PetscFree2(*val, *colidx));
84   } else {
85     if (A->free_val) PetscCall(PetscFree(*val));
86     if (A->free_colidx) PetscCall(PetscFree(*colidx));
87   }
88   return PETSC_SUCCESS;
89 }
90 
91 #define MatSeqXSELLReallocateSELL(Amat, AM, BS2, WIDTH, SIDX, SH, SID, ROW, COL, COLIDX, VAL, CP, VP, NONEW, datatype, MUL) \
92   do { \
93     if (WIDTH >= (SIDX[SID + 1] - SIDX[SID]) / SH) { \
94       Mat_SeqSELL *Ain = (Mat_SeqSELL *)Amat->data; \
95       /* there is no extra room in row, therefore enlarge 1 slice column */ \
96       PetscInt  new_size = Ain->maxallocmat + SH * MUL, *new_colidx; \
97       datatype *new_val; \
98 \
99       PetscCheck(NONEW != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc. Use MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", ROW, COL); \
100       /* malloc new storage space */ \
101       PetscCall(PetscMalloc2(BS2 * new_size, &new_val, BS2 * new_size, &new_colidx)); \
102 \
103       /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \
104       PetscCall(PetscArraycpy(new_val, VAL, SIDX[SID + 1])); \
105       PetscCall(PetscArraycpy(new_colidx, COLIDX, SIDX[SID + 1])); \
106       PetscCall(PetscArraycpy(new_val + SIDX[SID + 1] + SH * MUL, VAL + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \
107       PetscCall(PetscArraycpy(new_colidx + SIDX[SID + 1] + SH * MUL, COLIDX + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \
108       /* update slice_idx */ \
109       for (ii = SID + 1; ii <= Ain->totalslices; ii++) SIDX[ii] += SH * MUL; \
110       /* update pointers. Notice that they point to the FIRST position of the row */ \
111       CP = new_colidx + SIDX[SID] + (ROW % SH); \
112       VP = new_val + SIDX[SID] + (ROW % SH); \
113       /* free up old matrix storage */ \
114       PetscCall(MatSeqXSELLFreeSELL(A, &Ain->val, &Ain->colidx)); \
115       Ain->val          = new_val; \
116       Ain->colidx       = new_colidx; \
117       Ain->singlemalloc = PETSC_TRUE; \
118       Ain->maxallocmat  = new_size; \
119       Ain->reallocs++; \
120       A->nonzerostate++; \
121       if (WIDTH >= Ain->maxallocrow) Ain->maxallocrow += MUL; \
122       if (WIDTH >= Ain->rlenmax) Ain->rlenmax++; \
123     } \
124   } while (0)
125 
126 #define MatSetValue_SeqSELL_Private(A, row, col, value, addv, orow, ocol, cp, vp, lastcol, low, high) \
127   do { \
128     Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; \
129     found          = PETSC_FALSE; \
130     if (col <= lastcol) low = 0; \
131     else high = a->rlen[row]; \
132     lastcol = col; \
133     while (high - low > 5) { \
134       t = (low + high) / 2; \
135       if (*(cp + a->sliceheight * t) > col) high = t; \
136       else low = t; \
137     } \
138     for (_i = low; _i < high; _i++) { \
139       if (*(cp + a->sliceheight * _i) > col) break; \
140       if (*(cp + a->sliceheight * _i) == col) { \
141         if (addv == ADD_VALUES) *(vp + a->sliceheight * _i) += value; \
142         else *(vp + a->sliceheight * _i) = value; \
143         found = PETSC_TRUE; \
144         break; \
145       } \
146     } \
147     if (!found) { \
148       PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
149       if (a->nonew != 1 && !(value == 0.0 && a->ignorezeroentries) && a->rlen[row] >= (a->sliidx[row / a->sliceheight + 1] - a->sliidx[row / a->sliceheight]) / a->sliceheight) { \
150         /* there is no extra room in row, therefore enlarge 1 slice column */ \
151         if (a->maxallocmat < a->sliidx[a->totalslices] + a->sliceheight) { \
152           /* allocates a larger array for the XSELL matrix types; only extend the current slice by one more column. */ \
153           PetscInt   new_size = a->maxallocmat + a->sliceheight, *new_colidx; \
154           MatScalar *new_val; \
155           PetscCheck(a->nonew != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc. Use MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", orow, ocol); \
156           /* malloc new storage space */ \
157           PetscCall(PetscMalloc2(new_size, &new_val, new_size, &new_colidx)); \
158           /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \
159           PetscCall(PetscArraycpy(new_val, a->val, a->sliidx[row / a->sliceheight + 1])); \
160           PetscCall(PetscArraycpy(new_colidx, a->colidx, a->sliidx[row / a->sliceheight + 1])); \
161           PetscCall(PetscArraycpy(new_val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, PetscSafePointerPlusOffset(a->val, a->sliidx[row / a->sliceheight + 1]), a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
162           PetscCall(PetscArraycpy(new_colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, PetscSafePointerPlusOffset(a->colidx, a->sliidx[row / a->sliceheight + 1]), a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
163           /* update pointers. Notice that they point to the FIRST position of the row */ \
164           cp = new_colidx + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \
165           vp = new_val + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \
166           /* free up old matrix storage */ \
167           PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx)); \
168           a->val          = new_val; \
169           a->colidx       = new_colidx; \
170           a->singlemalloc = PETSC_TRUE; \
171           a->maxallocmat  = new_size; \
172           a->reallocs++; \
173         } else { \
174           /* no need to reallocate, just shift the following slices to create space for the added slice column */ \
175           PetscCall(PetscArraymove(a->val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
176           PetscCall(PetscArraymove(a->colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
177         } \
178         /* update slice_idx */ \
179         for (ii = row / a->sliceheight + 1; ii <= a->totalslices; ii++) a->sliidx[ii] += a->sliceheight; \
180         if (a->rlen[row] >= a->maxallocrow) a->maxallocrow++; \
181         if (a->rlen[row] >= a->rlenmax) a->rlenmax++; \
182       } \
183       /* shift up all the later entries in this row */ \
184       for (ii = a->rlen[row] - 1; ii >= _i; ii--) { \
185         *(cp + a->sliceheight * (ii + 1)) = *(cp + a->sliceheight * ii); \
186         *(vp + a->sliceheight * (ii + 1)) = *(vp + a->sliceheight * ii); \
187       } \
188       *(cp + a->sliceheight * _i) = col; \
189       *(vp + a->sliceheight * _i) = value; \
190       a->nz++; \
191       a->rlen[row]++; \
192       A->nonzerostate++; \
193       low = _i + 1; \
194       high++; \
195     } \
196   } while (0)
197 
198 #define PetscCeilIntMacro(x, y) ((((PetscInt)(x)) / ((PetscInt)(y))) + ((((PetscInt)(x)) % ((PetscInt)(y))) ? 1 : 0))
199 
200 PETSC_INTERN PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat, PetscInt, const PetscInt[]);
201 PETSC_INTERN PetscErrorCode MatMult_SeqSELL(Mat, Vec, Vec);
202 PETSC_INTERN PetscErrorCode MatMultAdd_SeqSELL(Mat, Vec, Vec, Vec);
203 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqSELL(Mat, Vec, Vec);
204 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat, Vec, Vec, Vec);
205 PETSC_INTERN PetscErrorCode MatMissingDiagonal_SeqSELL(Mat, PetscBool *, PetscInt *);
206 PETSC_INTERN PetscErrorCode MatMarkDiagonal_SeqSELL(Mat);
207 PETSC_INTERN PetscErrorCode MatZeroEntries_SeqSELL(Mat);
208 PETSC_INTERN PetscErrorCode MatDestroy_SeqSELL(Mat);
209 PETSC_INTERN PetscErrorCode MatSetOption_SeqSELL(Mat, MatOption, PetscBool);
210 PETSC_INTERN PetscErrorCode MatGetDiagonal_SeqSELL(Mat, Vec v);
211 PETSC_INTERN PetscErrorCode MatGetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
212 PETSC_INTERN PetscErrorCode MatView_SeqSELL(Mat, PetscViewer);
213 PETSC_INTERN PetscErrorCode MatAssemblyEnd_SeqSELL(Mat, MatAssemblyType);
214 PETSC_INTERN PetscErrorCode MatGetInfo_SeqSELL(Mat, MatInfoType, MatInfo *);
215 PETSC_INTERN PetscErrorCode MatSetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
216 PETSC_INTERN PetscErrorCode MatCopy_SeqSELL(Mat, Mat, MatStructure);
217 PETSC_INTERN PetscErrorCode MatSetUp_SeqSELL(Mat);
218 PETSC_INTERN PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat, PetscScalar *[]);
219 PETSC_INTERN PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat, PetscScalar *[]);
220 PETSC_INTERN PetscErrorCode MatShift_SeqSELL(Mat, PetscScalar);
221 PETSC_INTERN PetscErrorCode MatSOR_SeqSELL(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
222 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat);
223 PETSC_INTERN PetscErrorCode MatDuplicate_SeqSELL(Mat, MatDuplicateOption, Mat *);
224 PETSC_INTERN PetscErrorCode MatEqual_SeqSELL(Mat, Mat, PetscBool *);
225 PETSC_INTERN PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat, MatType, MatReuse, Mat *);
226 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat, MatType, MatReuse, Mat *);
227 PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqSELL(Mat, ISColoring, MatFDColoring);
228 PETSC_INTERN PetscErrorCode MatFDColoringSetUp_SeqSELL(Mat, ISColoring, MatFDColoring);
229 PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *);
230 PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *);
231 PETSC_INTERN PetscErrorCode MatConjugate_SeqSELL(Mat A);
232 PETSC_INTERN PetscErrorCode MatScale_SeqSELL(Mat, PetscScalar);
233 PETSC_INTERN PetscErrorCode MatDiagonalScale_SeqSELL(Mat, Vec, Vec);
234