xref: /petsc/src/mat/impls/sell/seq/sell.h (revision 4e58db63d9a16e1e6d7a6b59b385108b2e578961)
1d4002b98SHong Zhang 
26524c165SJacob Faibussowitsch #ifndef __SELL_H
3d4002b98SHong Zhang #define __SELL_H
4d4002b98SHong Zhang 
5d4002b98SHong Zhang #include <petsc/private/matimpl.h>
6eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h>
7d4002b98SHong Zhang 
8d4002b98SHong Zhang /*
9*4e58db63SHong Zhang  For NVIDIA GPUs each slice should be padded to the boundary of 16 elements for best performance.
10*4e58db63SHong Zhang  The optimal memory alignment in device memory is 128 bytes, 64 bytes, 32 bytes for double precision, single precision and half precision.
11*4e58db63SHong Zhang */
12*4e58db63SHong Zhang #if defined(PETSC_HAVE_DEVICE)
13*4e58db63SHong Zhang   #define DEVICE_MEM_ALIGN 16
14*4e58db63SHong Zhang #endif
15*4e58db63SHong Zhang 
16*4e58db63SHong Zhang /*
17d4002b98SHong Zhang  Struct header for SeqSELL matrix format
18d4002b98SHong Zhang */
19d4002b98SHong Zhang #define SEQSELLHEADER(datatype) \
20d4002b98SHong Zhang   PetscBool    roworiented;        /* if true, row-oriented input, default */ \
21d4002b98SHong Zhang   PetscInt     nonew;              /* 1 don't add new nonzeros, -1 generate error on new */ \
22d4002b98SHong Zhang   PetscInt     nounused;           /* -1 generate error on unused space */ \
23d4002b98SHong Zhang   PetscBool    singlemalloc;       /* if true a, i, and j have been obtained with one big malloc */ \
24d4002b98SHong Zhang   PetscInt     maxallocmat;        /* max allocated space for the matrix */ \
25d4002b98SHong Zhang   PetscInt     maxallocrow;        /* max allocated space for each row */ \
26d4002b98SHong Zhang   PetscInt     nz;                 /* actual nonzeros */ \
27d4002b98SHong Zhang   PetscInt     rlenmax;            /* max actual row length, rmax cannot exceed maxallocrow */ \
28d4002b98SHong Zhang   PetscInt    *rlen;               /* actual length of each row (padding zeros excluded) */ \
29d4002b98SHong Zhang   PetscBool    free_rlen;          /* free rlen array ? */ \
30d4002b98SHong Zhang   PetscInt     reallocs;           /* number of mallocs done during MatSetValues() \
31d4002b98SHong Zhang as more values are set than were prealloced */ \
32d4002b98SHong Zhang   PetscBool    keepnonzeropattern; /* keeps matrix structure same in calls to MatZeroRows()*/ \
33d4002b98SHong Zhang   PetscBool    ignorezeroentries; \
34d4002b98SHong Zhang   PetscBool    free_colidx;    /* free the column indices colidx when the matrix is destroyed */ \
35d4002b98SHong Zhang   PetscBool    free_val;       /* free the numerical values when matrix is destroy */ \
36d4002b98SHong Zhang   PetscInt    *colidx;         /* column index */ \
37d4002b98SHong Zhang   PetscInt    *diag;           /* pointers to diagonal elements */ \
38d4002b98SHong Zhang   PetscInt     nonzerorowcnt;  /* how many rows have nonzero entries */ \
39d4002b98SHong Zhang   PetscBool    free_diag;      /* free diag ? */ \
40d4002b98SHong Zhang   datatype    *val;            /* elements including nonzeros and padding zeros */ \
41d4002b98SHong Zhang   PetscScalar *solve_work;     /* work space used in MatSolve */ \
42d4002b98SHong Zhang   IS           row, col, icol; /* index sets, used for reorderings */ \
43d4002b98SHong Zhang   PetscBool    pivotinblocks;  /* pivot inside factorization of each diagonal block */ \
44d4002b98SHong Zhang   Mat          parent;         /* set if this matrix was formed with MatDuplicate(...,MAT_SHARE_NONZERO_PATTERN,....);
45d4002b98SHong Zhang means that this shares some data structures with the parent including diag, ilen, imax, i, j */ \
46d4002b98SHong Zhang   PetscInt    *sliidx;         /* slice index */ \
476108893eSStefano Zampini   PetscInt     totalslices;    /* total number of slices */ \
4807e43b41SHong Zhang   PetscInt     sliceheight;    /* slice height */ \
4907e43b41SHong Zhang   PetscReal    fillratio;      /* ratio of number of padded zeros over total number of elements  */ \
5007e43b41SHong Zhang   PetscReal    avgslicewidth;  /* average slice width */ \
5107e43b41SHong Zhang   PetscInt     maxslicewidth;  /* maximum slice width */ \
5207e43b41SHong Zhang   PetscInt    *sliperm;        /* slice permutation array, CUDA only */ \
5307e43b41SHong Zhang   PetscInt     totalblocks;    /* total number of blocks, CUDA only */ \
5407e43b41SHong Zhang   PetscInt    *blockidx;       /* block index, CUDA only */ \
5507e43b41SHong Zhang   PetscInt    *block_row_map;  /* starting row of the current block, CUDA only */ \
566108893eSStefano Zampini   PetscInt    *getrowcols;     /* workarray for MatGetRow_SeqSELL */ \
579371c9d4SSatish Balay   PetscScalar *getrowvals      /* workarray for MatGetRow_SeqSELL */
58d4002b98SHong Zhang 
59d4002b98SHong Zhang typedef struct {
60d4002b98SHong Zhang   SEQSELLHEADER(MatScalar);
61d4002b98SHong Zhang   MatScalar   *saved_values;              /* location for stashing nonzero values of matrix */
62d4002b98SHong Zhang   PetscScalar *idiag, *mdiag, *ssor_work; /* inverse of diagonal entries, diagonal values and workspace for Eisenstat trick */
63d4002b98SHong Zhang   PetscBool    idiagvalid;                /* current idiag[] and mdiag[] are valid */
64d4002b98SHong Zhang   PetscScalar  fshift, omega;             /* last used omega and fshift */
65d4002b98SHong Zhang   ISColoring   coloring;                  /* set with MatADSetColoring() used by MatADSetValues() */
66d4002b98SHong Zhang } Mat_SeqSELL;
67d4002b98SHong Zhang 
68d4002b98SHong Zhang /*
69d4002b98SHong Zhang  Frees the arrays from the XSELLPACK matrix type
70d4002b98SHong Zhang  */
71d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSeqXSELLFreeSELL(Mat AA, MatScalar **val, PetscInt **colidx)
72d71ae5a4SJacob Faibussowitsch {
73d4002b98SHong Zhang   Mat_SeqSELL *A = (Mat_SeqSELL *)AA->data;
74d4002b98SHong Zhang   if (A->singlemalloc) {
759566063dSJacob Faibussowitsch     PetscCall(PetscFree2(*val, *colidx));
76d4002b98SHong Zhang   } else {
779566063dSJacob Faibussowitsch     if (A->free_val) PetscCall(PetscFree(*val));
789566063dSJacob Faibussowitsch     if (A->free_colidx) PetscCall(PetscFree(*colidx));
79d4002b98SHong Zhang   }
803ba16761SJacob Faibussowitsch   return PETSC_SUCCESS;
81d4002b98SHong Zhang }
82d4002b98SHong Zhang 
83*4e58db63SHong Zhang #define MatSeqXSELLReallocateSELL(Amat, AM, BS2, WIDTH, SIDX, SH, SID, ROW, COL, COLIDX, VAL, CP, VP, NONEW, datatype, MUL) \
8407e43b41SHong Zhang   if (WIDTH >= (SIDX[SID + 1] - SIDX[SID]) / SH) { \
85d4002b98SHong Zhang     Mat_SeqSELL *Ain = (Mat_SeqSELL *)Amat->data; \
862d1451d4SHong Zhang     /* there is no extra room in row, therefore enlarge 1 slice column */ \
87*4e58db63SHong Zhang     PetscInt  new_size = Ain->maxallocmat + SH * MUL, *new_colidx; \
88d4002b98SHong Zhang     datatype *new_val; \
89d4002b98SHong Zhang \
9008401ef6SPierre Jolivet     PetscCheck(NONEW != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", ROW, COL); \
91d4002b98SHong Zhang     /* malloc new storage space */ \
929566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(BS2 *new_size, &new_val, BS2 *new_size, &new_colidx)); \
93d4002b98SHong Zhang \
94d4002b98SHong Zhang     /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \
959566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(new_val, VAL, SIDX[SID + 1])); \
969566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(new_colidx, COLIDX, SIDX[SID + 1])); \
97*4e58db63SHong Zhang     PetscCall(PetscArraycpy(new_val + SIDX[SID + 1] + SH * MUL, VAL + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \
98*4e58db63SHong Zhang     PetscCall(PetscArraycpy(new_colidx + SIDX[SID + 1] + SH * MUL, COLIDX + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \
99d4002b98SHong Zhang     /* update slice_idx */ \
100*4e58db63SHong Zhang     for (ii = SID + 1; ii <= Ain->totalslices; ii++) { SIDX[ii] += SH * MUL; } \
1012d1451d4SHong Zhang     /* update pointers. Notice that they point to the FIRST postion of the row */ \
10207e43b41SHong Zhang     CP = new_colidx + SIDX[SID] + (ROW % SH); \
10307e43b41SHong Zhang     VP = new_val + SIDX[SID] + (ROW % SH); \
104d4002b98SHong Zhang     /* free up old matrix storage */ \
1059566063dSJacob Faibussowitsch     PetscCall(MatSeqXSELLFreeSELL(A, &Ain->val, &Ain->colidx)); \
106d4002b98SHong Zhang     Ain->val          = (MatScalar *)new_val; \
107d4002b98SHong Zhang     Ain->colidx       = new_colidx; \
108d4002b98SHong Zhang     Ain->singlemalloc = PETSC_TRUE; \
109d4002b98SHong Zhang     Ain->maxallocmat  = new_size; \
110d4002b98SHong Zhang     Ain->reallocs++; \
111*4e58db63SHong Zhang     if (WIDTH >= Ain->maxallocrow) Ain->maxallocrow += MUL; \
112d4002b98SHong Zhang     if (WIDTH >= Ain->rlenmax) Ain->rlenmax++; \
1139371c9d4SSatish Balay   }
114d4002b98SHong Zhang 
115d4002b98SHong Zhang #define MatSetValue_SeqSELL_Private(A, row, col, value, addv, orow, ocol, cp, vp, lastcol, low, high) \
116d4002b98SHong Zhang   { \
117d4002b98SHong Zhang     Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; \
118d4002b98SHong Zhang     found          = PETSC_FALSE; \
119d4002b98SHong Zhang     if (col <= lastcol) low = 0; \
120d4002b98SHong Zhang     else high = a->rlen[row]; \
121d4002b98SHong Zhang     lastcol = col; \
122d4002b98SHong Zhang     while (high - low > 5) { \
123d4002b98SHong Zhang       t = (low + high) / 2; \
12407e43b41SHong Zhang       if (*(cp + a->sliceheight * t) > col) high = t; \
125d4002b98SHong Zhang       else low = t; \
126d4002b98SHong Zhang     } \
127d4002b98SHong Zhang     for (_i = low; _i < high; _i++) { \
12807e43b41SHong Zhang       if (*(cp + a->sliceheight * _i) > col) break; \
12907e43b41SHong Zhang       if (*(cp + a->sliceheight * _i) == col) { \
13007e43b41SHong Zhang         if (addv == ADD_VALUES) *(vp + a->sliceheight * _i) += value; \
13107e43b41SHong Zhang         else *(vp + a->sliceheight * _i) = value; \
132d4002b98SHong Zhang         found = PETSC_TRUE; \
133d4002b98SHong Zhang         break; \
134d4002b98SHong Zhang       } \
135d4002b98SHong Zhang     } \
136d4002b98SHong Zhang     if (!found) { \
13708401ef6SPierre Jolivet       PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
13807e43b41SHong Zhang       if (a->nonew != 1 && !(value == 0.0 && a->ignorezeroentries) && a->rlen[row] >= (a->sliidx[row / a->sliceheight + 1] - a->sliidx[row / a->sliceheight]) / a->sliceheight) { \
1392d1451d4SHong Zhang         /* there is no extra room in row, therefore enlarge 1 slice column */ \
14007e43b41SHong Zhang         if (a->maxallocmat < a->sliidx[a->totalslices] + a->sliceheight) { \
141d4002b98SHong Zhang           /* allocates a larger array for the XSELL matrix types; only extend the current slice by one more column. */ \
14207e43b41SHong Zhang           PetscInt   new_size = a->maxallocmat + a->sliceheight, *new_colidx; \
143d4002b98SHong Zhang           MatScalar *new_val; \
14408401ef6SPierre Jolivet           PetscCheck(a->nonew != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", orow, ocol); \
145d4002b98SHong Zhang           /* malloc new storage space */ \
1469566063dSJacob Faibussowitsch           PetscCall(PetscMalloc2(new_size, &new_val, new_size, &new_colidx)); \
147d4002b98SHong Zhang           /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \
14807e43b41SHong Zhang           PetscCall(PetscArraycpy(new_val, a->val, a->sliidx[row / a->sliceheight + 1])); \
14907e43b41SHong Zhang           PetscCall(PetscArraycpy(new_colidx, a->colidx, a->sliidx[row / a->sliceheight + 1])); \
15007e43b41SHong Zhang           PetscCall(PetscArraycpy(new_val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
15107e43b41SHong Zhang           PetscCall(PetscArraycpy(new_colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
1522d1451d4SHong Zhang           /* update pointers. Notice that they point to the FIRST postion of the row */ \
15307e43b41SHong Zhang           cp = new_colidx + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \
15407e43b41SHong Zhang           vp = new_val + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \
155d4002b98SHong Zhang           /* free up old matrix storage */ \
1569566063dSJacob Faibussowitsch           PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx)); \
157d4002b98SHong Zhang           a->val          = (MatScalar *)new_val; \
158d4002b98SHong Zhang           a->colidx       = new_colidx; \
159d4002b98SHong Zhang           a->singlemalloc = PETSC_TRUE; \
160d4002b98SHong Zhang           a->maxallocmat  = new_size; \
161d4002b98SHong Zhang           a->reallocs++; \
162d4002b98SHong Zhang         } else { \
163d4002b98SHong Zhang           /* no need to reallocate, just shift the following slices to create space for the added slice column */ \
16407e43b41SHong Zhang           PetscCall(PetscArraymove(a->val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
16507e43b41SHong Zhang           PetscCall(PetscArraymove(a->colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
166d4002b98SHong Zhang         } \
167d4002b98SHong Zhang         /* update slice_idx */ \
16807e43b41SHong Zhang         for (ii = row / a->sliceheight + 1; ii <= a->totalslices; ii++) a->sliidx[ii] += a->sliceheight; \
169d4002b98SHong Zhang         if (a->rlen[row] >= a->maxallocrow) a->maxallocrow++; \
170d4002b98SHong Zhang         if (a->rlen[row] >= a->rlenmax) a->rlenmax++; \
171d4002b98SHong Zhang       } \
172d4002b98SHong Zhang       /* shift up all the later entries in this row */ \
173d4002b98SHong Zhang       for (ii = a->rlen[row] - 1; ii >= _i; ii--) { \
17407e43b41SHong Zhang         *(cp + a->sliceheight * (ii + 1)) = *(cp + a->sliceheight * ii); \
17507e43b41SHong Zhang         *(vp + a->sliceheight * (ii + 1)) = *(vp + a->sliceheight * ii); \
176d4002b98SHong Zhang       } \
17707e43b41SHong Zhang       *(cp + a->sliceheight * _i) = col; \
17807e43b41SHong Zhang       *(vp + a->sliceheight * _i) = value; \
1799371c9d4SSatish Balay       a->nz++; \
1809371c9d4SSatish Balay       a->rlen[row]++; \
1819371c9d4SSatish Balay       A->nonzerostate++; \
1829371c9d4SSatish Balay       low = _i + 1; \
1839371c9d4SSatish Balay       high++; \
184d4002b98SHong Zhang     } \
1859371c9d4SSatish Balay   }
186d4002b98SHong Zhang 
187d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat, PetscInt, const PetscInt[]);
188d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMult_SeqSELL(Mat, Vec, Vec);
189d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMultAdd_SeqSELL(Mat, Vec, Vec, Vec);
190d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMultTranspose_SeqSELL(Mat, Vec, Vec);
191d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat, Vec, Vec, Vec);
192d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMissingDiagonal_SeqSELL(Mat, PetscBool *, PetscInt *);
193d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMarkDiagonal_SeqSELL(Mat);
194d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatInvertDiagonal_SeqSELL(Mat, PetscScalar, PetscScalar);
195d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatZeroEntries_SeqSELL(Mat);
196d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatDestroy_SeqSELL(Mat);
197d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSetOption_SeqSELL(Mat, MatOption, PetscBool);
198d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetDiagonal_SeqSELL(Mat, Vec v);
199d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
200d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatView_SeqSELL(Mat, PetscViewer);
201d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatAssemblyEnd_SeqSELL(Mat, MatAssemblyType);
202d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetInfo_SeqSELL(Mat, MatInfoType, MatInfo *);
203d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
204d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatCopy_SeqSELL(Mat, Mat, MatStructure);
205d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSetUp_SeqSELL(Mat);
206d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat, PetscScalar *[]);
207d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat, PetscScalar *[]);
208d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatShift_SeqSELL(Mat, PetscScalar);
209d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSOR_SeqSELL(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
210d4002b98SHong Zhang PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat);
211d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatDuplicate_SeqSELL(Mat, MatDuplicateOption, Mat *);
212d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatEqual_SeqSELL(Mat, Mat, PetscBool *);
213d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat);
214d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat, MatType, MatReuse, Mat *);
215d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat, MatType, MatReuse, Mat *);
216d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqSELL(Mat, ISColoring, MatFDColoring);
217d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatFDColoringSetUp_SeqSELL(Mat, ISColoring, MatFDColoring);
218d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *);
219d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *);
220d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConjugate_SeqSELL(Mat A);
221d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatScale_SeqSELL(Mat, PetscScalar);
222d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatDiagonalScale_SeqSELL(Mat, Vec, Vec);
223d4002b98SHong Zhang #endif
224