1 #pragma once 2 3 #include <petsc/private/matimpl.h> 4 #include <petsc/private/hashmapi.h> 5 6 /* 7 For NVIDIA GPUs each slice should be padded to the boundary of 16 elements for best performance. 8 The optimal memory alignment in device memory is 128 bytes, 64 bytes, 32 bytes for double precision, single precision and half precision. 9 */ 10 #if defined(PETSC_HAVE_DEVICE) 11 #define DEVICE_MEM_ALIGN 16 12 #endif 13 14 /* 15 Struct header for SeqSELL matrix format 16 */ 17 #define SEQSELLHEADER(datatype) \ 18 PetscBool roworiented; /* if true, row-oriented input, default */ \ 19 PetscInt nonew; /* 1 don't add new nonzeros, -1 generate error on new */ \ 20 PetscInt nounused; /* -1 generate error on unused space */ \ 21 PetscBool singlemalloc; /* if true a, i, and j have been obtained with one big malloc */ \ 22 PetscInt maxallocmat; /* max allocated space for the matrix */ \ 23 PetscInt maxallocrow; /* max allocated space for each row */ \ 24 PetscInt nz; /* actual nonzeros */ \ 25 PetscInt rlenmax; /* max actual row length, rmax cannot exceed maxallocrow */ \ 26 PetscInt *rlen; /* actual length of each row (padding zeros excluded) */ \ 27 PetscBool free_rlen; /* free rlen array ? */ \ 28 PetscInt reallocs; /* number of mallocs done during MatSetValues() \ 29 as more values are set than were prealloced */ \ 30 PetscBool keepnonzeropattern; /* keeps matrix nonzero structure the same in calls to MatZeroRows()*/ \ 31 PetscBool ignorezeroentries; \ 32 PetscBool free_colidx; /* free the column indices colidx when the matrix is destroyed */ \ 33 PetscBool free_val; /* free the numerical values when matrix is destroy */ \ 34 PetscInt *colidx; /* column index */ \ 35 PetscInt *diag; /* pointers to diagonal elements */ \ 36 PetscObjectState diagNonzeroState; /* nonzero state of the matrix when diag was obtained */ \ 37 PetscBool diagDense; /* matrix contains all the diagonal entries */ \ 38 PetscInt nonzerorowcnt; /* how many rows have nonzero entries */ \ 39 PetscBool free_diag; /* free diag ? */ \ 40 datatype *val; /* elements including nonzeros and padding zeros */ \ 41 PetscScalar *solve_work; /* work space used in MatSolve */ \ 42 IS row, col, icol; /* index sets, used for reorderings */ \ 43 PetscBool pivotinblocks; /* pivot inside factorization of each diagonal block */ \ 44 Mat parent; /* set if this matrix was formed with MatDuplicate(...,MAT_SHARE_NONZERO_PATTERN,....); 45 means that this shares some data structures with the parent including diag, ilen, imax, i, j */ \ 46 PetscInt *sliidx; /* slice index */ \ 47 PetscInt totalslices; /* total number of slices */ \ 48 PetscInt sliceheight; /* slice height */ \ 49 PetscReal fillratio; /* ratio of number of padded zeros over total number of elements */ \ 50 PetscReal avgslicewidth; /* average slice width */ \ 51 PetscInt maxslicewidth; /* maximum slice width */ \ 52 PetscReal varslicesize; /* variance of slice size */ \ 53 PetscInt *sliperm; /* slice permutation array, CUDA only */ \ 54 PetscInt totalblocks; /* total number of blocks, CUDA only */ \ 55 PetscInt *blockidx; /* block index, CUDA only */ \ 56 PetscInt *block_row_map; /* starting row of the current block, CUDA only */ \ 57 PetscInt chunksize; /* chunk size, CUDA only */ \ 58 PetscInt totalchunks; /* total number of chunks, CUDA only */ \ 59 PetscInt *chunk_slice_map; /* starting slice of the current chunk, CUDA only */ \ 60 PetscInt *getrowcols; /* workarray for MatGetRow_SeqSELL */ \ 61 PetscScalar *getrowvals /* workarray for MatGetRow_SeqSELL */ 62 63 typedef struct { 64 SEQSELLHEADER(MatScalar); 65 MatScalar *saved_values; /* location for stashing nonzero values of matrix */ 66 67 /* data needed for MatSOR_SeqAIJ() */ 68 PetscScalar *mdiag, *idiag; /* diagonal values, inverse of diagonal entries */ 69 PetscScalar *ssor_work; /* workspace for Eisenstat trick */ 70 PetscObjectState idiagState; /* state of the matrix when mdiag and idiag was obtained */ 71 PetscScalar fshift, omega; /* last used omega and fshift */ 72 73 ISColoring coloring; /* set with MatADSetColoring() used by MatADSetValues() */ 74 } Mat_SeqSELL; 75 76 /* 77 Frees the arrays from the XSELLPACK matrix type 78 */ 79 static inline PetscErrorCode MatSeqXSELLFreeSELL(Mat AA, MatScalar **val, PetscInt **colidx) 80 { 81 Mat_SeqSELL *A = (Mat_SeqSELL *)AA->data; 82 if (A->singlemalloc) { 83 PetscCall(PetscFree2(*val, *colidx)); 84 } else { 85 if (A->free_val) PetscCall(PetscFree(*val)); 86 if (A->free_colidx) PetscCall(PetscFree(*colidx)); 87 } 88 return PETSC_SUCCESS; 89 } 90 91 #define MatSeqXSELLReallocateSELL(Amat, AM, BS2, WIDTH, SIDX, SH, SID, ROW, COL, COLIDX, VAL, CP, VP, NONEW, datatype, MUL) \ 92 do { \ 93 if (WIDTH >= (SIDX[SID + 1] - SIDX[SID]) / SH) { \ 94 Mat_SeqSELL *Ain = (Mat_SeqSELL *)Amat->data; \ 95 /* there is no extra room in row, therefore enlarge 1 slice column */ \ 96 PetscInt new_size = Ain->maxallocmat + SH * MUL, *new_colidx; \ 97 datatype *new_val; \ 98 \ 99 PetscCheck(NONEW != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc. Use MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", ROW, COL); \ 100 /* malloc new storage space */ \ 101 PetscCall(PetscMalloc2(BS2 * new_size, &new_val, BS2 * new_size, &new_colidx)); \ 102 \ 103 /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \ 104 PetscCall(PetscArraycpy(new_val, VAL, SIDX[SID + 1])); \ 105 PetscCall(PetscArraycpy(new_colidx, COLIDX, SIDX[SID + 1])); \ 106 PetscCall(PetscArraycpy(new_val + SIDX[SID + 1] + SH * MUL, VAL + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \ 107 PetscCall(PetscArraycpy(new_colidx + SIDX[SID + 1] + SH * MUL, COLIDX + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \ 108 /* update slice_idx */ \ 109 for (ii = SID + 1; ii <= Ain->totalslices; ii++) SIDX[ii] += SH * MUL; \ 110 /* update pointers. Notice that they point to the FIRST position of the row */ \ 111 CP = new_colidx + SIDX[SID] + (ROW % SH); \ 112 VP = new_val + SIDX[SID] + (ROW % SH); \ 113 /* free up old matrix storage */ \ 114 PetscCall(MatSeqXSELLFreeSELL(A, &Ain->val, &Ain->colidx)); \ 115 Ain->val = new_val; \ 116 Ain->colidx = new_colidx; \ 117 Ain->singlemalloc = PETSC_TRUE; \ 118 Ain->maxallocmat = new_size; \ 119 Ain->reallocs++; \ 120 A->nonzerostate++; \ 121 if (WIDTH >= Ain->maxallocrow) Ain->maxallocrow += MUL; \ 122 if (WIDTH >= Ain->rlenmax) Ain->rlenmax++; \ 123 } \ 124 } while (0) 125 126 #define MatSetValue_SeqSELL_Private(A, row, col, value, addv, orow, ocol, cp, vp, lastcol, low, high) \ 127 do { \ 128 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; \ 129 found = PETSC_FALSE; \ 130 if (col <= lastcol) low = 0; \ 131 else high = a->rlen[row]; \ 132 lastcol = col; \ 133 while (high - low > 5) { \ 134 t = (low + high) / 2; \ 135 if (*(cp + a->sliceheight * t) > col) high = t; \ 136 else low = t; \ 137 } \ 138 for (_i = low; _i < high; _i++) { \ 139 if (*(cp + a->sliceheight * _i) > col) break; \ 140 if (*(cp + a->sliceheight * _i) == col) { \ 141 if (addv == ADD_VALUES) *(vp + a->sliceheight * _i) += value; \ 142 else *(vp + a->sliceheight * _i) = value; \ 143 found = PETSC_TRUE; \ 144 break; \ 145 } \ 146 } \ 147 if (!found) { \ 148 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 149 if (a->nonew != 1 && !(value == 0.0 && a->ignorezeroentries) && a->rlen[row] >= (a->sliidx[row / a->sliceheight + 1] - a->sliidx[row / a->sliceheight]) / a->sliceheight) { \ 150 /* there is no extra room in row, therefore enlarge 1 slice column */ \ 151 if (a->maxallocmat < a->sliidx[a->totalslices] + a->sliceheight) { \ 152 /* allocates a larger array for the XSELL matrix types; only extend the current slice by one more column. */ \ 153 PetscInt new_size = a->maxallocmat + a->sliceheight, *new_colidx; \ 154 MatScalar *new_val; \ 155 PetscCheck(a->nonew != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc. Use MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", orow, ocol); \ 156 /* malloc new storage space */ \ 157 PetscCall(PetscMalloc2(new_size, &new_val, new_size, &new_colidx)); \ 158 /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \ 159 PetscCall(PetscArraycpy(new_val, a->val, a->sliidx[row / a->sliceheight + 1])); \ 160 PetscCall(PetscArraycpy(new_colidx, a->colidx, a->sliidx[row / a->sliceheight + 1])); \ 161 PetscCall(PetscArraycpy(new_val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, PetscSafePointerPlusOffset(a->val, a->sliidx[row / a->sliceheight + 1]), a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 162 PetscCall(PetscArraycpy(new_colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, PetscSafePointerPlusOffset(a->colidx, a->sliidx[row / a->sliceheight + 1]), a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 163 /* update pointers. Notice that they point to the FIRST position of the row */ \ 164 cp = new_colidx + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \ 165 vp = new_val + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \ 166 /* free up old matrix storage */ \ 167 PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx)); \ 168 a->val = new_val; \ 169 a->colidx = new_colidx; \ 170 a->singlemalloc = PETSC_TRUE; \ 171 a->maxallocmat = new_size; \ 172 a->reallocs++; \ 173 } else { \ 174 /* no need to reallocate, just shift the following slices to create space for the added slice column */ \ 175 PetscCall(PetscArraymove(a->val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 176 PetscCall(PetscArraymove(a->colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 177 } \ 178 /* update slice_idx */ \ 179 for (ii = row / a->sliceheight + 1; ii <= a->totalslices; ii++) a->sliidx[ii] += a->sliceheight; \ 180 if (a->rlen[row] >= a->maxallocrow) a->maxallocrow++; \ 181 if (a->rlen[row] >= a->rlenmax) a->rlenmax++; \ 182 } \ 183 /* shift up all the later entries in this row */ \ 184 for (ii = a->rlen[row] - 1; ii >= _i; ii--) { \ 185 *(cp + a->sliceheight * (ii + 1)) = *(cp + a->sliceheight * ii); \ 186 *(vp + a->sliceheight * (ii + 1)) = *(vp + a->sliceheight * ii); \ 187 } \ 188 *(cp + a->sliceheight * _i) = col; \ 189 *(vp + a->sliceheight * _i) = value; \ 190 a->nz++; \ 191 a->rlen[row]++; \ 192 A->nonzerostate++; \ 193 low = _i + 1; \ 194 high++; \ 195 } \ 196 } while (0) 197 198 #define PetscCeilIntMacro(x, y) ((((PetscInt)(x)) / ((PetscInt)(y))) + ((((PetscInt)(x)) % ((PetscInt)(y))) ? 1 : 0)) 199 200 PETSC_INTERN PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat, PetscInt, const PetscInt[]); 201 PETSC_INTERN PetscErrorCode MatMult_SeqSELL(Mat, Vec, Vec); 202 PETSC_INTERN PetscErrorCode MatMultAdd_SeqSELL(Mat, Vec, Vec, Vec); 203 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqSELL(Mat, Vec, Vec); 204 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat, Vec, Vec, Vec); 205 PETSC_INTERN PetscErrorCode MatMissingDiagonal_SeqSELL(Mat, PetscBool *, PetscInt *); 206 PETSC_INTERN PetscErrorCode MatMarkDiagonal_SeqSELL(Mat); 207 PETSC_INTERN PetscErrorCode MatZeroEntries_SeqSELL(Mat); 208 PETSC_INTERN PetscErrorCode MatDestroy_SeqSELL(Mat); 209 PETSC_INTERN PetscErrorCode MatSetOption_SeqSELL(Mat, MatOption, PetscBool); 210 PETSC_INTERN PetscErrorCode MatGetDiagonal_SeqSELL(Mat, Vec v); 211 PETSC_INTERN PetscErrorCode MatGetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]); 212 PETSC_INTERN PetscErrorCode MatView_SeqSELL(Mat, PetscViewer); 213 PETSC_INTERN PetscErrorCode MatAssemblyEnd_SeqSELL(Mat, MatAssemblyType); 214 PETSC_INTERN PetscErrorCode MatGetInfo_SeqSELL(Mat, MatInfoType, MatInfo *); 215 PETSC_INTERN PetscErrorCode MatSetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 216 PETSC_INTERN PetscErrorCode MatCopy_SeqSELL(Mat, Mat, MatStructure); 217 PETSC_INTERN PetscErrorCode MatSetUp_SeqSELL(Mat); 218 PETSC_INTERN PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat, PetscScalar *[]); 219 PETSC_INTERN PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat, PetscScalar *[]); 220 PETSC_INTERN PetscErrorCode MatShift_SeqSELL(Mat, PetscScalar); 221 PETSC_INTERN PetscErrorCode MatSOR_SeqSELL(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 222 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat); 223 PETSC_INTERN PetscErrorCode MatDuplicate_SeqSELL(Mat, MatDuplicateOption, Mat *); 224 PETSC_INTERN PetscErrorCode MatEqual_SeqSELL(Mat, Mat, PetscBool *); 225 PETSC_INTERN PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat, MatType, MatReuse, Mat *); 226 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat, MatType, MatReuse, Mat *); 227 PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqSELL(Mat, ISColoring, MatFDColoring); 228 PETSC_INTERN PetscErrorCode MatFDColoringSetUp_SeqSELL(Mat, ISColoring, MatFDColoring); 229 PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *); 230 PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *); 231 PETSC_INTERN PetscErrorCode MatConjugate_SeqSELL(Mat A); 232 PETSC_INTERN PetscErrorCode MatScale_SeqSELL(Mat, PetscScalar); 233 PETSC_INTERN PetscErrorCode MatDiagonalScale_SeqSELL(Mat, Vec, Vec); 234