1 2 #ifndef __SELL_H 3 #define __SELL_H 4 5 #include <petsc/private/matimpl.h> 6 #include <petsc/private/hashmapi.h> 7 8 /* 9 For NVIDIA GPUs each slice should be padded to the boundary of 16 elements for best performance. 10 The optimal memory alignment in device memory is 128 bytes, 64 bytes, 32 bytes for double precision, single precision and half precision. 11 */ 12 #if defined(PETSC_HAVE_DEVICE) 13 #define DEVICE_MEM_ALIGN 16 14 #endif 15 16 /* 17 Struct header for SeqSELL matrix format 18 */ 19 #define SEQSELLHEADER(datatype) \ 20 PetscBool roworiented; /* if true, row-oriented input, default */ \ 21 PetscInt nonew; /* 1 don't add new nonzeros, -1 generate error on new */ \ 22 PetscInt nounused; /* -1 generate error on unused space */ \ 23 PetscBool singlemalloc; /* if true a, i, and j have been obtained with one big malloc */ \ 24 PetscInt maxallocmat; /* max allocated space for the matrix */ \ 25 PetscInt maxallocrow; /* max allocated space for each row */ \ 26 PetscInt nz; /* actual nonzeros */ \ 27 PetscInt rlenmax; /* max actual row length, rmax cannot exceed maxallocrow */ \ 28 PetscInt *rlen; /* actual length of each row (padding zeros excluded) */ \ 29 PetscBool free_rlen; /* free rlen array ? */ \ 30 PetscInt reallocs; /* number of mallocs done during MatSetValues() \ 31 as more values are set than were prealloced */ \ 32 PetscBool keepnonzeropattern; /* keeps matrix structure same in calls to MatZeroRows()*/ \ 33 PetscBool ignorezeroentries; \ 34 PetscBool free_colidx; /* free the column indices colidx when the matrix is destroyed */ \ 35 PetscBool free_val; /* free the numerical values when matrix is destroy */ \ 36 PetscInt *colidx; /* column index */ \ 37 PetscInt *diag; /* pointers to diagonal elements */ \ 38 PetscInt nonzerorowcnt; /* how many rows have nonzero entries */ \ 39 PetscBool free_diag; /* free diag ? */ \ 40 datatype *val; /* elements including nonzeros and padding zeros */ \ 41 PetscScalar *solve_work; /* work space used in MatSolve */ \ 42 IS row, col, icol; /* index sets, used for reorderings */ \ 43 PetscBool pivotinblocks; /* pivot inside factorization of each diagonal block */ \ 44 Mat parent; /* set if this matrix was formed with MatDuplicate(...,MAT_SHARE_NONZERO_PATTERN,....); 45 means that this shares some data structures with the parent including diag, ilen, imax, i, j */ \ 46 PetscInt *sliidx; /* slice index */ \ 47 PetscInt totalslices; /* total number of slices */ \ 48 PetscInt sliceheight; /* slice height */ \ 49 PetscReal fillratio; /* ratio of number of padded zeros over total number of elements */ \ 50 PetscReal avgslicewidth; /* average slice width */ \ 51 PetscInt maxslicewidth; /* maximum slice width */ \ 52 PetscInt *sliperm; /* slice permutation array, CUDA only */ \ 53 PetscInt totalblocks; /* total number of blocks, CUDA only */ \ 54 PetscInt *blockidx; /* block index, CUDA only */ \ 55 PetscInt *block_row_map; /* starting row of the current block, CUDA only */ \ 56 PetscInt *getrowcols; /* workarray for MatGetRow_SeqSELL */ \ 57 PetscScalar *getrowvals /* workarray for MatGetRow_SeqSELL */ 58 59 typedef struct { 60 SEQSELLHEADER(MatScalar); 61 MatScalar *saved_values; /* location for stashing nonzero values of matrix */ 62 PetscScalar *idiag, *mdiag, *ssor_work; /* inverse of diagonal entries, diagonal values and workspace for Eisenstat trick */ 63 PetscBool idiagvalid; /* current idiag[] and mdiag[] are valid */ 64 PetscScalar fshift, omega; /* last used omega and fshift */ 65 ISColoring coloring; /* set with MatADSetColoring() used by MatADSetValues() */ 66 } Mat_SeqSELL; 67 68 /* 69 Frees the arrays from the XSELLPACK matrix type 70 */ 71 static inline PetscErrorCode MatSeqXSELLFreeSELL(Mat AA, MatScalar **val, PetscInt **colidx) 72 { 73 Mat_SeqSELL *A = (Mat_SeqSELL *)AA->data; 74 if (A->singlemalloc) { 75 PetscCall(PetscFree2(*val, *colidx)); 76 } else { 77 if (A->free_val) PetscCall(PetscFree(*val)); 78 if (A->free_colidx) PetscCall(PetscFree(*colidx)); 79 } 80 return PETSC_SUCCESS; 81 } 82 83 #define MatSeqXSELLReallocateSELL(Amat, AM, BS2, WIDTH, SIDX, SH, SID, ROW, COL, COLIDX, VAL, CP, VP, NONEW, datatype, MUL) \ 84 if (WIDTH >= (SIDX[SID + 1] - SIDX[SID]) / SH) { \ 85 Mat_SeqSELL *Ain = (Mat_SeqSELL *)Amat->data; \ 86 /* there is no extra room in row, therefore enlarge 1 slice column */ \ 87 PetscInt new_size = Ain->maxallocmat + SH * MUL, *new_colidx; \ 88 datatype *new_val; \ 89 \ 90 PetscCheck(NONEW != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", ROW, COL); \ 91 /* malloc new storage space */ \ 92 PetscCall(PetscMalloc2(BS2 *new_size, &new_val, BS2 *new_size, &new_colidx)); \ 93 \ 94 /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \ 95 PetscCall(PetscArraycpy(new_val, VAL, SIDX[SID + 1])); \ 96 PetscCall(PetscArraycpy(new_colidx, COLIDX, SIDX[SID + 1])); \ 97 PetscCall(PetscArraycpy(new_val + SIDX[SID + 1] + SH * MUL, VAL + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \ 98 PetscCall(PetscArraycpy(new_colidx + SIDX[SID + 1] + SH * MUL, COLIDX + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \ 99 /* update slice_idx */ \ 100 for (ii = SID + 1; ii <= Ain->totalslices; ii++) { SIDX[ii] += SH * MUL; } \ 101 /* update pointers. Notice that they point to the FIRST postion of the row */ \ 102 CP = new_colidx + SIDX[SID] + (ROW % SH); \ 103 VP = new_val + SIDX[SID] + (ROW % SH); \ 104 /* free up old matrix storage */ \ 105 PetscCall(MatSeqXSELLFreeSELL(A, &Ain->val, &Ain->colidx)); \ 106 Ain->val = (MatScalar *)new_val; \ 107 Ain->colidx = new_colidx; \ 108 Ain->singlemalloc = PETSC_TRUE; \ 109 Ain->maxallocmat = new_size; \ 110 Ain->reallocs++; \ 111 if (WIDTH >= Ain->maxallocrow) Ain->maxallocrow += MUL; \ 112 if (WIDTH >= Ain->rlenmax) Ain->rlenmax++; \ 113 } 114 115 #define MatSetValue_SeqSELL_Private(A, row, col, value, addv, orow, ocol, cp, vp, lastcol, low, high) \ 116 { \ 117 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; \ 118 found = PETSC_FALSE; \ 119 if (col <= lastcol) low = 0; \ 120 else high = a->rlen[row]; \ 121 lastcol = col; \ 122 while (high - low > 5) { \ 123 t = (low + high) / 2; \ 124 if (*(cp + a->sliceheight * t) > col) high = t; \ 125 else low = t; \ 126 } \ 127 for (_i = low; _i < high; _i++) { \ 128 if (*(cp + a->sliceheight * _i) > col) break; \ 129 if (*(cp + a->sliceheight * _i) == col) { \ 130 if (addv == ADD_VALUES) *(vp + a->sliceheight * _i) += value; \ 131 else *(vp + a->sliceheight * _i) = value; \ 132 found = PETSC_TRUE; \ 133 break; \ 134 } \ 135 } \ 136 if (!found) { \ 137 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 138 if (a->nonew != 1 && !(value == 0.0 && a->ignorezeroentries) && a->rlen[row] >= (a->sliidx[row / a->sliceheight + 1] - a->sliidx[row / a->sliceheight]) / a->sliceheight) { \ 139 /* there is no extra room in row, therefore enlarge 1 slice column */ \ 140 if (a->maxallocmat < a->sliidx[a->totalslices] + a->sliceheight) { \ 141 /* allocates a larger array for the XSELL matrix types; only extend the current slice by one more column. */ \ 142 PetscInt new_size = a->maxallocmat + a->sliceheight, *new_colidx; \ 143 MatScalar *new_val; \ 144 PetscCheck(a->nonew != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", orow, ocol); \ 145 /* malloc new storage space */ \ 146 PetscCall(PetscMalloc2(new_size, &new_val, new_size, &new_colidx)); \ 147 /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \ 148 PetscCall(PetscArraycpy(new_val, a->val, a->sliidx[row / a->sliceheight + 1])); \ 149 PetscCall(PetscArraycpy(new_colidx, a->colidx, a->sliidx[row / a->sliceheight + 1])); \ 150 PetscCall(PetscArraycpy(new_val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 151 PetscCall(PetscArraycpy(new_colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 152 /* update pointers. Notice that they point to the FIRST postion of the row */ \ 153 cp = new_colidx + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \ 154 vp = new_val + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \ 155 /* free up old matrix storage */ \ 156 PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx)); \ 157 a->val = (MatScalar *)new_val; \ 158 a->colidx = new_colidx; \ 159 a->singlemalloc = PETSC_TRUE; \ 160 a->maxallocmat = new_size; \ 161 a->reallocs++; \ 162 } else { \ 163 /* no need to reallocate, just shift the following slices to create space for the added slice column */ \ 164 PetscCall(PetscArraymove(a->val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 165 PetscCall(PetscArraymove(a->colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 166 } \ 167 /* update slice_idx */ \ 168 for (ii = row / a->sliceheight + 1; ii <= a->totalslices; ii++) a->sliidx[ii] += a->sliceheight; \ 169 if (a->rlen[row] >= a->maxallocrow) a->maxallocrow++; \ 170 if (a->rlen[row] >= a->rlenmax) a->rlenmax++; \ 171 } \ 172 /* shift up all the later entries in this row */ \ 173 for (ii = a->rlen[row] - 1; ii >= _i; ii--) { \ 174 *(cp + a->sliceheight * (ii + 1)) = *(cp + a->sliceheight * ii); \ 175 *(vp + a->sliceheight * (ii + 1)) = *(vp + a->sliceheight * ii); \ 176 } \ 177 *(cp + a->sliceheight * _i) = col; \ 178 *(vp + a->sliceheight * _i) = value; \ 179 a->nz++; \ 180 a->rlen[row]++; \ 181 A->nonzerostate++; \ 182 low = _i + 1; \ 183 high++; \ 184 } \ 185 } 186 187 PETSC_INTERN PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat, PetscInt, const PetscInt[]); 188 PETSC_INTERN PetscErrorCode MatMult_SeqSELL(Mat, Vec, Vec); 189 PETSC_INTERN PetscErrorCode MatMultAdd_SeqSELL(Mat, Vec, Vec, Vec); 190 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqSELL(Mat, Vec, Vec); 191 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat, Vec, Vec, Vec); 192 PETSC_INTERN PetscErrorCode MatMissingDiagonal_SeqSELL(Mat, PetscBool *, PetscInt *); 193 PETSC_INTERN PetscErrorCode MatMarkDiagonal_SeqSELL(Mat); 194 PETSC_INTERN PetscErrorCode MatInvertDiagonal_SeqSELL(Mat, PetscScalar, PetscScalar); 195 PETSC_INTERN PetscErrorCode MatZeroEntries_SeqSELL(Mat); 196 PETSC_INTERN PetscErrorCode MatDestroy_SeqSELL(Mat); 197 PETSC_INTERN PetscErrorCode MatSetOption_SeqSELL(Mat, MatOption, PetscBool); 198 PETSC_INTERN PetscErrorCode MatGetDiagonal_SeqSELL(Mat, Vec v); 199 PETSC_INTERN PetscErrorCode MatGetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]); 200 PETSC_INTERN PetscErrorCode MatView_SeqSELL(Mat, PetscViewer); 201 PETSC_INTERN PetscErrorCode MatAssemblyEnd_SeqSELL(Mat, MatAssemblyType); 202 PETSC_INTERN PetscErrorCode MatGetInfo_SeqSELL(Mat, MatInfoType, MatInfo *); 203 PETSC_INTERN PetscErrorCode MatSetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 204 PETSC_INTERN PetscErrorCode MatCopy_SeqSELL(Mat, Mat, MatStructure); 205 PETSC_INTERN PetscErrorCode MatSetUp_SeqSELL(Mat); 206 PETSC_INTERN PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat, PetscScalar *[]); 207 PETSC_INTERN PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat, PetscScalar *[]); 208 PETSC_INTERN PetscErrorCode MatShift_SeqSELL(Mat, PetscScalar); 209 PETSC_INTERN PetscErrorCode MatSOR_SeqSELL(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 210 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat); 211 PETSC_INTERN PetscErrorCode MatDuplicate_SeqSELL(Mat, MatDuplicateOption, Mat *); 212 PETSC_INTERN PetscErrorCode MatEqual_SeqSELL(Mat, Mat, PetscBool *); 213 PETSC_INTERN PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat); 214 PETSC_INTERN PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat, MatType, MatReuse, Mat *); 215 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat, MatType, MatReuse, Mat *); 216 PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqSELL(Mat, ISColoring, MatFDColoring); 217 PETSC_INTERN PetscErrorCode MatFDColoringSetUp_SeqSELL(Mat, ISColoring, MatFDColoring); 218 PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *); 219 PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *); 220 PETSC_INTERN PetscErrorCode MatConjugate_SeqSELL(Mat A); 221 PETSC_INTERN PetscErrorCode MatScale_SeqSELL(Mat, PetscScalar); 222 PETSC_INTERN PetscErrorCode MatDiagonalScale_SeqSELL(Mat, Vec, Vec); 223 #endif 224