1d4002b98SHong Zhang 26524c165SJacob Faibussowitsch #ifndef __SELL_H 3d4002b98SHong Zhang #define __SELL_H 4d4002b98SHong Zhang 5d4002b98SHong Zhang #include <petsc/private/matimpl.h> 6eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h> 7d4002b98SHong Zhang 8d4002b98SHong Zhang /* 9*4e58db63SHong Zhang For NVIDIA GPUs each slice should be padded to the boundary of 16 elements for best performance. 10*4e58db63SHong Zhang The optimal memory alignment in device memory is 128 bytes, 64 bytes, 32 bytes for double precision, single precision and half precision. 11*4e58db63SHong Zhang */ 12*4e58db63SHong Zhang #if defined(PETSC_HAVE_DEVICE) 13*4e58db63SHong Zhang #define DEVICE_MEM_ALIGN 16 14*4e58db63SHong Zhang #endif 15*4e58db63SHong Zhang 16*4e58db63SHong Zhang /* 17d4002b98SHong Zhang Struct header for SeqSELL matrix format 18d4002b98SHong Zhang */ 19d4002b98SHong Zhang #define SEQSELLHEADER(datatype) \ 20d4002b98SHong Zhang PetscBool roworiented; /* if true, row-oriented input, default */ \ 21d4002b98SHong Zhang PetscInt nonew; /* 1 don't add new nonzeros, -1 generate error on new */ \ 22d4002b98SHong Zhang PetscInt nounused; /* -1 generate error on unused space */ \ 23d4002b98SHong Zhang PetscBool singlemalloc; /* if true a, i, and j have been obtained with one big malloc */ \ 24d4002b98SHong Zhang PetscInt maxallocmat; /* max allocated space for the matrix */ \ 25d4002b98SHong Zhang PetscInt maxallocrow; /* max allocated space for each row */ \ 26d4002b98SHong Zhang PetscInt nz; /* actual nonzeros */ \ 27d4002b98SHong Zhang PetscInt rlenmax; /* max actual row length, rmax cannot exceed maxallocrow */ \ 28d4002b98SHong Zhang PetscInt *rlen; /* actual length of each row (padding zeros excluded) */ \ 29d4002b98SHong Zhang PetscBool free_rlen; /* free rlen array ? */ \ 30d4002b98SHong Zhang PetscInt reallocs; /* number of mallocs done during MatSetValues() \ 31d4002b98SHong Zhang as more values are set than were prealloced */ \ 32d4002b98SHong Zhang PetscBool keepnonzeropattern; /* keeps matrix structure same in calls to MatZeroRows()*/ \ 33d4002b98SHong Zhang PetscBool ignorezeroentries; \ 34d4002b98SHong Zhang PetscBool free_colidx; /* free the column indices colidx when the matrix is destroyed */ \ 35d4002b98SHong Zhang PetscBool free_val; /* free the numerical values when matrix is destroy */ \ 36d4002b98SHong Zhang PetscInt *colidx; /* column index */ \ 37d4002b98SHong Zhang PetscInt *diag; /* pointers to diagonal elements */ \ 38d4002b98SHong Zhang PetscInt nonzerorowcnt; /* how many rows have nonzero entries */ \ 39d4002b98SHong Zhang PetscBool free_diag; /* free diag ? */ \ 40d4002b98SHong Zhang datatype *val; /* elements including nonzeros and padding zeros */ \ 41d4002b98SHong Zhang PetscScalar *solve_work; /* work space used in MatSolve */ \ 42d4002b98SHong Zhang IS row, col, icol; /* index sets, used for reorderings */ \ 43d4002b98SHong Zhang PetscBool pivotinblocks; /* pivot inside factorization of each diagonal block */ \ 44d4002b98SHong Zhang Mat parent; /* set if this matrix was formed with MatDuplicate(...,MAT_SHARE_NONZERO_PATTERN,....); 45d4002b98SHong Zhang means that this shares some data structures with the parent including diag, ilen, imax, i, j */ \ 46d4002b98SHong Zhang PetscInt *sliidx; /* slice index */ \ 476108893eSStefano Zampini PetscInt totalslices; /* total number of slices */ \ 4807e43b41SHong Zhang PetscInt sliceheight; /* slice height */ \ 4907e43b41SHong Zhang PetscReal fillratio; /* ratio of number of padded zeros over total number of elements */ \ 5007e43b41SHong Zhang PetscReal avgslicewidth; /* average slice width */ \ 5107e43b41SHong Zhang PetscInt maxslicewidth; /* maximum slice width */ \ 5207e43b41SHong Zhang PetscInt *sliperm; /* slice permutation array, CUDA only */ \ 5307e43b41SHong Zhang PetscInt totalblocks; /* total number of blocks, CUDA only */ \ 5407e43b41SHong Zhang PetscInt *blockidx; /* block index, CUDA only */ \ 5507e43b41SHong Zhang PetscInt *block_row_map; /* starting row of the current block, CUDA only */ \ 566108893eSStefano Zampini PetscInt *getrowcols; /* workarray for MatGetRow_SeqSELL */ \ 579371c9d4SSatish Balay PetscScalar *getrowvals /* workarray for MatGetRow_SeqSELL */ 58d4002b98SHong Zhang 59d4002b98SHong Zhang typedef struct { 60d4002b98SHong Zhang SEQSELLHEADER(MatScalar); 61d4002b98SHong Zhang MatScalar *saved_values; /* location for stashing nonzero values of matrix */ 62d4002b98SHong Zhang PetscScalar *idiag, *mdiag, *ssor_work; /* inverse of diagonal entries, diagonal values and workspace for Eisenstat trick */ 63d4002b98SHong Zhang PetscBool idiagvalid; /* current idiag[] and mdiag[] are valid */ 64d4002b98SHong Zhang PetscScalar fshift, omega; /* last used omega and fshift */ 65d4002b98SHong Zhang ISColoring coloring; /* set with MatADSetColoring() used by MatADSetValues() */ 66d4002b98SHong Zhang } Mat_SeqSELL; 67d4002b98SHong Zhang 68d4002b98SHong Zhang /* 69d4002b98SHong Zhang Frees the arrays from the XSELLPACK matrix type 70d4002b98SHong Zhang */ 71d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSeqXSELLFreeSELL(Mat AA, MatScalar **val, PetscInt **colidx) 72d71ae5a4SJacob Faibussowitsch { 73d4002b98SHong Zhang Mat_SeqSELL *A = (Mat_SeqSELL *)AA->data; 74d4002b98SHong Zhang if (A->singlemalloc) { 759566063dSJacob Faibussowitsch PetscCall(PetscFree2(*val, *colidx)); 76d4002b98SHong Zhang } else { 779566063dSJacob Faibussowitsch if (A->free_val) PetscCall(PetscFree(*val)); 789566063dSJacob Faibussowitsch if (A->free_colidx) PetscCall(PetscFree(*colidx)); 79d4002b98SHong Zhang } 803ba16761SJacob Faibussowitsch return PETSC_SUCCESS; 81d4002b98SHong Zhang } 82d4002b98SHong Zhang 83*4e58db63SHong Zhang #define MatSeqXSELLReallocateSELL(Amat, AM, BS2, WIDTH, SIDX, SH, SID, ROW, COL, COLIDX, VAL, CP, VP, NONEW, datatype, MUL) \ 8407e43b41SHong Zhang if (WIDTH >= (SIDX[SID + 1] - SIDX[SID]) / SH) { \ 85d4002b98SHong Zhang Mat_SeqSELL *Ain = (Mat_SeqSELL *)Amat->data; \ 862d1451d4SHong Zhang /* there is no extra room in row, therefore enlarge 1 slice column */ \ 87*4e58db63SHong Zhang PetscInt new_size = Ain->maxallocmat + SH * MUL, *new_colidx; \ 88d4002b98SHong Zhang datatype *new_val; \ 89d4002b98SHong Zhang \ 9008401ef6SPierre Jolivet PetscCheck(NONEW != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", ROW, COL); \ 91d4002b98SHong Zhang /* malloc new storage space */ \ 929566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(BS2 *new_size, &new_val, BS2 *new_size, &new_colidx)); \ 93d4002b98SHong Zhang \ 94d4002b98SHong Zhang /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \ 959566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(new_val, VAL, SIDX[SID + 1])); \ 969566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(new_colidx, COLIDX, SIDX[SID + 1])); \ 97*4e58db63SHong Zhang PetscCall(PetscArraycpy(new_val + SIDX[SID + 1] + SH * MUL, VAL + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \ 98*4e58db63SHong Zhang PetscCall(PetscArraycpy(new_colidx + SIDX[SID + 1] + SH * MUL, COLIDX + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \ 99d4002b98SHong Zhang /* update slice_idx */ \ 100*4e58db63SHong Zhang for (ii = SID + 1; ii <= Ain->totalslices; ii++) { SIDX[ii] += SH * MUL; } \ 1012d1451d4SHong Zhang /* update pointers. Notice that they point to the FIRST postion of the row */ \ 10207e43b41SHong Zhang CP = new_colidx + SIDX[SID] + (ROW % SH); \ 10307e43b41SHong Zhang VP = new_val + SIDX[SID] + (ROW % SH); \ 104d4002b98SHong Zhang /* free up old matrix storage */ \ 1059566063dSJacob Faibussowitsch PetscCall(MatSeqXSELLFreeSELL(A, &Ain->val, &Ain->colidx)); \ 106d4002b98SHong Zhang Ain->val = (MatScalar *)new_val; \ 107d4002b98SHong Zhang Ain->colidx = new_colidx; \ 108d4002b98SHong Zhang Ain->singlemalloc = PETSC_TRUE; \ 109d4002b98SHong Zhang Ain->maxallocmat = new_size; \ 110d4002b98SHong Zhang Ain->reallocs++; \ 111*4e58db63SHong Zhang if (WIDTH >= Ain->maxallocrow) Ain->maxallocrow += MUL; \ 112d4002b98SHong Zhang if (WIDTH >= Ain->rlenmax) Ain->rlenmax++; \ 1139371c9d4SSatish Balay } 114d4002b98SHong Zhang 115d4002b98SHong Zhang #define MatSetValue_SeqSELL_Private(A, row, col, value, addv, orow, ocol, cp, vp, lastcol, low, high) \ 116d4002b98SHong Zhang { \ 117d4002b98SHong Zhang Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; \ 118d4002b98SHong Zhang found = PETSC_FALSE; \ 119d4002b98SHong Zhang if (col <= lastcol) low = 0; \ 120d4002b98SHong Zhang else high = a->rlen[row]; \ 121d4002b98SHong Zhang lastcol = col; \ 122d4002b98SHong Zhang while (high - low > 5) { \ 123d4002b98SHong Zhang t = (low + high) / 2; \ 12407e43b41SHong Zhang if (*(cp + a->sliceheight * t) > col) high = t; \ 125d4002b98SHong Zhang else low = t; \ 126d4002b98SHong Zhang } \ 127d4002b98SHong Zhang for (_i = low; _i < high; _i++) { \ 12807e43b41SHong Zhang if (*(cp + a->sliceheight * _i) > col) break; \ 12907e43b41SHong Zhang if (*(cp + a->sliceheight * _i) == col) { \ 13007e43b41SHong Zhang if (addv == ADD_VALUES) *(vp + a->sliceheight * _i) += value; \ 13107e43b41SHong Zhang else *(vp + a->sliceheight * _i) = value; \ 132d4002b98SHong Zhang found = PETSC_TRUE; \ 133d4002b98SHong Zhang break; \ 134d4002b98SHong Zhang } \ 135d4002b98SHong Zhang } \ 136d4002b98SHong Zhang if (!found) { \ 13708401ef6SPierre Jolivet PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 13807e43b41SHong Zhang if (a->nonew != 1 && !(value == 0.0 && a->ignorezeroentries) && a->rlen[row] >= (a->sliidx[row / a->sliceheight + 1] - a->sliidx[row / a->sliceheight]) / a->sliceheight) { \ 1392d1451d4SHong Zhang /* there is no extra room in row, therefore enlarge 1 slice column */ \ 14007e43b41SHong Zhang if (a->maxallocmat < a->sliidx[a->totalslices] + a->sliceheight) { \ 141d4002b98SHong Zhang /* allocates a larger array for the XSELL matrix types; only extend the current slice by one more column. */ \ 14207e43b41SHong Zhang PetscInt new_size = a->maxallocmat + a->sliceheight, *new_colidx; \ 143d4002b98SHong Zhang MatScalar *new_val; \ 14408401ef6SPierre Jolivet PetscCheck(a->nonew != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", orow, ocol); \ 145d4002b98SHong Zhang /* malloc new storage space */ \ 1469566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(new_size, &new_val, new_size, &new_colidx)); \ 147d4002b98SHong Zhang /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \ 14807e43b41SHong Zhang PetscCall(PetscArraycpy(new_val, a->val, a->sliidx[row / a->sliceheight + 1])); \ 14907e43b41SHong Zhang PetscCall(PetscArraycpy(new_colidx, a->colidx, a->sliidx[row / a->sliceheight + 1])); \ 15007e43b41SHong Zhang PetscCall(PetscArraycpy(new_val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 15107e43b41SHong Zhang PetscCall(PetscArraycpy(new_colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 1522d1451d4SHong Zhang /* update pointers. Notice that they point to the FIRST postion of the row */ \ 15307e43b41SHong Zhang cp = new_colidx + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \ 15407e43b41SHong Zhang vp = new_val + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \ 155d4002b98SHong Zhang /* free up old matrix storage */ \ 1569566063dSJacob Faibussowitsch PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx)); \ 157d4002b98SHong Zhang a->val = (MatScalar *)new_val; \ 158d4002b98SHong Zhang a->colidx = new_colidx; \ 159d4002b98SHong Zhang a->singlemalloc = PETSC_TRUE; \ 160d4002b98SHong Zhang a->maxallocmat = new_size; \ 161d4002b98SHong Zhang a->reallocs++; \ 162d4002b98SHong Zhang } else { \ 163d4002b98SHong Zhang /* no need to reallocate, just shift the following slices to create space for the added slice column */ \ 16407e43b41SHong Zhang PetscCall(PetscArraymove(a->val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 16507e43b41SHong Zhang PetscCall(PetscArraymove(a->colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 166d4002b98SHong Zhang } \ 167d4002b98SHong Zhang /* update slice_idx */ \ 16807e43b41SHong Zhang for (ii = row / a->sliceheight + 1; ii <= a->totalslices; ii++) a->sliidx[ii] += a->sliceheight; \ 169d4002b98SHong Zhang if (a->rlen[row] >= a->maxallocrow) a->maxallocrow++; \ 170d4002b98SHong Zhang if (a->rlen[row] >= a->rlenmax) a->rlenmax++; \ 171d4002b98SHong Zhang } \ 172d4002b98SHong Zhang /* shift up all the later entries in this row */ \ 173d4002b98SHong Zhang for (ii = a->rlen[row] - 1; ii >= _i; ii--) { \ 17407e43b41SHong Zhang *(cp + a->sliceheight * (ii + 1)) = *(cp + a->sliceheight * ii); \ 17507e43b41SHong Zhang *(vp + a->sliceheight * (ii + 1)) = *(vp + a->sliceheight * ii); \ 176d4002b98SHong Zhang } \ 17707e43b41SHong Zhang *(cp + a->sliceheight * _i) = col; \ 17807e43b41SHong Zhang *(vp + a->sliceheight * _i) = value; \ 1799371c9d4SSatish Balay a->nz++; \ 1809371c9d4SSatish Balay a->rlen[row]++; \ 1819371c9d4SSatish Balay A->nonzerostate++; \ 1829371c9d4SSatish Balay low = _i + 1; \ 1839371c9d4SSatish Balay high++; \ 184d4002b98SHong Zhang } \ 1859371c9d4SSatish Balay } 186d4002b98SHong Zhang 187d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat, PetscInt, const PetscInt[]); 188d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMult_SeqSELL(Mat, Vec, Vec); 189d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMultAdd_SeqSELL(Mat, Vec, Vec, Vec); 190d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMultTranspose_SeqSELL(Mat, Vec, Vec); 191d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat, Vec, Vec, Vec); 192d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMissingDiagonal_SeqSELL(Mat, PetscBool *, PetscInt *); 193d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMarkDiagonal_SeqSELL(Mat); 194d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatInvertDiagonal_SeqSELL(Mat, PetscScalar, PetscScalar); 195d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatZeroEntries_SeqSELL(Mat); 196d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatDestroy_SeqSELL(Mat); 197d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSetOption_SeqSELL(Mat, MatOption, PetscBool); 198d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetDiagonal_SeqSELL(Mat, Vec v); 199d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]); 200d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatView_SeqSELL(Mat, PetscViewer); 201d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatAssemblyEnd_SeqSELL(Mat, MatAssemblyType); 202d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetInfo_SeqSELL(Mat, MatInfoType, MatInfo *); 203d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 204d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatCopy_SeqSELL(Mat, Mat, MatStructure); 205d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSetUp_SeqSELL(Mat); 206d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat, PetscScalar *[]); 207d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat, PetscScalar *[]); 208d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatShift_SeqSELL(Mat, PetscScalar); 209d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSOR_SeqSELL(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 210d4002b98SHong Zhang PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat); 211d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatDuplicate_SeqSELL(Mat, MatDuplicateOption, Mat *); 212d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatEqual_SeqSELL(Mat, Mat, PetscBool *); 213d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat); 214d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat, MatType, MatReuse, Mat *); 215d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat, MatType, MatReuse, Mat *); 216d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqSELL(Mat, ISColoring, MatFDColoring); 217d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatFDColoringSetUp_SeqSELL(Mat, ISColoring, MatFDColoring); 218d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *); 219d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *); 220d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConjugate_SeqSELL(Mat A); 221d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatScale_SeqSELL(Mat, PetscScalar); 222d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatDiagonalScale_SeqSELL(Mat, Vec, Vec); 223d4002b98SHong Zhang #endif 224