1d4002b98SHong Zhang 26524c165SJacob Faibussowitsch #ifndef __SELL_H 3d4002b98SHong Zhang #define __SELL_H 4d4002b98SHong Zhang 5d4002b98SHong Zhang #include <petsc/private/matimpl.h> 6eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h> 7d4002b98SHong Zhang 8d4002b98SHong Zhang /* 94e58db63SHong Zhang For NVIDIA GPUs each slice should be padded to the boundary of 16 elements for best performance. 104e58db63SHong Zhang The optimal memory alignment in device memory is 128 bytes, 64 bytes, 32 bytes for double precision, single precision and half precision. 114e58db63SHong Zhang */ 124e58db63SHong Zhang #if defined(PETSC_HAVE_DEVICE) 134e58db63SHong Zhang #define DEVICE_MEM_ALIGN 16 144e58db63SHong Zhang #endif 154e58db63SHong Zhang 164e58db63SHong Zhang /* 17d4002b98SHong Zhang Struct header for SeqSELL matrix format 18d4002b98SHong Zhang */ 19d4002b98SHong Zhang #define SEQSELLHEADER(datatype) \ 20d4002b98SHong Zhang PetscBool roworiented; /* if true, row-oriented input, default */ \ 21d4002b98SHong Zhang PetscInt nonew; /* 1 don't add new nonzeros, -1 generate error on new */ \ 22d4002b98SHong Zhang PetscInt nounused; /* -1 generate error on unused space */ \ 23d4002b98SHong Zhang PetscBool singlemalloc; /* if true a, i, and j have been obtained with one big malloc */ \ 24d4002b98SHong Zhang PetscInt maxallocmat; /* max allocated space for the matrix */ \ 25d4002b98SHong Zhang PetscInt maxallocrow; /* max allocated space for each row */ \ 26d4002b98SHong Zhang PetscInt nz; /* actual nonzeros */ \ 27d4002b98SHong Zhang PetscInt rlenmax; /* max actual row length, rmax cannot exceed maxallocrow */ \ 28d4002b98SHong Zhang PetscInt *rlen; /* actual length of each row (padding zeros excluded) */ \ 29d4002b98SHong Zhang PetscBool free_rlen; /* free rlen array ? */ \ 30d4002b98SHong Zhang PetscInt reallocs; /* number of mallocs done during MatSetValues() \ 31d4002b98SHong Zhang as more values are set than were prealloced */ \ 32d4002b98SHong Zhang PetscBool keepnonzeropattern; /* keeps matrix structure same in calls to MatZeroRows()*/ \ 33d4002b98SHong Zhang PetscBool ignorezeroentries; \ 34d4002b98SHong Zhang PetscBool free_colidx; /* free the column indices colidx when the matrix is destroyed */ \ 35d4002b98SHong Zhang PetscBool free_val; /* free the numerical values when matrix is destroy */ \ 36d4002b98SHong Zhang PetscInt *colidx; /* column index */ \ 37d4002b98SHong Zhang PetscInt *diag; /* pointers to diagonal elements */ \ 38d4002b98SHong Zhang PetscInt nonzerorowcnt; /* how many rows have nonzero entries */ \ 39d4002b98SHong Zhang PetscBool free_diag; /* free diag ? */ \ 40d4002b98SHong Zhang datatype *val; /* elements including nonzeros and padding zeros */ \ 41d4002b98SHong Zhang PetscScalar *solve_work; /* work space used in MatSolve */ \ 42d4002b98SHong Zhang IS row, col, icol; /* index sets, used for reorderings */ \ 43d4002b98SHong Zhang PetscBool pivotinblocks; /* pivot inside factorization of each diagonal block */ \ 44d4002b98SHong Zhang Mat parent; /* set if this matrix was formed with MatDuplicate(...,MAT_SHARE_NONZERO_PATTERN,....); 45d4002b98SHong Zhang means that this shares some data structures with the parent including diag, ilen, imax, i, j */ \ 46d4002b98SHong Zhang PetscInt *sliidx; /* slice index */ \ 476108893eSStefano Zampini PetscInt totalslices; /* total number of slices */ \ 4807e43b41SHong Zhang PetscInt sliceheight; /* slice height */ \ 4907e43b41SHong Zhang PetscReal fillratio; /* ratio of number of padded zeros over total number of elements */ \ 5007e43b41SHong Zhang PetscReal avgslicewidth; /* average slice width */ \ 5107e43b41SHong Zhang PetscInt maxslicewidth; /* maximum slice width */ \ 5207e43b41SHong Zhang PetscInt *sliperm; /* slice permutation array, CUDA only */ \ 5307e43b41SHong Zhang PetscInt totalblocks; /* total number of blocks, CUDA only */ \ 5407e43b41SHong Zhang PetscInt *blockidx; /* block index, CUDA only */ \ 5507e43b41SHong Zhang PetscInt *block_row_map; /* starting row of the current block, CUDA only */ \ 56*90d2215bSHong Zhang PetscInt chunksize; /* chunk size, CUDA only */ \ 57*90d2215bSHong Zhang PetscInt totalchunks; /* total number of chunks, CUDA only */ \ 58*90d2215bSHong Zhang PetscInt *chunk_slice_map; /* starting slice of the currect chunk, CUDA only */ \ 596108893eSStefano Zampini PetscInt *getrowcols; /* workarray for MatGetRow_SeqSELL */ \ 609371c9d4SSatish Balay PetscScalar *getrowvals /* workarray for MatGetRow_SeqSELL */ 61d4002b98SHong Zhang 62d4002b98SHong Zhang typedef struct { 63d4002b98SHong Zhang SEQSELLHEADER(MatScalar); 64d4002b98SHong Zhang MatScalar *saved_values; /* location for stashing nonzero values of matrix */ 65d4002b98SHong Zhang PetscScalar *idiag, *mdiag, *ssor_work; /* inverse of diagonal entries, diagonal values and workspace for Eisenstat trick */ 66d4002b98SHong Zhang PetscBool idiagvalid; /* current idiag[] and mdiag[] are valid */ 67d4002b98SHong Zhang PetscScalar fshift, omega; /* last used omega and fshift */ 68d4002b98SHong Zhang ISColoring coloring; /* set with MatADSetColoring() used by MatADSetValues() */ 69d4002b98SHong Zhang } Mat_SeqSELL; 70d4002b98SHong Zhang 71d4002b98SHong Zhang /* 72d4002b98SHong Zhang Frees the arrays from the XSELLPACK matrix type 73d4002b98SHong Zhang */ 74d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSeqXSELLFreeSELL(Mat AA, MatScalar **val, PetscInt **colidx) 75d71ae5a4SJacob Faibussowitsch { 76d4002b98SHong Zhang Mat_SeqSELL *A = (Mat_SeqSELL *)AA->data; 77d4002b98SHong Zhang if (A->singlemalloc) { 789566063dSJacob Faibussowitsch PetscCall(PetscFree2(*val, *colidx)); 79d4002b98SHong Zhang } else { 809566063dSJacob Faibussowitsch if (A->free_val) PetscCall(PetscFree(*val)); 819566063dSJacob Faibussowitsch if (A->free_colidx) PetscCall(PetscFree(*colidx)); 82d4002b98SHong Zhang } 833ba16761SJacob Faibussowitsch return PETSC_SUCCESS; 84d4002b98SHong Zhang } 85d4002b98SHong Zhang 864e58db63SHong Zhang #define MatSeqXSELLReallocateSELL(Amat, AM, BS2, WIDTH, SIDX, SH, SID, ROW, COL, COLIDX, VAL, CP, VP, NONEW, datatype, MUL) \ 8707e43b41SHong Zhang if (WIDTH >= (SIDX[SID + 1] - SIDX[SID]) / SH) { \ 88d4002b98SHong Zhang Mat_SeqSELL *Ain = (Mat_SeqSELL *)Amat->data; \ 892d1451d4SHong Zhang /* there is no extra room in row, therefore enlarge 1 slice column */ \ 904e58db63SHong Zhang PetscInt new_size = Ain->maxallocmat + SH * MUL, *new_colidx; \ 91d4002b98SHong Zhang datatype *new_val; \ 92d4002b98SHong Zhang \ 9308401ef6SPierre Jolivet PetscCheck(NONEW != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", ROW, COL); \ 94d4002b98SHong Zhang /* malloc new storage space */ \ 959566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(BS2 *new_size, &new_val, BS2 *new_size, &new_colidx)); \ 96d4002b98SHong Zhang \ 97d4002b98SHong Zhang /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \ 989566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(new_val, VAL, SIDX[SID + 1])); \ 999566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(new_colidx, COLIDX, SIDX[SID + 1])); \ 1004e58db63SHong Zhang PetscCall(PetscArraycpy(new_val + SIDX[SID + 1] + SH * MUL, VAL + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \ 1014e58db63SHong Zhang PetscCall(PetscArraycpy(new_colidx + SIDX[SID + 1] + SH * MUL, COLIDX + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \ 102d4002b98SHong Zhang /* update slice_idx */ \ 1034e58db63SHong Zhang for (ii = SID + 1; ii <= Ain->totalslices; ii++) { SIDX[ii] += SH * MUL; } \ 1042d1451d4SHong Zhang /* update pointers. Notice that they point to the FIRST postion of the row */ \ 10507e43b41SHong Zhang CP = new_colidx + SIDX[SID] + (ROW % SH); \ 10607e43b41SHong Zhang VP = new_val + SIDX[SID] + (ROW % SH); \ 107d4002b98SHong Zhang /* free up old matrix storage */ \ 1089566063dSJacob Faibussowitsch PetscCall(MatSeqXSELLFreeSELL(A, &Ain->val, &Ain->colidx)); \ 109d4002b98SHong Zhang Ain->val = (MatScalar *)new_val; \ 110d4002b98SHong Zhang Ain->colidx = new_colidx; \ 111d4002b98SHong Zhang Ain->singlemalloc = PETSC_TRUE; \ 112d4002b98SHong Zhang Ain->maxallocmat = new_size; \ 113d4002b98SHong Zhang Ain->reallocs++; \ 1144e58db63SHong Zhang if (WIDTH >= Ain->maxallocrow) Ain->maxallocrow += MUL; \ 115d4002b98SHong Zhang if (WIDTH >= Ain->rlenmax) Ain->rlenmax++; \ 1169371c9d4SSatish Balay } 117d4002b98SHong Zhang 118d4002b98SHong Zhang #define MatSetValue_SeqSELL_Private(A, row, col, value, addv, orow, ocol, cp, vp, lastcol, low, high) \ 119d4002b98SHong Zhang { \ 120d4002b98SHong Zhang Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; \ 121d4002b98SHong Zhang found = PETSC_FALSE; \ 122d4002b98SHong Zhang if (col <= lastcol) low = 0; \ 123d4002b98SHong Zhang else high = a->rlen[row]; \ 124d4002b98SHong Zhang lastcol = col; \ 125d4002b98SHong Zhang while (high - low > 5) { \ 126d4002b98SHong Zhang t = (low + high) / 2; \ 12707e43b41SHong Zhang if (*(cp + a->sliceheight * t) > col) high = t; \ 128d4002b98SHong Zhang else low = t; \ 129d4002b98SHong Zhang } \ 130d4002b98SHong Zhang for (_i = low; _i < high; _i++) { \ 13107e43b41SHong Zhang if (*(cp + a->sliceheight * _i) > col) break; \ 13207e43b41SHong Zhang if (*(cp + a->sliceheight * _i) == col) { \ 13307e43b41SHong Zhang if (addv == ADD_VALUES) *(vp + a->sliceheight * _i) += value; \ 13407e43b41SHong Zhang else *(vp + a->sliceheight * _i) = value; \ 135d4002b98SHong Zhang found = PETSC_TRUE; \ 136d4002b98SHong Zhang break; \ 137d4002b98SHong Zhang } \ 138d4002b98SHong Zhang } \ 139d4002b98SHong Zhang if (!found) { \ 14008401ef6SPierre Jolivet PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 14107e43b41SHong Zhang if (a->nonew != 1 && !(value == 0.0 && a->ignorezeroentries) && a->rlen[row] >= (a->sliidx[row / a->sliceheight + 1] - a->sliidx[row / a->sliceheight]) / a->sliceheight) { \ 1422d1451d4SHong Zhang /* there is no extra room in row, therefore enlarge 1 slice column */ \ 14307e43b41SHong Zhang if (a->maxallocmat < a->sliidx[a->totalslices] + a->sliceheight) { \ 144d4002b98SHong Zhang /* allocates a larger array for the XSELL matrix types; only extend the current slice by one more column. */ \ 14507e43b41SHong Zhang PetscInt new_size = a->maxallocmat + a->sliceheight, *new_colidx; \ 146d4002b98SHong Zhang MatScalar *new_val; \ 14708401ef6SPierre Jolivet PetscCheck(a->nonew != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", orow, ocol); \ 148d4002b98SHong Zhang /* malloc new storage space */ \ 1499566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(new_size, &new_val, new_size, &new_colidx)); \ 150d4002b98SHong Zhang /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \ 15107e43b41SHong Zhang PetscCall(PetscArraycpy(new_val, a->val, a->sliidx[row / a->sliceheight + 1])); \ 15207e43b41SHong Zhang PetscCall(PetscArraycpy(new_colidx, a->colidx, a->sliidx[row / a->sliceheight + 1])); \ 15307e43b41SHong Zhang PetscCall(PetscArraycpy(new_val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 15407e43b41SHong Zhang PetscCall(PetscArraycpy(new_colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 1552d1451d4SHong Zhang /* update pointers. Notice that they point to the FIRST postion of the row */ \ 15607e43b41SHong Zhang cp = new_colidx + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \ 15707e43b41SHong Zhang vp = new_val + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \ 158d4002b98SHong Zhang /* free up old matrix storage */ \ 1599566063dSJacob Faibussowitsch PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx)); \ 160d4002b98SHong Zhang a->val = (MatScalar *)new_val; \ 161d4002b98SHong Zhang a->colidx = new_colidx; \ 162d4002b98SHong Zhang a->singlemalloc = PETSC_TRUE; \ 163d4002b98SHong Zhang a->maxallocmat = new_size; \ 164d4002b98SHong Zhang a->reallocs++; \ 165d4002b98SHong Zhang } else { \ 166d4002b98SHong Zhang /* no need to reallocate, just shift the following slices to create space for the added slice column */ \ 16707e43b41SHong Zhang PetscCall(PetscArraymove(a->val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 16807e43b41SHong Zhang PetscCall(PetscArraymove(a->colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \ 169d4002b98SHong Zhang } \ 170d4002b98SHong Zhang /* update slice_idx */ \ 17107e43b41SHong Zhang for (ii = row / a->sliceheight + 1; ii <= a->totalslices; ii++) a->sliidx[ii] += a->sliceheight; \ 172d4002b98SHong Zhang if (a->rlen[row] >= a->maxallocrow) a->maxallocrow++; \ 173d4002b98SHong Zhang if (a->rlen[row] >= a->rlenmax) a->rlenmax++; \ 174d4002b98SHong Zhang } \ 175d4002b98SHong Zhang /* shift up all the later entries in this row */ \ 176d4002b98SHong Zhang for (ii = a->rlen[row] - 1; ii >= _i; ii--) { \ 17707e43b41SHong Zhang *(cp + a->sliceheight * (ii + 1)) = *(cp + a->sliceheight * ii); \ 17807e43b41SHong Zhang *(vp + a->sliceheight * (ii + 1)) = *(vp + a->sliceheight * ii); \ 179d4002b98SHong Zhang } \ 18007e43b41SHong Zhang *(cp + a->sliceheight * _i) = col; \ 18107e43b41SHong Zhang *(vp + a->sliceheight * _i) = value; \ 1829371c9d4SSatish Balay a->nz++; \ 1839371c9d4SSatish Balay a->rlen[row]++; \ 1849371c9d4SSatish Balay A->nonzerostate++; \ 1859371c9d4SSatish Balay low = _i + 1; \ 1869371c9d4SSatish Balay high++; \ 187d4002b98SHong Zhang } \ 1889371c9d4SSatish Balay } 189d4002b98SHong Zhang 190d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat, PetscInt, const PetscInt[]); 191d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMult_SeqSELL(Mat, Vec, Vec); 192d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMultAdd_SeqSELL(Mat, Vec, Vec, Vec); 193d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMultTranspose_SeqSELL(Mat, Vec, Vec); 194d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat, Vec, Vec, Vec); 195d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMissingDiagonal_SeqSELL(Mat, PetscBool *, PetscInt *); 196d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatMarkDiagonal_SeqSELL(Mat); 197d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatInvertDiagonal_SeqSELL(Mat, PetscScalar, PetscScalar); 198d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatZeroEntries_SeqSELL(Mat); 199d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatDestroy_SeqSELL(Mat); 200d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSetOption_SeqSELL(Mat, MatOption, PetscBool); 201d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetDiagonal_SeqSELL(Mat, Vec v); 202d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]); 203d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatView_SeqSELL(Mat, PetscViewer); 204d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatAssemblyEnd_SeqSELL(Mat, MatAssemblyType); 205d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetInfo_SeqSELL(Mat, MatInfoType, MatInfo *); 206d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 207d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatCopy_SeqSELL(Mat, Mat, MatStructure); 208d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSetUp_SeqSELL(Mat); 209d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat, PetscScalar *[]); 210d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat, PetscScalar *[]); 211d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatShift_SeqSELL(Mat, PetscScalar); 212d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSOR_SeqSELL(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 213d4002b98SHong Zhang PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat); 214d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatDuplicate_SeqSELL(Mat, MatDuplicateOption, Mat *); 215d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatEqual_SeqSELL(Mat, Mat, PetscBool *); 216d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat); 217d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat, MatType, MatReuse, Mat *); 218d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat, MatType, MatReuse, Mat *); 219d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqSELL(Mat, ISColoring, MatFDColoring); 220d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatFDColoringSetUp_SeqSELL(Mat, ISColoring, MatFDColoring); 221d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *); 222d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *); 223d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConjugate_SeqSELL(Mat A); 224d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatScale_SeqSELL(Mat, PetscScalar); 225d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatDiagonalScale_SeqSELL(Mat, Vec, Vec); 226d4002b98SHong Zhang #endif 227