1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 9 static PetscBool cited = PETSC_FALSE; 10 static const char citation[] = "@inproceedings{ZhangELLPACK2018,\n" 11 " author = {Hong Zhang and Richard T. Mills and Karl Rupp and Barry F. Smith},\n" 12 " title = {Vectorized Parallel Sparse Matrix-Vector Multiplication in {PETSc} Using {AVX-512}},\n" 13 " booktitle = {Proceedings of the 47th International Conference on Parallel Processing},\n" 14 " year = 2018\n" 15 "}\n"; 16 17 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 18 19 #include <immintrin.h> 20 21 #if !defined(_MM_SCALE_8) 22 #define _MM_SCALE_8 8 23 #endif 24 25 #if defined(__AVX512F__) 26 /* these do not work 27 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 28 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 29 */ 30 #define AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y) \ 31 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 32 vec_idx = _mm256_loadu_si256((__m256i const *)acolidx); \ 33 vec_vals = _mm512_loadu_pd(aval); \ 34 vec_x = _mm512_i32gather_pd(vec_idx, x, _MM_SCALE_8); \ 35 vec_y = _mm512_fmadd_pd(vec_x, vec_vals, vec_y) 36 #elif defined(__AVX2__) && defined(__FMA__) 37 #define AVX2_Mult_Private(vec_idx, vec_x, vec_vals, vec_y) \ 38 vec_vals = _mm256_loadu_pd(aval); \ 39 vec_idx = _mm_loadu_si128((__m128i const *)acolidx); /* SSE2 */ \ 40 vec_x = _mm256_i32gather_pd(x, vec_idx, _MM_SCALE_8); \ 41 vec_y = _mm256_fmadd_pd(vec_x, vec_vals, vec_y) 42 #endif 43 #endif /* PETSC_HAVE_IMMINTRIN_H */ 44 45 /*@C 46 MatSeqSELLSetPreallocation - For good matrix assembly performance 47 the user should preallocate the matrix storage by setting the parameter `nz` 48 (or the array `nnz`). 49 50 Collective 51 52 Input Parameters: 53 + B - The `MATSEQSELL` matrix 54 . rlenmax - number of nonzeros per row (same for all rows), ignored if `rlen` is provided 55 - rlen - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL` 56 57 Level: intermediate 58 59 Notes: 60 Specify the preallocated storage with either `rlenmax` or `rlen` (not both). 61 Set `rlenmax` = `PETSC_DEFAULT` and `rlen` = `NULL` for PETSc to control dynamic memory 62 allocation. 63 64 You can call `MatGetInfo()` to get information on how effective the preallocation was; 65 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 66 You can also run with the option `-info` and look for messages with the string 67 malloc in them to see if additional memory allocation was needed. 68 69 Developer Note: 70 Use `rlenmax` of `MAT_SKIP_ALLOCATION` to not allocate any space for the matrix 71 entries or columns indices. 72 73 The maximum number of nonzeos in any row should be as accurate as possible. 74 If it is underestimated, you will get bad performance due to reallocation 75 (`MatSeqXSELLReallocateSELL()`). 76 77 .seealso: `Mat`, `MATSEQSELL`, `MATSELL`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MatGetInfo()` 78 @*/ 79 PetscErrorCode MatSeqSELLSetPreallocation(Mat B, PetscInt rlenmax, const PetscInt rlen[]) 80 { 81 PetscFunctionBegin; 82 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 83 PetscValidType(B, 1); 84 PetscTryMethod(B, "MatSeqSELLSetPreallocation_C", (Mat, PetscInt, const PetscInt[]), (B, rlenmax, rlen)); 85 PetscFunctionReturn(PETSC_SUCCESS); 86 } 87 88 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B, PetscInt maxallocrow, const PetscInt rlen[]) 89 { 90 Mat_SeqSELL *b; 91 PetscInt i, j, totalslices; 92 PetscBool skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 93 94 PetscFunctionBegin; 95 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 96 if (maxallocrow == MAT_SKIP_ALLOCATION) { 97 skipallocation = PETSC_TRUE; 98 maxallocrow = 0; 99 } 100 101 PetscCall(PetscLayoutSetUp(B->rmap)); 102 PetscCall(PetscLayoutSetUp(B->cmap)); 103 104 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 105 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 106 PetscCheck(maxallocrow >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "maxallocrow cannot be less than 0: value %" PetscInt_FMT, maxallocrow); 107 if (rlen) { 108 for (i = 0; i < B->rmap->n; i++) { 109 PetscCheck(rlen[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "rlen cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, rlen[i]); 110 PetscCheck(rlen[i] <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "rlen cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, rlen[i], B->cmap->n); 111 } 112 } 113 114 B->preallocated = PETSC_TRUE; 115 116 b = (Mat_SeqSELL *)B->data; 117 118 totalslices = PetscCeilInt(B->rmap->n, SLICE_HEIGHT); 119 b->totalslices = totalslices; 120 if (!skipallocation) { 121 if (B->rmap->n % SLICE_HEIGHT) PetscCall(PetscInfo(B, "Padding rows to the SEQSELL matrix because the number of rows is not the multiple of the slice height (value %" PetscInt_FMT ")\n", B->rmap->n)); 122 123 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 124 PetscCall(PetscMalloc1(totalslices + 1, &b->sliidx)); 125 } 126 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 127 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 128 else if (maxallocrow < 0) maxallocrow = 1; 129 for (i = 0; i <= totalslices; i++) b->sliidx[i] = SLICE_HEIGHT * i * maxallocrow; 130 } else { 131 maxallocrow = 0; 132 b->sliidx[0] = 0; 133 for (i = 1; i < totalslices; i++) { 134 b->sliidx[i] = 0; 135 for (j = 0; j < SLICE_HEIGHT; j++) { b->sliidx[i] = PetscMax(b->sliidx[i], rlen[SLICE_HEIGHT * (i - 1) + j]); } 136 maxallocrow = PetscMax(b->sliidx[i], maxallocrow); 137 PetscCall(PetscIntSumError(b->sliidx[i - 1], SLICE_HEIGHT * b->sliidx[i], &b->sliidx[i])); 138 } 139 /* last slice */ 140 b->sliidx[totalslices] = 0; 141 for (j = SLICE_HEIGHT * (totalslices - 1); j < B->rmap->n; j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices], rlen[j]); 142 maxallocrow = PetscMax(b->sliidx[totalslices], maxallocrow); 143 b->sliidx[totalslices] = b->sliidx[totalslices - 1] + SLICE_HEIGHT * b->sliidx[totalslices]; 144 } 145 146 /* allocate space for val, colidx, rlen */ 147 /* FIXME: should B's old memory be unlogged? */ 148 PetscCall(MatSeqXSELLFreeSELL(B, &b->val, &b->colidx)); 149 /* FIXME: assuming an element of the bit array takes 8 bits */ 150 PetscCall(PetscMalloc2(b->sliidx[totalslices], &b->val, b->sliidx[totalslices], &b->colidx)); 151 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 152 PetscCall(PetscCalloc1(SLICE_HEIGHT * totalslices, &b->rlen)); 153 154 b->singlemalloc = PETSC_TRUE; 155 b->free_val = PETSC_TRUE; 156 b->free_colidx = PETSC_TRUE; 157 } else { 158 b->free_val = PETSC_FALSE; 159 b->free_colidx = PETSC_FALSE; 160 } 161 162 b->nz = 0; 163 b->maxallocrow = maxallocrow; 164 b->rlenmax = maxallocrow; 165 b->maxallocmat = b->sliidx[totalslices]; 166 B->info.nz_unneeded = (double)b->maxallocmat; 167 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 168 PetscFunctionReturn(PETSC_SUCCESS); 169 } 170 171 PetscErrorCode MatGetRow_SeqSELL(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 172 { 173 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 174 PetscInt shift; 175 176 PetscFunctionBegin; 177 PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 178 if (nz) *nz = a->rlen[row]; 179 shift = a->sliidx[row / SLICE_HEIGHT] + (row % SLICE_HEIGHT); 180 if (!a->getrowcols) { PetscCall(PetscMalloc2(a->rlenmax, &a->getrowcols, a->rlenmax, &a->getrowvals)); } 181 if (idx) { 182 PetscInt j; 183 for (j = 0; j < a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift + SLICE_HEIGHT * j]; 184 *idx = a->getrowcols; 185 } 186 if (v) { 187 PetscInt j; 188 for (j = 0; j < a->rlen[row]; j++) a->getrowvals[j] = a->val[shift + SLICE_HEIGHT * j]; 189 *v = a->getrowvals; 190 } 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 PetscErrorCode MatRestoreRow_SeqSELL(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 195 { 196 PetscFunctionBegin; 197 PetscFunctionReturn(PETSC_SUCCESS); 198 } 199 200 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 201 { 202 Mat B; 203 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 204 PetscInt i; 205 206 PetscFunctionBegin; 207 if (reuse == MAT_REUSE_MATRIX) { 208 B = *newmat; 209 PetscCall(MatZeroEntries(B)); 210 } else { 211 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 212 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 213 PetscCall(MatSetType(B, MATSEQAIJ)); 214 PetscCall(MatSeqAIJSetPreallocation(B, 0, a->rlen)); 215 } 216 217 for (i = 0; i < A->rmap->n; i++) { 218 PetscInt nz = 0, *cols = NULL; 219 PetscScalar *vals = NULL; 220 221 PetscCall(MatGetRow_SeqSELL(A, i, &nz, &cols, &vals)); 222 PetscCall(MatSetValues(B, 1, &i, nz, cols, vals, INSERT_VALUES)); 223 PetscCall(MatRestoreRow_SeqSELL(A, i, &nz, &cols, &vals)); 224 } 225 226 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 227 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 228 B->rmap->bs = A->rmap->bs; 229 230 if (reuse == MAT_INPLACE_MATRIX) { 231 PetscCall(MatHeaderReplace(A, &B)); 232 } else { 233 *newmat = B; 234 } 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 #include <../src/mat/impls/aij/seq/aij.h> 239 240 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 241 { 242 Mat B; 243 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 244 PetscInt *ai = a->i, m = A->rmap->N, n = A->cmap->N, i, *rowlengths, row, ncols; 245 const PetscInt *cols; 246 const PetscScalar *vals; 247 248 PetscFunctionBegin; 249 250 if (reuse == MAT_REUSE_MATRIX) { 251 B = *newmat; 252 } else { 253 if (PetscDefined(USE_DEBUG) || !a->ilen) { 254 PetscCall(PetscMalloc1(m, &rowlengths)); 255 for (i = 0; i < m; i++) rowlengths[i] = ai[i + 1] - ai[i]; 256 } 257 if (PetscDefined(USE_DEBUG) && a->ilen) { 258 PetscBool eq; 259 PetscCall(PetscMemcmp(rowlengths, a->ilen, m * sizeof(PetscInt), &eq)); 260 PetscCheck(eq, PETSC_COMM_SELF, PETSC_ERR_PLIB, "SeqAIJ ilen array incorrect"); 261 PetscCall(PetscFree(rowlengths)); 262 rowlengths = a->ilen; 263 } else if (a->ilen) rowlengths = a->ilen; 264 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 265 PetscCall(MatSetSizes(B, m, n, m, n)); 266 PetscCall(MatSetType(B, MATSEQSELL)); 267 PetscCall(MatSeqSELLSetPreallocation(B, 0, rowlengths)); 268 if (rowlengths != a->ilen) PetscCall(PetscFree(rowlengths)); 269 } 270 271 for (row = 0; row < m; row++) { 272 PetscCall(MatGetRow_SeqAIJ(A, row, &ncols, (PetscInt **)&cols, (PetscScalar **)&vals)); 273 PetscCall(MatSetValues_SeqSELL(B, 1, &row, ncols, cols, vals, INSERT_VALUES)); 274 PetscCall(MatRestoreRow_SeqAIJ(A, row, &ncols, (PetscInt **)&cols, (PetscScalar **)&vals)); 275 } 276 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 277 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 278 B->rmap->bs = A->rmap->bs; 279 280 if (reuse == MAT_INPLACE_MATRIX) { 281 PetscCall(MatHeaderReplace(A, &B)); 282 } else { 283 *newmat = B; 284 } 285 PetscFunctionReturn(PETSC_SUCCESS); 286 } 287 288 PetscErrorCode MatMult_SeqSELL(Mat A, Vec xx, Vec yy) 289 { 290 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 291 PetscScalar *y; 292 const PetscScalar *x; 293 const MatScalar *aval = a->val; 294 PetscInt totalslices = a->totalslices; 295 const PetscInt *acolidx = a->colidx; 296 PetscInt i, j; 297 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 298 __m512d vec_x, vec_y, vec_vals; 299 __m256i vec_idx; 300 __mmask8 mask; 301 __m512d vec_x2, vec_y2, vec_vals2, vec_x3, vec_y3, vec_vals3, vec_x4, vec_y4, vec_vals4; 302 __m256i vec_idx2, vec_idx3, vec_idx4; 303 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 304 __m128i vec_idx; 305 __m256d vec_x, vec_y, vec_y2, vec_vals; 306 MatScalar yval; 307 PetscInt r, rows_left, row, nnz_in_row; 308 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 309 __m128d vec_x_tmp; 310 __m256d vec_x, vec_y, vec_y2, vec_vals; 311 MatScalar yval; 312 PetscInt r, rows_left, row, nnz_in_row; 313 #else 314 PetscInt k; 315 PetscScalar sum[SLICE_HEIGHT]; 316 #endif 317 318 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 319 #pragma disjoint(*x, *y, *aval) 320 #endif 321 322 PetscFunctionBegin; 323 PetscCall(VecGetArrayRead(xx, &x)); 324 PetscCall(VecGetArray(yy, &y)); 325 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 326 for (i = 0; i < totalslices; i++) { /* loop over slices */ 327 PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 328 PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 329 330 vec_y = _mm512_setzero_pd(); 331 vec_y2 = _mm512_setzero_pd(); 332 vec_y3 = _mm512_setzero_pd(); 333 vec_y4 = _mm512_setzero_pd(); 334 335 j = a->sliidx[i] >> 3; /* 8 bytes are read at each time, corresponding to a slice column */ 336 switch ((a->sliidx[i + 1] - a->sliidx[i]) / 8 & 3) { 337 case 3: 338 AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y); 339 acolidx += 8; 340 aval += 8; 341 AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2); 342 acolidx += 8; 343 aval += 8; 344 AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3); 345 acolidx += 8; 346 aval += 8; 347 j += 3; 348 break; 349 case 2: 350 AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y); 351 acolidx += 8; 352 aval += 8; 353 AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2); 354 acolidx += 8; 355 aval += 8; 356 j += 2; 357 break; 358 case 1: 359 AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y); 360 acolidx += 8; 361 aval += 8; 362 j += 1; 363 break; 364 } 365 #pragma novector 366 for (; j < (a->sliidx[i + 1] >> 3); j += 4) { 367 AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y); 368 acolidx += 8; 369 aval += 8; 370 AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2); 371 acolidx += 8; 372 aval += 8; 373 AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3); 374 acolidx += 8; 375 aval += 8; 376 AVX512_Mult_Private(vec_idx4, vec_x4, vec_vals4, vec_y4); 377 acolidx += 8; 378 aval += 8; 379 } 380 381 vec_y = _mm512_add_pd(vec_y, vec_y2); 382 vec_y = _mm512_add_pd(vec_y, vec_y3); 383 vec_y = _mm512_add_pd(vec_y, vec_y4); 384 if (i == totalslices - 1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 385 mask = (__mmask8)(0xff >> (8 - (A->rmap->n & 0x07))); 386 _mm512_mask_storeu_pd(&y[8 * i], mask, vec_y); 387 } else { 388 _mm512_storeu_pd(&y[8 * i], vec_y); 389 } 390 } 391 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 392 for (i = 0; i < totalslices; i++) { /* loop over full slices */ 393 PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 394 PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 395 396 /* last slice may have padding rows. Don't use vectorization. */ 397 if (i == totalslices - 1 && (A->rmap->n & 0x07)) { 398 rows_left = A->rmap->n - 8 * i; 399 for (r = 0; r < rows_left; ++r) { 400 yval = (MatScalar)0; 401 row = 8 * i + r; 402 nnz_in_row = a->rlen[row]; 403 for (j = 0; j < nnz_in_row; ++j) yval += aval[8 * j + r] * x[acolidx[8 * j + r]]; 404 y[row] = yval; 405 } 406 break; 407 } 408 409 vec_y = _mm256_setzero_pd(); 410 vec_y2 = _mm256_setzero_pd(); 411 412 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 413 #pragma novector 414 #pragma unroll(2) 415 for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) { 416 AVX2_Mult_Private(vec_idx, vec_x, vec_vals, vec_y); 417 aval += 4; 418 acolidx += 4; 419 AVX2_Mult_Private(vec_idx, vec_x, vec_vals, vec_y2); 420 aval += 4; 421 acolidx += 4; 422 } 423 424 _mm256_storeu_pd(y + i * 8, vec_y); 425 _mm256_storeu_pd(y + i * 8 + 4, vec_y2); 426 } 427 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 428 for (i = 0; i < totalslices; i++) { /* loop over full slices */ 429 PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 430 PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 431 432 vec_y = _mm256_setzero_pd(); 433 vec_y2 = _mm256_setzero_pd(); 434 435 /* last slice may have padding rows. Don't use vectorization. */ 436 if (i == totalslices - 1 && (A->rmap->n & 0x07)) { 437 rows_left = A->rmap->n - 8 * i; 438 for (r = 0; r < rows_left; ++r) { 439 yval = (MatScalar)0; 440 row = 8 * i + r; 441 nnz_in_row = a->rlen[row]; 442 for (j = 0; j < nnz_in_row; ++j) yval += aval[8 * j + r] * x[acolidx[8 * j + r]]; 443 y[row] = yval; 444 } 445 break; 446 } 447 448 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 449 #pragma novector 450 #pragma unroll(2) 451 for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) { 452 vec_vals = _mm256_loadu_pd(aval); 453 vec_x_tmp = _mm_setzero_pd(); 454 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 455 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 456 vec_x = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0); 457 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 458 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 459 vec_x = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1); 460 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y); 461 aval += 4; 462 463 vec_vals = _mm256_loadu_pd(aval); 464 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 465 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 466 vec_x = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0); 467 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 468 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 469 vec_x = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1); 470 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y2); 471 aval += 4; 472 } 473 474 _mm256_storeu_pd(y + i * 8, vec_y); 475 _mm256_storeu_pd(y + i * 8 + 4, vec_y2); 476 } 477 #else 478 for (i = 0; i < totalslices; i++) { /* loop over slices */ 479 for (j = 0; j < SLICE_HEIGHT; j++) { 480 sum[j] = 0.0; 481 for (k = a->sliidx[i] + j; k < a->sliidx[i + 1]; k += SLICE_HEIGHT) sum[j] += aval[k] * x[acolidx[k]]; 482 } 483 if (i == totalslices - 1 && (A->rmap->n % SLICE_HEIGHT)) { /* if last slice has padding rows */ 484 for (j = 0; j < (A->rmap->n % SLICE_HEIGHT); j++) y[SLICE_HEIGHT * i + j] = sum[j]; 485 } else { 486 for (j = 0; j < SLICE_HEIGHT; j++) y[SLICE_HEIGHT * i + j] = sum[j]; 487 } 488 } 489 #endif 490 491 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); /* theoretical minimal FLOPs */ 492 PetscCall(VecRestoreArrayRead(xx, &x)); 493 PetscCall(VecRestoreArray(yy, &y)); 494 PetscFunctionReturn(PETSC_SUCCESS); 495 } 496 497 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 498 PetscErrorCode MatMultAdd_SeqSELL(Mat A, Vec xx, Vec yy, Vec zz) 499 { 500 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 501 PetscScalar *y, *z; 502 const PetscScalar *x; 503 const MatScalar *aval = a->val; 504 PetscInt totalslices = a->totalslices; 505 const PetscInt *acolidx = a->colidx; 506 PetscInt i, j; 507 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 508 __m512d vec_x, vec_y, vec_vals; 509 __m256i vec_idx; 510 __mmask8 mask; 511 __m512d vec_x2, vec_y2, vec_vals2, vec_x3, vec_y3, vec_vals3, vec_x4, vec_y4, vec_vals4; 512 __m256i vec_idx2, vec_idx3, vec_idx4; 513 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 514 __m128d vec_x_tmp; 515 __m256d vec_x, vec_y, vec_y2, vec_vals; 516 MatScalar yval; 517 PetscInt r, row, nnz_in_row; 518 #else 519 PetscInt k; 520 PetscScalar sum[SLICE_HEIGHT]; 521 #endif 522 523 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 524 #pragma disjoint(*x, *y, *aval) 525 #endif 526 527 PetscFunctionBegin; 528 if (!a->nz) { 529 PetscCall(VecCopy(yy, zz)); 530 PetscFunctionReturn(PETSC_SUCCESS); 531 } 532 PetscCall(VecGetArrayRead(xx, &x)); 533 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 534 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 535 for (i = 0; i < totalslices; i++) { /* loop over slices */ 536 PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 537 PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 538 539 if (i == totalslices - 1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 540 mask = (__mmask8)(0xff >> (8 - (A->rmap->n & 0x07))); 541 vec_y = _mm512_mask_loadu_pd(vec_y, mask, &y[8 * i]); 542 } else { 543 vec_y = _mm512_loadu_pd(&y[8 * i]); 544 } 545 vec_y2 = _mm512_setzero_pd(); 546 vec_y3 = _mm512_setzero_pd(); 547 vec_y4 = _mm512_setzero_pd(); 548 549 j = a->sliidx[i] >> 3; /* 8 bytes are read at each time, corresponding to a slice column */ 550 switch ((a->sliidx[i + 1] - a->sliidx[i]) / 8 & 3) { 551 case 3: 552 AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y); 553 acolidx += 8; 554 aval += 8; 555 AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2); 556 acolidx += 8; 557 aval += 8; 558 AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3); 559 acolidx += 8; 560 aval += 8; 561 j += 3; 562 break; 563 case 2: 564 AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y); 565 acolidx += 8; 566 aval += 8; 567 AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2); 568 acolidx += 8; 569 aval += 8; 570 j += 2; 571 break; 572 case 1: 573 AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y); 574 acolidx += 8; 575 aval += 8; 576 j += 1; 577 break; 578 } 579 #pragma novector 580 for (; j < (a->sliidx[i + 1] >> 3); j += 4) { 581 AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y); 582 acolidx += 8; 583 aval += 8; 584 AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2); 585 acolidx += 8; 586 aval += 8; 587 AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3); 588 acolidx += 8; 589 aval += 8; 590 AVX512_Mult_Private(vec_idx4, vec_x4, vec_vals4, vec_y4); 591 acolidx += 8; 592 aval += 8; 593 } 594 595 vec_y = _mm512_add_pd(vec_y, vec_y2); 596 vec_y = _mm512_add_pd(vec_y, vec_y3); 597 vec_y = _mm512_add_pd(vec_y, vec_y4); 598 if (i == totalslices - 1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 599 _mm512_mask_storeu_pd(&z[8 * i], mask, vec_y); 600 } else { 601 _mm512_storeu_pd(&z[8 * i], vec_y); 602 } 603 } 604 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 605 for (i = 0; i < totalslices; i++) { /* loop over full slices */ 606 PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 607 PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0); 608 609 /* last slice may have padding rows. Don't use vectorization. */ 610 if (i == totalslices - 1 && (A->rmap->n & 0x07)) { 611 for (r = 0; r < (A->rmap->n & 0x07); ++r) { 612 row = 8 * i + r; 613 yval = (MatScalar)0.0; 614 nnz_in_row = a->rlen[row]; 615 for (j = 0; j < nnz_in_row; ++j) yval += aval[8 * j + r] * x[acolidx[8 * j + r]]; 616 z[row] = y[row] + yval; 617 } 618 break; 619 } 620 621 vec_y = _mm256_loadu_pd(y + 8 * i); 622 vec_y2 = _mm256_loadu_pd(y + 8 * i + 4); 623 624 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 625 for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) { 626 vec_vals = _mm256_loadu_pd(aval); 627 vec_x_tmp = _mm_setzero_pd(); 628 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 629 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 630 vec_x = _mm256_setzero_pd(); 631 vec_x = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0); 632 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 633 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 634 vec_x = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1); 635 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y); 636 aval += 4; 637 638 vec_vals = _mm256_loadu_pd(aval); 639 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 640 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 641 vec_x = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0); 642 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 643 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 644 vec_x = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1); 645 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y2); 646 aval += 4; 647 } 648 649 _mm256_storeu_pd(z + i * 8, vec_y); 650 _mm256_storeu_pd(z + i * 8 + 4, vec_y2); 651 } 652 #else 653 for (i = 0; i < totalslices; i++) { /* loop over slices */ 654 for (j = 0; j < SLICE_HEIGHT; j++) { 655 sum[j] = 0.0; 656 for (k = a->sliidx[i] + j; k < a->sliidx[i + 1]; k += SLICE_HEIGHT) sum[j] += aval[k] * x[acolidx[k]]; 657 } 658 if (i == totalslices - 1 && (A->rmap->n % SLICE_HEIGHT)) { 659 for (j = 0; j < (A->rmap->n % SLICE_HEIGHT); j++) z[SLICE_HEIGHT * i + j] = y[SLICE_HEIGHT * i + j] + sum[j]; 660 } else { 661 for (j = 0; j < SLICE_HEIGHT; j++) z[SLICE_HEIGHT * i + j] = y[SLICE_HEIGHT * i + j] + sum[j]; 662 } 663 } 664 #endif 665 666 PetscCall(PetscLogFlops(2.0 * a->nz)); 667 PetscCall(VecRestoreArrayRead(xx, &x)); 668 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 669 PetscFunctionReturn(PETSC_SUCCESS); 670 } 671 672 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A, Vec xx, Vec zz, Vec yy) 673 { 674 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 675 PetscScalar *y; 676 const PetscScalar *x; 677 const MatScalar *aval = a->val; 678 const PetscInt *acolidx = a->colidx; 679 PetscInt i, j, r, row, nnz_in_row, totalslices = a->totalslices; 680 681 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 682 #pragma disjoint(*x, *y, *aval) 683 #endif 684 685 PetscFunctionBegin; 686 if (A->symmetric == PETSC_BOOL3_TRUE) { 687 PetscCall(MatMultAdd_SeqSELL(A, xx, zz, yy)); 688 PetscFunctionReturn(PETSC_SUCCESS); 689 } 690 if (zz != yy) PetscCall(VecCopy(zz, yy)); 691 692 if (a->nz) { 693 PetscCall(VecGetArrayRead(xx, &x)); 694 PetscCall(VecGetArray(yy, &y)); 695 for (i = 0; i < a->totalslices; i++) { /* loop over slices */ 696 if (i == totalslices - 1 && (A->rmap->n % SLICE_HEIGHT)) { 697 for (r = 0; r < (A->rmap->n % SLICE_HEIGHT); ++r) { 698 row = SLICE_HEIGHT * i + r; 699 nnz_in_row = a->rlen[row]; 700 for (j = 0; j < nnz_in_row; ++j) y[acolidx[SLICE_HEIGHT * j + r]] += aval[SLICE_HEIGHT * j + r] * x[row]; 701 } 702 break; 703 } 704 for (r = 0; r < SLICE_HEIGHT; ++r) 705 for (j = a->sliidx[i] + r; j < a->sliidx[i + 1]; j += SLICE_HEIGHT) y[acolidx[j]] += aval[j] * x[SLICE_HEIGHT * i + r]; 706 } 707 PetscCall(PetscLogFlops(2.0 * a->nz)); 708 PetscCall(VecRestoreArrayRead(xx, &x)); 709 PetscCall(VecRestoreArray(yy, &y)); 710 } 711 PetscFunctionReturn(PETSC_SUCCESS); 712 } 713 714 PetscErrorCode MatMultTranspose_SeqSELL(Mat A, Vec xx, Vec yy) 715 { 716 PetscFunctionBegin; 717 if (A->symmetric == PETSC_BOOL3_TRUE) { 718 PetscCall(MatMult_SeqSELL(A, xx, yy)); 719 } else { 720 PetscCall(VecSet(yy, 0.0)); 721 PetscCall(MatMultTransposeAdd_SeqSELL(A, xx, yy, yy)); 722 } 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 /* 727 Checks for missing diagonals 728 */ 729 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A, PetscBool *missing, PetscInt *d) 730 { 731 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 732 PetscInt *diag, i; 733 734 PetscFunctionBegin; 735 *missing = PETSC_FALSE; 736 if (A->rmap->n > 0 && !(a->colidx)) { 737 *missing = PETSC_TRUE; 738 if (d) *d = 0; 739 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 740 } else { 741 diag = a->diag; 742 for (i = 0; i < A->rmap->n; i++) { 743 if (diag[i] == -1) { 744 *missing = PETSC_TRUE; 745 if (d) *d = i; 746 PetscCall(PetscInfo(A, "Matrix is missing diagonal number %" PetscInt_FMT "\n", i)); 747 break; 748 } 749 } 750 } 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 755 { 756 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 757 PetscInt i, j, m = A->rmap->n, shift; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 PetscCall(PetscMalloc1(m, &a->diag)); 762 a->free_diag = PETSC_TRUE; 763 } 764 for (i = 0; i < m; i++) { /* loop over rows */ 765 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; /* starting index of the row i */ 766 a->diag[i] = -1; 767 for (j = 0; j < a->rlen[i]; j++) { 768 if (a->colidx[shift + SLICE_HEIGHT * j] == i) { 769 a->diag[i] = shift + SLICE_HEIGHT * j; 770 break; 771 } 772 } 773 } 774 PetscFunctionReturn(PETSC_SUCCESS); 775 } 776 777 /* 778 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 779 */ 780 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A, PetscScalar omega, PetscScalar fshift) 781 { 782 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 783 PetscInt i, *diag, m = A->rmap->n; 784 MatScalar *val = a->val; 785 PetscScalar *idiag, *mdiag; 786 787 PetscFunctionBegin; 788 if (a->idiagvalid) PetscFunctionReturn(PETSC_SUCCESS); 789 PetscCall(MatMarkDiagonal_SeqSELL(A)); 790 diag = a->diag; 791 if (!a->idiag) { 792 PetscCall(PetscMalloc3(m, &a->idiag, m, &a->mdiag, m, &a->ssor_work)); 793 val = a->val; 794 } 795 mdiag = a->mdiag; 796 idiag = a->idiag; 797 798 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 799 for (i = 0; i < m; i++) { 800 mdiag[i] = val[diag[i]]; 801 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 802 PetscCheck(PetscRealPart(fshift), PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Zero diagonal on row %" PetscInt_FMT, i); 803 PetscCall(PetscInfo(A, "Zero diagonal on row %" PetscInt_FMT "\n", i)); 804 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 805 A->factorerror_zeropivot_value = 0.0; 806 A->factorerror_zeropivot_row = i; 807 } 808 idiag[i] = 1.0 / val[diag[i]]; 809 } 810 PetscCall(PetscLogFlops(m)); 811 } else { 812 for (i = 0; i < m; i++) { 813 mdiag[i] = val[diag[i]]; 814 idiag[i] = omega / (fshift + val[diag[i]]); 815 } 816 PetscCall(PetscLogFlops(2.0 * m)); 817 } 818 a->idiagvalid = PETSC_TRUE; 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 823 { 824 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 825 826 PetscFunctionBegin; 827 PetscCall(PetscArrayzero(a->val, a->sliidx[a->totalslices])); 828 PetscCall(MatSeqSELLInvalidateDiagonal(A)); 829 PetscFunctionReturn(PETSC_SUCCESS); 830 } 831 832 PetscErrorCode MatDestroy_SeqSELL(Mat A) 833 { 834 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 835 836 PetscFunctionBegin; 837 #if defined(PETSC_USE_LOG) 838 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->n, A->cmap->n, a->nz)); 839 #endif 840 PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx)); 841 PetscCall(ISDestroy(&a->row)); 842 PetscCall(ISDestroy(&a->col)); 843 PetscCall(PetscFree(a->diag)); 844 PetscCall(PetscFree(a->rlen)); 845 PetscCall(PetscFree(a->sliidx)); 846 PetscCall(PetscFree3(a->idiag, a->mdiag, a->ssor_work)); 847 PetscCall(PetscFree(a->solve_work)); 848 PetscCall(ISDestroy(&a->icol)); 849 PetscCall(PetscFree(a->saved_values)); 850 PetscCall(PetscFree2(a->getrowcols, a->getrowvals)); 851 852 PetscCall(PetscFree(A->data)); 853 854 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 855 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 856 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 857 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", NULL)); 858 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqSELLGetArray_C", NULL)); 859 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqSELLRestoreArray_C", NULL)); 860 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_SeqSELL_SeqAIJ_C", NULL)); 861 #if defined(PETSC_HAVE_CUDA) 862 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_SeqSELL_SeqSELLCUDA_C", NULL)); 863 #endif 864 PetscFunctionReturn(PETSC_SUCCESS); 865 } 866 867 PetscErrorCode MatSetOption_SeqSELL(Mat A, MatOption op, PetscBool flg) 868 { 869 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 870 871 PetscFunctionBegin; 872 switch (op) { 873 case MAT_ROW_ORIENTED: 874 a->roworiented = flg; 875 break; 876 case MAT_KEEP_NONZERO_PATTERN: 877 a->keepnonzeropattern = flg; 878 break; 879 case MAT_NEW_NONZERO_LOCATIONS: 880 a->nonew = (flg ? 0 : 1); 881 break; 882 case MAT_NEW_NONZERO_LOCATION_ERR: 883 a->nonew = (flg ? -1 : 0); 884 break; 885 case MAT_NEW_NONZERO_ALLOCATION_ERR: 886 a->nonew = (flg ? -2 : 0); 887 break; 888 case MAT_UNUSED_NONZERO_LOCATION_ERR: 889 a->nounused = (flg ? -1 : 0); 890 break; 891 case MAT_FORCE_DIAGONAL_ENTRIES: 892 case MAT_IGNORE_OFF_PROC_ENTRIES: 893 case MAT_USE_HASH_TABLE: 894 case MAT_SORTED_FULL: 895 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 896 break; 897 case MAT_SPD: 898 case MAT_SYMMETRIC: 899 case MAT_STRUCTURALLY_SYMMETRIC: 900 case MAT_HERMITIAN: 901 case MAT_SYMMETRY_ETERNAL: 902 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 903 case MAT_SPD_ETERNAL: 904 /* These options are handled directly by MatSetOption() */ 905 break; 906 default: 907 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 908 } 909 PetscFunctionReturn(PETSC_SUCCESS); 910 } 911 912 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A, Vec v) 913 { 914 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 915 PetscInt i, j, n, shift; 916 PetscScalar *x, zero = 0.0; 917 918 PetscFunctionBegin; 919 PetscCall(VecGetLocalSize(v, &n)); 920 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 921 922 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 923 PetscInt *diag = a->diag; 924 PetscCall(VecGetArray(v, &x)); 925 for (i = 0; i < n; i++) x[i] = 1.0 / a->val[diag[i]]; 926 PetscCall(VecRestoreArray(v, &x)); 927 PetscFunctionReturn(PETSC_SUCCESS); 928 } 929 930 PetscCall(VecSet(v, zero)); 931 PetscCall(VecGetArray(v, &x)); 932 for (i = 0; i < n; i++) { /* loop over rows */ 933 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; /* starting index of the row i */ 934 x[i] = 0; 935 for (j = 0; j < a->rlen[i]; j++) { 936 if (a->colidx[shift + SLICE_HEIGHT * j] == i) { 937 x[i] = a->val[shift + SLICE_HEIGHT * j]; 938 break; 939 } 940 } 941 } 942 PetscCall(VecRestoreArray(v, &x)); 943 PetscFunctionReturn(PETSC_SUCCESS); 944 } 945 946 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A, Vec ll, Vec rr) 947 { 948 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 949 const PetscScalar *l, *r; 950 PetscInt i, j, m, n, row; 951 952 PetscFunctionBegin; 953 if (ll) { 954 /* The local size is used so that VecMPI can be passed to this routine 955 by MatDiagonalScale_MPISELL */ 956 PetscCall(VecGetLocalSize(ll, &m)); 957 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector wrong length"); 958 PetscCall(VecGetArrayRead(ll, &l)); 959 for (i = 0; i < a->totalslices; i++) { /* loop over slices */ 960 if (i == a->totalslices - 1 && (A->rmap->n % SLICE_HEIGHT)) { /* if last slice has padding rows */ 961 for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = (row + 1) % SLICE_HEIGHT) { 962 if (row < (A->rmap->n % SLICE_HEIGHT)) a->val[j] *= l[SLICE_HEIGHT * i + row]; 963 } 964 } else { 965 for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = (row + 1) % SLICE_HEIGHT) { a->val[j] *= l[SLICE_HEIGHT * i + row]; } 966 } 967 } 968 PetscCall(VecRestoreArrayRead(ll, &l)); 969 PetscCall(PetscLogFlops(a->nz)); 970 } 971 if (rr) { 972 PetscCall(VecGetLocalSize(rr, &n)); 973 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vector wrong length"); 974 PetscCall(VecGetArrayRead(rr, &r)); 975 for (i = 0; i < a->totalslices; i++) { /* loop over slices */ 976 if (i == a->totalslices - 1 && (A->rmap->n % SLICE_HEIGHT)) { /* if last slice has padding rows */ 977 for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = ((row + 1) % SLICE_HEIGHT)) { 978 if (row < (A->rmap->n % SLICE_HEIGHT)) a->val[j] *= r[a->colidx[j]]; 979 } 980 } else { 981 for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j++) a->val[j] *= r[a->colidx[j]]; 982 } 983 } 984 PetscCall(VecRestoreArrayRead(rr, &r)); 985 PetscCall(PetscLogFlops(a->nz)); 986 } 987 PetscCall(MatSeqSELLInvalidateDiagonal(A)); 988 #if defined(PETSC_HAVE_CUDA) 989 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED) A->offloadmask = PETSC_OFFLOAD_CPU; 990 #endif 991 PetscFunctionReturn(PETSC_SUCCESS); 992 } 993 994 PetscErrorCode MatGetValues_SeqSELL(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 995 { 996 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 997 PetscInt *cp, i, k, low, high, t, row, col, l; 998 PetscInt shift; 999 MatScalar *vp; 1000 1001 PetscFunctionBegin; 1002 for (k = 0; k < m; k++) { /* loop over requested rows */ 1003 row = im[k]; 1004 if (row < 0) continue; 1005 PetscCheck(row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->n - 1); 1006 shift = a->sliidx[row / SLICE_HEIGHT] + (row % SLICE_HEIGHT); /* starting index of the row */ 1007 cp = a->colidx + shift; /* pointer to the row */ 1008 vp = a->val + shift; /* pointer to the row */ 1009 for (l = 0; l < n; l++) { /* loop over requested columns */ 1010 col = in[l]; 1011 if (col < 0) continue; 1012 PetscCheck(col < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: row %" PetscInt_FMT " max %" PetscInt_FMT, col, A->cmap->n - 1); 1013 high = a->rlen[row]; 1014 low = 0; /* assume unsorted */ 1015 while (high - low > 5) { 1016 t = (low + high) / 2; 1017 if (*(cp + SLICE_HEIGHT * t) > col) high = t; 1018 else low = t; 1019 } 1020 for (i = low; i < high; i++) { 1021 if (*(cp + SLICE_HEIGHT * i) > col) break; 1022 if (*(cp + SLICE_HEIGHT * i) == col) { 1023 *v++ = *(vp + SLICE_HEIGHT * i); 1024 goto finished; 1025 } 1026 } 1027 *v++ = 0.0; 1028 finished:; 1029 } 1030 } 1031 PetscFunctionReturn(PETSC_SUCCESS); 1032 } 1033 1034 PetscErrorCode MatView_SeqSELL_ASCII(Mat A, PetscViewer viewer) 1035 { 1036 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1037 PetscInt i, j, m = A->rmap->n, shift; 1038 const char *name; 1039 PetscViewerFormat format; 1040 1041 PetscFunctionBegin; 1042 PetscCall(PetscViewerGetFormat(viewer, &format)); 1043 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1044 PetscInt nofinalvalue = 0; 1045 /* 1046 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1047 nofinalvalue = 1; 1048 } 1049 */ 1050 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1051 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", m, A->cmap->n)); 1052 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Nonzeros = %" PetscInt_FMT " \n", a->nz)); 1053 #if defined(PETSC_USE_COMPLEX) 1054 PetscCall(PetscViewerASCIIPrintf(viewer, "zzz = zeros(%" PetscInt_FMT ",4);\n", a->nz + nofinalvalue)); 1055 #else 1056 PetscCall(PetscViewerASCIIPrintf(viewer, "zzz = zeros(%" PetscInt_FMT ",3);\n", a->nz + nofinalvalue)); 1057 #endif 1058 PetscCall(PetscViewerASCIIPrintf(viewer, "zzz = [\n")); 1059 1060 for (i = 0; i < m; i++) { 1061 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; 1062 for (j = 0; j < a->rlen[i]; j++) { 1063 #if defined(PETSC_USE_COMPLEX) 1064 PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %18.16e %18.16e\n", i + 1, a->colidx[shift + SLICE_HEIGHT * j] + 1, (double)PetscRealPart(a->val[shift + SLICE_HEIGHT * j]), (double)PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]))); 1065 #else 1066 PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n", i + 1, a->colidx[shift + SLICE_HEIGHT * j] + 1, (double)a->val[shift + SLICE_HEIGHT * j])); 1067 #endif 1068 } 1069 } 1070 /* 1071 if (nofinalvalue) { 1072 #if defined(PETSC_USE_COMPLEX) 1073 PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e %18.16e\n",m,A->cmap->n,0.,0.)); 1074 #else 1075 PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",m,A->cmap->n,0.0)); 1076 #endif 1077 } 1078 */ 1079 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1080 PetscCall(PetscViewerASCIIPrintf(viewer, "];\n %s = spconvert(zzz);\n", name)); 1081 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1082 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1083 PetscFunctionReturn(PETSC_SUCCESS); 1084 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1085 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1086 for (i = 0; i < m; i++) { 1087 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1088 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; 1089 for (j = 0; j < a->rlen[i]; j++) { 1090 #if defined(PETSC_USE_COMPLEX) 1091 if (PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]) > 0.0 && PetscRealPart(a->val[shift + SLICE_HEIGHT * j]) != 0.0) { 1092 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[shift + SLICE_HEIGHT * j], (double)PetscRealPart(a->val[shift + SLICE_HEIGHT * j]), (double)PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]))); 1093 } else if (PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]) < 0.0 && PetscRealPart(a->val[shift + SLICE_HEIGHT * j]) != 0.0) { 1094 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[shift + SLICE_HEIGHT * j], (double)PetscRealPart(a->val[shift + SLICE_HEIGHT * j]), (double)-PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]))); 1095 } else if (PetscRealPart(a->val[shift + SLICE_HEIGHT * j]) != 0.0) { 1096 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + SLICE_HEIGHT * j], (double)PetscRealPart(a->val[shift + SLICE_HEIGHT * j]))); 1097 } 1098 #else 1099 if (a->val[shift + SLICE_HEIGHT * j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + SLICE_HEIGHT * j], (double)a->val[shift + SLICE_HEIGHT * j])); 1100 #endif 1101 } 1102 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1103 } 1104 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1105 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1106 PetscInt cnt = 0, jcnt; 1107 PetscScalar value; 1108 #if defined(PETSC_USE_COMPLEX) 1109 PetscBool realonly = PETSC_TRUE; 1110 for (i = 0; i < a->sliidx[a->totalslices]; i++) { 1111 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1112 realonly = PETSC_FALSE; 1113 break; 1114 } 1115 } 1116 #endif 1117 1118 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1119 for (i = 0; i < m; i++) { 1120 jcnt = 0; 1121 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; 1122 for (j = 0; j < A->cmap->n; j++) { 1123 if (jcnt < a->rlen[i] && j == a->colidx[shift + SLICE_HEIGHT * j]) { 1124 value = a->val[cnt++]; 1125 jcnt++; 1126 } else { 1127 value = 0.0; 1128 } 1129 #if defined(PETSC_USE_COMPLEX) 1130 if (realonly) { 1131 PetscCall(PetscViewerASCIIPrintf(viewer, " %7.5e ", (double)PetscRealPart(value))); 1132 } else { 1133 PetscCall(PetscViewerASCIIPrintf(viewer, " %7.5e+%7.5e i ", (double)PetscRealPart(value), (double)PetscImaginaryPart(value))); 1134 } 1135 #else 1136 PetscCall(PetscViewerASCIIPrintf(viewer, " %7.5e ", (double)value)); 1137 #endif 1138 } 1139 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1140 } 1141 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1142 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1143 PetscInt fshift = 1; 1144 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1145 #if defined(PETSC_USE_COMPLEX) 1146 PetscCall(PetscViewerASCIIPrintf(viewer, "%%%%MatrixMarket matrix coordinate complex general\n")); 1147 #else 1148 PetscCall(PetscViewerASCIIPrintf(viewer, "%%%%MatrixMarket matrix coordinate real general\n")); 1149 #endif 1150 PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", m, A->cmap->n, a->nz)); 1151 for (i = 0; i < m; i++) { 1152 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; 1153 for (j = 0; j < a->rlen[i]; j++) { 1154 #if defined(PETSC_USE_COMPLEX) 1155 PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %g %g\n", i + fshift, a->colidx[shift + SLICE_HEIGHT * j] + fshift, (double)PetscRealPart(a->val[shift + SLICE_HEIGHT * j]), (double)PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]))); 1156 #else 1157 PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %g\n", i + fshift, a->colidx[shift + SLICE_HEIGHT * j] + fshift, (double)a->val[shift + SLICE_HEIGHT * j])); 1158 #endif 1159 } 1160 } 1161 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1162 } else if (format == PETSC_VIEWER_NATIVE) { 1163 for (i = 0; i < a->totalslices; i++) { /* loop over slices */ 1164 PetscInt row; 1165 PetscCall(PetscViewerASCIIPrintf(viewer, "slice %" PetscInt_FMT ": %" PetscInt_FMT " %" PetscInt_FMT "\n", i, a->sliidx[i], a->sliidx[i + 1])); 1166 for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = (row + 1) % SLICE_HEIGHT) { 1167 #if defined(PETSC_USE_COMPLEX) 1168 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1169 PetscCall(PetscViewerASCIIPrintf(viewer, " %" PetscInt_FMT " %" PetscInt_FMT " %g + %g i\n", SLICE_HEIGHT * i + row, a->colidx[j], (double)PetscRealPart(a->val[j]), (double)PetscImaginaryPart(a->val[j]))); 1170 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1171 PetscCall(PetscViewerASCIIPrintf(viewer, " %" PetscInt_FMT " %" PetscInt_FMT " %g - %g i\n", SLICE_HEIGHT * i + row, a->colidx[j], (double)PetscRealPart(a->val[j]), -(double)PetscImaginaryPart(a->val[j]))); 1172 } else { 1173 PetscCall(PetscViewerASCIIPrintf(viewer, " %" PetscInt_FMT " %" PetscInt_FMT " %g\n", SLICE_HEIGHT * i + row, a->colidx[j], (double)PetscRealPart(a->val[j]))); 1174 } 1175 #else 1176 PetscCall(PetscViewerASCIIPrintf(viewer, " %" PetscInt_FMT " %" PetscInt_FMT " %g\n", SLICE_HEIGHT * i + row, a->colidx[j], (double)a->val[j])); 1177 #endif 1178 } 1179 } 1180 } else { 1181 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1182 if (A->factortype) { 1183 for (i = 0; i < m; i++) { 1184 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; 1185 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1186 /* L part */ 1187 for (j = shift; j < a->diag[i]; j += SLICE_HEIGHT) { 1188 #if defined(PETSC_USE_COMPLEX) 1189 if (PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]) > 0.0) { 1190 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)PetscImaginaryPart(a->val[j]))); 1191 } else if (PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]) < 0.0) { 1192 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)(-PetscImaginaryPart(a->val[j])))); 1193 } else { 1194 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)PetscRealPart(a->val[j]))); 1195 } 1196 #else 1197 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)a->val[j])); 1198 #endif 1199 } 1200 /* diagonal */ 1201 j = a->diag[i]; 1202 #if defined(PETSC_USE_COMPLEX) 1203 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1204 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[j], (double)PetscRealPart(1.0 / a->val[j]), (double)PetscImaginaryPart(1.0 / a->val[j]))); 1205 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1206 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[j], (double)PetscRealPart(1.0 / a->val[j]), (double)(-PetscImaginaryPart(1.0 / a->val[j])))); 1207 } else { 1208 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)PetscRealPart(1.0 / a->val[j]))); 1209 } 1210 #else 1211 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)(1.0 / a->val[j]))); 1212 #endif 1213 1214 /* U part */ 1215 for (j = a->diag[i] + 1; j < shift + SLICE_HEIGHT * a->rlen[i]; j += SLICE_HEIGHT) { 1216 #if defined(PETSC_USE_COMPLEX) 1217 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1218 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)PetscImaginaryPart(a->val[j]))); 1219 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1220 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)(-PetscImaginaryPart(a->val[j])))); 1221 } else { 1222 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)PetscRealPart(a->val[j]))); 1223 } 1224 #else 1225 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)a->val[j])); 1226 #endif 1227 } 1228 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1229 } 1230 } else { 1231 for (i = 0; i < m; i++) { 1232 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; 1233 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1234 for (j = 0; j < a->rlen[i]; j++) { 1235 #if defined(PETSC_USE_COMPLEX) 1236 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1237 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[shift + SLICE_HEIGHT * j], (double)PetscRealPart(a->val[shift + SLICE_HEIGHT * j]), (double)PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]))); 1238 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1239 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[shift + SLICE_HEIGHT * j], (double)PetscRealPart(a->val[shift + SLICE_HEIGHT * j]), (double)-PetscImaginaryPart(a->val[shift + SLICE_HEIGHT * j]))); 1240 } else { 1241 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + SLICE_HEIGHT * j], (double)PetscRealPart(a->val[shift + SLICE_HEIGHT * j]))); 1242 } 1243 #else 1244 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + SLICE_HEIGHT * j], (double)a->val[shift + SLICE_HEIGHT * j])); 1245 #endif 1246 } 1247 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1248 } 1249 } 1250 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1251 } 1252 PetscCall(PetscViewerFlush(viewer)); 1253 PetscFunctionReturn(PETSC_SUCCESS); 1254 } 1255 1256 #include <petscdraw.h> 1257 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw, void *Aa) 1258 { 1259 Mat A = (Mat)Aa; 1260 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1261 PetscInt i, j, m = A->rmap->n, shift; 1262 int color; 1263 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1264 PetscViewer viewer; 1265 PetscViewerFormat format; 1266 1267 PetscFunctionBegin; 1268 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1269 PetscCall(PetscViewerGetFormat(viewer, &format)); 1270 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1271 1272 /* loop over matrix elements drawing boxes */ 1273 1274 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1275 PetscDrawCollectiveBegin(draw); 1276 /* Blue for negative, Cyan for zero and Red for positive */ 1277 color = PETSC_DRAW_BLUE; 1278 for (i = 0; i < m; i++) { 1279 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; /* starting index of the row i */ 1280 y_l = m - i - 1.0; 1281 y_r = y_l + 1.0; 1282 for (j = 0; j < a->rlen[i]; j++) { 1283 x_l = a->colidx[shift + SLICE_HEIGHT * j]; 1284 x_r = x_l + 1.0; 1285 if (PetscRealPart(a->val[shift + SLICE_HEIGHT * j]) >= 0.) continue; 1286 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1287 } 1288 } 1289 color = PETSC_DRAW_CYAN; 1290 for (i = 0; i < m; i++) { 1291 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; 1292 y_l = m - i - 1.0; 1293 y_r = y_l + 1.0; 1294 for (j = 0; j < a->rlen[i]; j++) { 1295 x_l = a->colidx[shift + SLICE_HEIGHT * j]; 1296 x_r = x_l + 1.0; 1297 if (a->val[shift + SLICE_HEIGHT * j] != 0.) continue; 1298 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1299 } 1300 } 1301 color = PETSC_DRAW_RED; 1302 for (i = 0; i < m; i++) { 1303 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; 1304 y_l = m - i - 1.0; 1305 y_r = y_l + 1.0; 1306 for (j = 0; j < a->rlen[i]; j++) { 1307 x_l = a->colidx[shift + SLICE_HEIGHT * j]; 1308 x_r = x_l + 1.0; 1309 if (PetscRealPart(a->val[shift + SLICE_HEIGHT * j]) <= 0.) continue; 1310 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1311 } 1312 } 1313 PetscDrawCollectiveEnd(draw); 1314 } else { 1315 /* use contour shading to indicate magnitude of values */ 1316 /* first determine max of all nonzero values */ 1317 PetscReal minv = 0.0, maxv = 0.0; 1318 PetscInt count = 0; 1319 PetscDraw popup; 1320 for (i = 0; i < a->sliidx[a->totalslices]; i++) { 1321 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1322 } 1323 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1324 PetscCall(PetscDrawGetPopup(draw, &popup)); 1325 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1326 1327 PetscDrawCollectiveBegin(draw); 1328 for (i = 0; i < m; i++) { 1329 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; 1330 y_l = m - i - 1.0; 1331 y_r = y_l + 1.0; 1332 for (j = 0; j < a->rlen[i]; j++) { 1333 x_l = a->colidx[shift + SLICE_HEIGHT * j]; 1334 x_r = x_l + 1.0; 1335 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]), minv, maxv); 1336 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1337 count++; 1338 } 1339 } 1340 PetscDrawCollectiveEnd(draw); 1341 } 1342 PetscFunctionReturn(PETSC_SUCCESS); 1343 } 1344 1345 #include <petscdraw.h> 1346 PetscErrorCode MatView_SeqSELL_Draw(Mat A, PetscViewer viewer) 1347 { 1348 PetscDraw draw; 1349 PetscReal xr, yr, xl, yl, h, w; 1350 PetscBool isnull; 1351 1352 PetscFunctionBegin; 1353 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1354 PetscCall(PetscDrawIsNull(draw, &isnull)); 1355 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1356 1357 xr = A->cmap->n; 1358 yr = A->rmap->n; 1359 h = yr / 10.0; 1360 w = xr / 10.0; 1361 xr += w; 1362 yr += h; 1363 xl = -w; 1364 yl = -h; 1365 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1366 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1367 PetscCall(PetscDrawZoom(draw, MatView_SeqSELL_Draw_Zoom, A)); 1368 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1369 PetscCall(PetscDrawSave(draw)); 1370 PetscFunctionReturn(PETSC_SUCCESS); 1371 } 1372 1373 PetscErrorCode MatView_SeqSELL(Mat A, PetscViewer viewer) 1374 { 1375 PetscBool iascii, isbinary, isdraw; 1376 1377 PetscFunctionBegin; 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1379 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1380 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1381 if (iascii) { 1382 PetscCall(MatView_SeqSELL_ASCII(A, viewer)); 1383 } else if (isbinary) { 1384 /* PetscCall(MatView_SeqSELL_Binary(A,viewer)); */ 1385 } else if (isdraw) PetscCall(MatView_SeqSELL_Draw(A, viewer)); 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A, MatAssemblyType mode) 1390 { 1391 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1392 PetscInt i, shift, row_in_slice, row, nrow, *cp, lastcol, j, k; 1393 MatScalar *vp; 1394 1395 PetscFunctionBegin; 1396 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 1397 /* To do: compress out the unused elements */ 1398 PetscCall(MatMarkDiagonal_SeqSELL(A)); 1399 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " allocated %" PetscInt_FMT " used (%" PetscInt_FMT " nonzeros+%" PetscInt_FMT " paddedzeros)\n", A->rmap->n, A->cmap->n, a->maxallocmat, a->sliidx[a->totalslices], a->nz, a->sliidx[a->totalslices] - a->nz)); 1400 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs)); 1401 PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", a->rlenmax)); 1402 a->nonzerorowcnt = 0; 1403 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1404 for (i = 0; i < a->totalslices; ++i) { 1405 shift = a->sliidx[i]; /* starting index of the slice */ 1406 cp = a->colidx + shift; /* pointer to the column indices of the slice */ 1407 vp = a->val + shift; /* pointer to the nonzero values of the slice */ 1408 for (row_in_slice = 0; row_in_slice < SLICE_HEIGHT; ++row_in_slice) { /* loop over rows in the slice */ 1409 row = SLICE_HEIGHT * i + row_in_slice; 1410 nrow = a->rlen[row]; /* number of nonzeros in row */ 1411 /* 1412 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1413 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1414 */ 1415 lastcol = 0; 1416 if (nrow > 0) { /* nonempty row */ 1417 a->nonzerorowcnt++; 1418 lastcol = cp[SLICE_HEIGHT * (nrow - 1) + row_in_slice]; /* use the index from the last nonzero at current row */ 1419 } else if (!row_in_slice) { /* first row of the correct slice is empty */ 1420 for (j = 1; j < SLICE_HEIGHT; j++) { 1421 if (a->rlen[SLICE_HEIGHT * i + j]) { 1422 lastcol = cp[j]; 1423 break; 1424 } 1425 } 1426 } else { 1427 if (a->sliidx[i + 1] != shift) lastcol = cp[row_in_slice - 1]; /* use the index from the previous row */ 1428 } 1429 1430 for (k = nrow; k < (a->sliidx[i + 1] - shift) / SLICE_HEIGHT; ++k) { 1431 cp[SLICE_HEIGHT * k + row_in_slice] = lastcol; 1432 vp[SLICE_HEIGHT * k + row_in_slice] = (MatScalar)0; 1433 } 1434 } 1435 } 1436 1437 A->info.mallocs += a->reallocs; 1438 a->reallocs = 0; 1439 1440 PetscCall(MatSeqSELLInvalidateDiagonal(A)); 1441 PetscFunctionReturn(PETSC_SUCCESS); 1442 } 1443 1444 PetscErrorCode MatGetInfo_SeqSELL(Mat A, MatInfoType flag, MatInfo *info) 1445 { 1446 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1447 1448 PetscFunctionBegin; 1449 info->block_size = 1.0; 1450 info->nz_allocated = a->maxallocmat; 1451 info->nz_used = a->sliidx[a->totalslices]; /* include padding zeros */ 1452 info->nz_unneeded = (a->maxallocmat - a->sliidx[a->totalslices]); 1453 info->assemblies = A->num_ass; 1454 info->mallocs = A->info.mallocs; 1455 info->memory = 0; /* REVIEW ME */ 1456 if (A->factortype) { 1457 info->fill_ratio_given = A->info.fill_ratio_given; 1458 info->fill_ratio_needed = A->info.fill_ratio_needed; 1459 info->factor_mallocs = A->info.factor_mallocs; 1460 } else { 1461 info->fill_ratio_given = 0; 1462 info->fill_ratio_needed = 0; 1463 info->factor_mallocs = 0; 1464 } 1465 PetscFunctionReturn(PETSC_SUCCESS); 1466 } 1467 1468 PetscErrorCode MatSetValues_SeqSELL(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 1469 { 1470 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1471 PetscInt shift, i, k, l, low, high, t, ii, row, col, nrow; 1472 PetscInt *cp, nonew = a->nonew, lastcol = -1; 1473 MatScalar *vp, value; 1474 #if defined(PETSC_HAVE_CUDA) 1475 PetscBool inserted = PETSC_FALSE; 1476 #endif 1477 1478 PetscFunctionBegin; 1479 for (k = 0; k < m; k++) { /* loop over added rows */ 1480 row = im[k]; 1481 if (row < 0) continue; 1482 PetscCheck(row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->n - 1); 1483 shift = a->sliidx[row / SLICE_HEIGHT] + row % SLICE_HEIGHT; /* starting index of the row */ 1484 cp = a->colidx + shift; /* pointer to the row */ 1485 vp = a->val + shift; /* pointer to the row */ 1486 nrow = a->rlen[row]; 1487 low = 0; 1488 high = nrow; 1489 1490 for (l = 0; l < n; l++) { /* loop over added columns */ 1491 col = in[l]; 1492 if (col < 0) continue; 1493 PetscCheck(col < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT, col, A->cmap->n - 1); 1494 if (a->roworiented) { 1495 value = v[l + k * n]; 1496 } else { 1497 value = v[k + l * m]; 1498 } 1499 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1500 1501 /* search in this row for the specified column, i indicates the column to be set */ 1502 if (col <= lastcol) low = 0; 1503 else high = nrow; 1504 lastcol = col; 1505 while (high - low > 5) { 1506 t = (low + high) / 2; 1507 if (*(cp + SLICE_HEIGHT * t) > col) high = t; 1508 else low = t; 1509 } 1510 for (i = low; i < high; i++) { 1511 if (*(cp + SLICE_HEIGHT * i) > col) break; 1512 if (*(cp + SLICE_HEIGHT * i) == col) { 1513 if (is == ADD_VALUES) *(vp + SLICE_HEIGHT * i) += value; 1514 else *(vp + SLICE_HEIGHT * i) = value; 1515 #if defined(PETSC_HAVE_CUDA) 1516 inserted = PETSC_TRUE; 1517 #endif 1518 low = i + 1; 1519 goto noinsert; 1520 } 1521 } 1522 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1523 if (nonew == 1) goto noinsert; 1524 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 1525 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1526 MatSeqXSELLReallocateSELL(A, A->rmap->n, 1, nrow, a->sliidx, row / SLICE_HEIGHT, row, col, a->colidx, a->val, cp, vp, nonew, MatScalar); 1527 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1528 for (ii = nrow - 1; ii >= i; ii--) { 1529 *(cp + SLICE_HEIGHT * (ii + 1)) = *(cp + SLICE_HEIGHT * ii); 1530 *(vp + SLICE_HEIGHT * (ii + 1)) = *(vp + SLICE_HEIGHT * ii); 1531 } 1532 a->rlen[row]++; 1533 *(cp + SLICE_HEIGHT * i) = col; 1534 *(vp + SLICE_HEIGHT * i) = value; 1535 a->nz++; 1536 A->nonzerostate++; 1537 #if defined(PETSC_HAVE_CUDA) 1538 inserted = PETSC_TRUE; 1539 #endif 1540 low = i + 1; 1541 high++; 1542 nrow++; 1543 noinsert:; 1544 } 1545 a->rlen[row] = nrow; 1546 } 1547 #if defined(PETSC_HAVE_CUDA) 1548 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 1549 #endif 1550 PetscFunctionReturn(PETSC_SUCCESS); 1551 } 1552 1553 PetscErrorCode MatCopy_SeqSELL(Mat A, Mat B, MatStructure str) 1554 { 1555 PetscFunctionBegin; 1556 /* If the two matrices have the same copy implementation, use fast copy. */ 1557 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1558 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1559 Mat_SeqSELL *b = (Mat_SeqSELL *)B->data; 1560 1561 PetscCheck(a->sliidx[a->totalslices] == b->sliidx[b->totalslices], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzeros in two matrices are different"); 1562 PetscCall(PetscArraycpy(b->val, a->val, a->sliidx[a->totalslices])); 1563 } else { 1564 PetscCall(MatCopy_Basic(A, B, str)); 1565 } 1566 PetscFunctionReturn(PETSC_SUCCESS); 1567 } 1568 1569 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1570 { 1571 PetscFunctionBegin; 1572 PetscCall(MatSeqSELLSetPreallocation(A, PETSC_DEFAULT, NULL)); 1573 PetscFunctionReturn(PETSC_SUCCESS); 1574 } 1575 1576 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A, PetscScalar *array[]) 1577 { 1578 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1579 1580 PetscFunctionBegin; 1581 *array = a->val; 1582 PetscFunctionReturn(PETSC_SUCCESS); 1583 } 1584 1585 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A, PetscScalar *array[]) 1586 { 1587 PetscFunctionBegin; 1588 PetscFunctionReturn(PETSC_SUCCESS); 1589 } 1590 1591 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1592 { 1593 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1594 PetscInt i; 1595 MatScalar *aval = a->val; 1596 1597 PetscFunctionBegin; 1598 for (i = 0; i < a->sliidx[a->totalslices]; i++) aval[i] = PetscRealPart(aval[i]); 1599 #if defined(PETSC_HAVE_CUDA) 1600 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED) A->offloadmask = PETSC_OFFLOAD_CPU; 1601 #endif 1602 PetscFunctionReturn(PETSC_SUCCESS); 1603 } 1604 1605 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1606 { 1607 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1608 PetscInt i; 1609 MatScalar *aval = a->val; 1610 1611 PetscFunctionBegin; 1612 for (i = 0; i < a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1613 PetscCall(MatSeqSELLInvalidateDiagonal(A)); 1614 #if defined(PETSC_HAVE_CUDA) 1615 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED) A->offloadmask = PETSC_OFFLOAD_CPU; 1616 #endif 1617 PetscFunctionReturn(PETSC_SUCCESS); 1618 } 1619 1620 PetscErrorCode MatScale_SeqSELL(Mat inA, PetscScalar alpha) 1621 { 1622 Mat_SeqSELL *a = (Mat_SeqSELL *)inA->data; 1623 MatScalar *aval = a->val; 1624 PetscScalar oalpha = alpha; 1625 PetscBLASInt one = 1, size; 1626 1627 PetscFunctionBegin; 1628 PetscCall(PetscBLASIntCast(a->sliidx[a->totalslices], &size)); 1629 PetscCallBLAS("BLASscal", BLASscal_(&size, &oalpha, aval, &one)); 1630 PetscCall(PetscLogFlops(a->nz)); 1631 PetscCall(MatSeqSELLInvalidateDiagonal(inA)); 1632 #if defined(PETSC_HAVE_CUDA) 1633 if (inA->offloadmask != PETSC_OFFLOAD_UNALLOCATED) inA->offloadmask = PETSC_OFFLOAD_CPU; 1634 #endif 1635 PetscFunctionReturn(PETSC_SUCCESS); 1636 } 1637 1638 PetscErrorCode MatShift_SeqSELL(Mat Y, PetscScalar a) 1639 { 1640 Mat_SeqSELL *y = (Mat_SeqSELL *)Y->data; 1641 1642 PetscFunctionBegin; 1643 if (!Y->preallocated || !y->nz) PetscCall(MatSeqSELLSetPreallocation(Y, 1, NULL)); 1644 PetscCall(MatShift_Basic(Y, a)); 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSOR_SeqSELL(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1649 { 1650 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 1651 PetscScalar *x, sum, *t; 1652 const MatScalar *idiag = NULL, *mdiag; 1653 const PetscScalar *b, *xb; 1654 PetscInt n, m = A->rmap->n, i, j, shift; 1655 const PetscInt *diag; 1656 1657 PetscFunctionBegin; 1658 its = its * lits; 1659 1660 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1661 if (!a->idiagvalid) PetscCall(MatInvertDiagonal_SeqSELL(A, omega, fshift)); 1662 a->fshift = fshift; 1663 a->omega = omega; 1664 1665 diag = a->diag; 1666 t = a->ssor_work; 1667 idiag = a->idiag; 1668 mdiag = a->mdiag; 1669 1670 PetscCall(VecGetArray(xx, &x)); 1671 PetscCall(VecGetArrayRead(bb, &b)); 1672 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1673 PetscCheck(flag != SOR_APPLY_UPPER, PETSC_COMM_SELF, PETSC_ERR_SUP, "SOR_APPLY_UPPER is not implemented"); 1674 PetscCheck(flag != SOR_APPLY_LOWER, PETSC_COMM_SELF, PETSC_ERR_SUP, "SOR_APPLY_LOWER is not implemented"); 1675 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 1676 1677 if (flag & SOR_ZERO_INITIAL_GUESS) { 1678 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1679 for (i = 0; i < m; i++) { 1680 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; /* starting index of the row i */ 1681 sum = b[i]; 1682 n = (diag[i] - shift) / SLICE_HEIGHT; 1683 for (j = 0; j < n; j++) sum -= a->val[shift + SLICE_HEIGHT * j] * x[a->colidx[shift + SLICE_HEIGHT * j]]; 1684 t[i] = sum; 1685 x[i] = sum * idiag[i]; 1686 } 1687 xb = t; 1688 PetscCall(PetscLogFlops(a->nz)); 1689 } else xb = b; 1690 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1691 for (i = m - 1; i >= 0; i--) { 1692 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; /* starting index of the row i */ 1693 sum = xb[i]; 1694 n = a->rlen[i] - (diag[i] - shift) / SLICE_HEIGHT - 1; 1695 for (j = 1; j <= n; j++) sum -= a->val[diag[i] + SLICE_HEIGHT * j] * x[a->colidx[diag[i] + SLICE_HEIGHT * j]]; 1696 if (xb == b) { 1697 x[i] = sum * idiag[i]; 1698 } else { 1699 x[i] = (1. - omega) * x[i] + sum * idiag[i]; /* omega in idiag */ 1700 } 1701 } 1702 PetscCall(PetscLogFlops(a->nz)); /* assumes 1/2 in upper */ 1703 } 1704 its--; 1705 } 1706 while (its--) { 1707 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1708 for (i = 0; i < m; i++) { 1709 /* lower */ 1710 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; /* starting index of the row i */ 1711 sum = b[i]; 1712 n = (diag[i] - shift) / SLICE_HEIGHT; 1713 for (j = 0; j < n; j++) sum -= a->val[shift + SLICE_HEIGHT * j] * x[a->colidx[shift + SLICE_HEIGHT * j]]; 1714 t[i] = sum; /* save application of the lower-triangular part */ 1715 /* upper */ 1716 n = a->rlen[i] - (diag[i] - shift) / SLICE_HEIGHT - 1; 1717 for (j = 1; j <= n; j++) sum -= a->val[diag[i] + SLICE_HEIGHT * j] * x[a->colidx[diag[i] + SLICE_HEIGHT * j]]; 1718 x[i] = (1. - omega) * x[i] + sum * idiag[i]; /* omega in idiag */ 1719 } 1720 xb = t; 1721 PetscCall(PetscLogFlops(2.0 * a->nz)); 1722 } else xb = b; 1723 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1724 for (i = m - 1; i >= 0; i--) { 1725 shift = a->sliidx[i / SLICE_HEIGHT] + i % SLICE_HEIGHT; /* starting index of the row i */ 1726 sum = xb[i]; 1727 if (xb == b) { 1728 /* whole matrix (no checkpointing available) */ 1729 n = a->rlen[i]; 1730 for (j = 0; j < n; j++) sum -= a->val[shift + SLICE_HEIGHT * j] * x[a->colidx[shift + SLICE_HEIGHT * j]]; 1731 x[i] = (1. - omega) * x[i] + (sum + mdiag[i] * x[i]) * idiag[i]; 1732 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1733 n = a->rlen[i] - (diag[i] - shift) / SLICE_HEIGHT - 1; 1734 for (j = 1; j <= n; j++) sum -= a->val[diag[i] + SLICE_HEIGHT * j] * x[a->colidx[diag[i] + SLICE_HEIGHT * j]]; 1735 x[i] = (1. - omega) * x[i] + sum * idiag[i]; /* omega in idiag */ 1736 } 1737 } 1738 if (xb == b) { 1739 PetscCall(PetscLogFlops(2.0 * a->nz)); 1740 } else { 1741 PetscCall(PetscLogFlops(a->nz)); /* assumes 1/2 in upper */ 1742 } 1743 } 1744 } 1745 PetscCall(VecRestoreArray(xx, &x)); 1746 PetscCall(VecRestoreArrayRead(bb, &b)); 1747 PetscFunctionReturn(PETSC_SUCCESS); 1748 } 1749 1750 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1751 MatGetRow_SeqSELL, 1752 MatRestoreRow_SeqSELL, 1753 MatMult_SeqSELL, 1754 /* 4*/ MatMultAdd_SeqSELL, 1755 MatMultTranspose_SeqSELL, 1756 MatMultTransposeAdd_SeqSELL, 1757 NULL, 1758 NULL, 1759 NULL, 1760 /* 10*/ NULL, 1761 NULL, 1762 NULL, 1763 MatSOR_SeqSELL, 1764 NULL, 1765 /* 15*/ MatGetInfo_SeqSELL, 1766 MatEqual_SeqSELL, 1767 MatGetDiagonal_SeqSELL, 1768 MatDiagonalScale_SeqSELL, 1769 NULL, 1770 /* 20*/ NULL, 1771 MatAssemblyEnd_SeqSELL, 1772 MatSetOption_SeqSELL, 1773 MatZeroEntries_SeqSELL, 1774 /* 24*/ NULL, 1775 NULL, 1776 NULL, 1777 NULL, 1778 NULL, 1779 /* 29*/ MatSetUp_SeqSELL, 1780 NULL, 1781 NULL, 1782 NULL, 1783 NULL, 1784 /* 34*/ MatDuplicate_SeqSELL, 1785 NULL, 1786 NULL, 1787 NULL, 1788 NULL, 1789 /* 39*/ NULL, 1790 NULL, 1791 NULL, 1792 MatGetValues_SeqSELL, 1793 MatCopy_SeqSELL, 1794 /* 44*/ NULL, 1795 MatScale_SeqSELL, 1796 MatShift_SeqSELL, 1797 NULL, 1798 NULL, 1799 /* 49*/ NULL, 1800 NULL, 1801 NULL, 1802 NULL, 1803 NULL, 1804 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1805 NULL, 1806 NULL, 1807 NULL, 1808 NULL, 1809 /* 59*/ NULL, 1810 MatDestroy_SeqSELL, 1811 MatView_SeqSELL, 1812 NULL, 1813 NULL, 1814 /* 64*/ NULL, 1815 NULL, 1816 NULL, 1817 NULL, 1818 NULL, 1819 /* 69*/ NULL, 1820 NULL, 1821 NULL, 1822 NULL, 1823 NULL, 1824 /* 74*/ NULL, 1825 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1826 NULL, 1827 NULL, 1828 NULL, 1829 /* 79*/ NULL, 1830 NULL, 1831 NULL, 1832 NULL, 1833 NULL, 1834 /* 84*/ NULL, 1835 NULL, 1836 NULL, 1837 NULL, 1838 NULL, 1839 /* 89*/ NULL, 1840 NULL, 1841 NULL, 1842 NULL, 1843 NULL, 1844 /* 94*/ NULL, 1845 NULL, 1846 NULL, 1847 NULL, 1848 NULL, 1849 /* 99*/ NULL, 1850 NULL, 1851 NULL, 1852 MatConjugate_SeqSELL, 1853 NULL, 1854 /*104*/ NULL, 1855 NULL, 1856 NULL, 1857 NULL, 1858 NULL, 1859 /*109*/ NULL, 1860 NULL, 1861 NULL, 1862 NULL, 1863 MatMissingDiagonal_SeqSELL, 1864 /*114*/ NULL, 1865 NULL, 1866 NULL, 1867 NULL, 1868 NULL, 1869 /*119*/ NULL, 1870 NULL, 1871 NULL, 1872 NULL, 1873 NULL, 1874 /*124*/ NULL, 1875 NULL, 1876 NULL, 1877 NULL, 1878 NULL, 1879 /*129*/ NULL, 1880 NULL, 1881 NULL, 1882 NULL, 1883 NULL, 1884 /*134*/ NULL, 1885 NULL, 1886 NULL, 1887 NULL, 1888 NULL, 1889 /*139*/ NULL, 1890 NULL, 1891 NULL, 1892 MatFDColoringSetUp_SeqXAIJ, 1893 NULL, 1894 /*144*/ NULL, 1895 NULL, 1896 NULL, 1897 NULL, 1898 NULL, 1899 NULL, 1900 /*150*/ NULL, 1901 NULL}; 1902 1903 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1904 { 1905 Mat_SeqSELL *a = (Mat_SeqSELL *)mat->data; 1906 1907 PetscFunctionBegin; 1908 PetscCheck(a->nonew, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1909 1910 /* allocate space for values if not already there */ 1911 if (!a->saved_values) PetscCall(PetscMalloc1(a->sliidx[a->totalslices] + 1, &a->saved_values)); 1912 1913 /* copy values over */ 1914 PetscCall(PetscArraycpy(a->saved_values, a->val, a->sliidx[a->totalslices])); 1915 PetscFunctionReturn(PETSC_SUCCESS); 1916 } 1917 1918 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1919 { 1920 Mat_SeqSELL *a = (Mat_SeqSELL *)mat->data; 1921 1922 PetscFunctionBegin; 1923 PetscCheck(a->nonew, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1924 PetscCheck(a->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 1925 PetscCall(PetscArraycpy(a->val, a->saved_values, a->sliidx[a->totalslices])); 1926 PetscFunctionReturn(PETSC_SUCCESS); 1927 } 1928 1929 /*@C 1930 MatSeqSELLRestoreArray - returns access to the array where the data for a `MATSEQSELL` matrix is stored obtained by `MatSeqSELLGetArray()` 1931 1932 Not Collective 1933 1934 Input Parameters: 1935 + mat - a `MATSEQSELL` matrix 1936 - array - pointer to the data 1937 1938 Level: intermediate 1939 1940 .seealso: `Mat`, `MATSEQSELL`, `MatSeqSELLGetArray()`, `MatSeqSELLRestoreArrayF90()` 1941 @*/ 1942 PetscErrorCode MatSeqSELLRestoreArray(Mat A, PetscScalar **array) 1943 { 1944 PetscFunctionBegin; 1945 PetscUseMethod(A, "MatSeqSELLRestoreArray_C", (Mat, PetscScalar **), (A, array)); 1946 PetscFunctionReturn(PETSC_SUCCESS); 1947 } 1948 1949 #if defined(PETSC_HAVE_CUDA) 1950 PETSC_EXTERN PetscErrorCode MatConvert_SeqSELL_SeqSELLCUDA(Mat); 1951 #endif 1952 1953 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1954 { 1955 Mat_SeqSELL *b; 1956 PetscMPIInt size; 1957 1958 PetscFunctionBegin; 1959 PetscCall(PetscCitationsRegister(citation, &cited)); 1960 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1961 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Comm must be of size 1"); 1962 1963 PetscCall(PetscNew(&b)); 1964 1965 B->data = (void *)b; 1966 1967 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 1968 1969 b->row = NULL; 1970 b->col = NULL; 1971 b->icol = NULL; 1972 b->reallocs = 0; 1973 b->ignorezeroentries = PETSC_FALSE; 1974 b->roworiented = PETSC_TRUE; 1975 b->nonew = 0; 1976 b->diag = NULL; 1977 b->solve_work = NULL; 1978 B->spptr = NULL; 1979 b->saved_values = NULL; 1980 b->idiag = NULL; 1981 b->mdiag = NULL; 1982 b->ssor_work = NULL; 1983 b->omega = 1.0; 1984 b->fshift = 0.0; 1985 b->idiagvalid = PETSC_FALSE; 1986 b->keepnonzeropattern = PETSC_FALSE; 1987 1988 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQSELL)); 1989 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqSELLGetArray_C", MatSeqSELLGetArray_SeqSELL)); 1990 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqSELLRestoreArray_C", MatSeqSELLRestoreArray_SeqSELL)); 1991 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqSELL)); 1992 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqSELL)); 1993 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqSELLSetPreallocation_C", MatSeqSELLSetPreallocation_SeqSELL)); 1994 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_SeqSELL_SeqAIJ_C", MatConvert_SeqSELL_SeqAIJ)); 1995 #if defined(PETSC_HAVE_CUDA) 1996 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_SeqSELL_SeqSELLCUDA_C", MatConvert_SeqSELL_SeqSELLCUDA)); 1997 #endif 1998 PetscFunctionReturn(PETSC_SUCCESS); 1999 } 2000 2001 /* 2002 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 2003 */ 2004 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 2005 { 2006 Mat_SeqSELL *c = (Mat_SeqSELL *)C->data, *a = (Mat_SeqSELL *)A->data; 2007 PetscInt i, m = A->rmap->n; 2008 PetscInt totalslices = a->totalslices; 2009 2010 PetscFunctionBegin; 2011 C->factortype = A->factortype; 2012 c->row = NULL; 2013 c->col = NULL; 2014 c->icol = NULL; 2015 c->reallocs = 0; 2016 C->assembled = PETSC_TRUE; 2017 2018 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 2019 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 2020 2021 PetscCall(PetscMalloc1(SLICE_HEIGHT * totalslices, &c->rlen)); 2022 PetscCall(PetscMalloc1(totalslices + 1, &c->sliidx)); 2023 2024 for (i = 0; i < m; i++) c->rlen[i] = a->rlen[i]; 2025 for (i = 0; i < totalslices + 1; i++) c->sliidx[i] = a->sliidx[i]; 2026 2027 /* allocate the matrix space */ 2028 if (mallocmatspace) { 2029 PetscCall(PetscMalloc2(a->maxallocmat, &c->val, a->maxallocmat, &c->colidx)); 2030 2031 c->singlemalloc = PETSC_TRUE; 2032 2033 if (m > 0) { 2034 PetscCall(PetscArraycpy(c->colidx, a->colidx, a->maxallocmat)); 2035 if (cpvalues == MAT_COPY_VALUES) { 2036 PetscCall(PetscArraycpy(c->val, a->val, a->maxallocmat)); 2037 } else { 2038 PetscCall(PetscArrayzero(c->val, a->maxallocmat)); 2039 } 2040 } 2041 } 2042 2043 c->ignorezeroentries = a->ignorezeroentries; 2044 c->roworiented = a->roworiented; 2045 c->nonew = a->nonew; 2046 if (a->diag) { 2047 PetscCall(PetscMalloc1(m, &c->diag)); 2048 for (i = 0; i < m; i++) c->diag[i] = a->diag[i]; 2049 } else c->diag = NULL; 2050 2051 c->solve_work = NULL; 2052 c->saved_values = NULL; 2053 c->idiag = NULL; 2054 c->ssor_work = NULL; 2055 c->keepnonzeropattern = a->keepnonzeropattern; 2056 c->free_val = PETSC_TRUE; 2057 c->free_colidx = PETSC_TRUE; 2058 2059 c->maxallocmat = a->maxallocmat; 2060 c->maxallocrow = a->maxallocrow; 2061 c->rlenmax = a->rlenmax; 2062 c->nz = a->nz; 2063 C->preallocated = PETSC_TRUE; 2064 2065 c->nonzerorowcnt = a->nonzerorowcnt; 2066 C->nonzerostate = A->nonzerostate; 2067 2068 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 2069 PetscFunctionReturn(PETSC_SUCCESS); 2070 } 2071 2072 PetscErrorCode MatDuplicate_SeqSELL(Mat A, MatDuplicateOption cpvalues, Mat *B) 2073 { 2074 PetscFunctionBegin; 2075 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 2076 PetscCall(MatSetSizes(*B, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 2077 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 2078 PetscCall(MatSetType(*B, ((PetscObject)A)->type_name)); 2079 PetscCall(MatDuplicateNoCreate_SeqSELL(*B, A, cpvalues, PETSC_TRUE)); 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 /*MC 2084 MATSEQSELL - MATSEQSELL = "seqsell" - A matrix type to be used for sequential sparse matrices, 2085 based on the sliced Ellpack format 2086 2087 Options Database Key: 2088 . -mat_type seqsell - sets the matrix type to "`MATSEQELL` during a call to `MatSetFromOptions()` 2089 2090 Level: beginner 2091 2092 .seealso: `Mat`, `MatCreateSeqSell()`, `MATSELL`, `MATMPISELL`, `MATSEQAIJ`, `MATAIJ`, `MATMPIAIJ` 2093 M*/ 2094 2095 /*MC 2096 MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices. 2097 2098 This matrix type is identical to `MATSEQSELL` when constructed with a single process communicator, 2099 and `MATMPISELL` otherwise. As a result, for single process communicators, 2100 `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported 2101 for communicators controlling multiple processes. It is recommended that you call both of 2102 the above preallocation routines for simplicity. 2103 2104 Options Database Key: 2105 . -mat_type sell - sets the matrix type to "sell" during a call to MatSetFromOptions() 2106 2107 Level: beginner 2108 2109 Notes: 2110 This format is only supported for real scalars, double precision, and 32 bit indices (the defaults). 2111 2112 It can provide better performance on Intel and AMD processes with AVX2 or AVX512 support for matrices that have a similar number of 2113 non-zeros in contiguous groups of rows. However if the computation is memory bandwidth limited it may not provide much improvement. 2114 2115 Developer Notes: 2116 On Intel (and AMD) systems some of the matrix operations use SIMD (AVX) instructions to achieve higher performance. 2117 2118 The sparse matrix format is as follows. For simplicity we assume a slice size of 2, it is actually 8 2119 .vb 2120 (2 0 3 4) 2121 Consider the matrix A = (5 0 6 0) 2122 (0 0 7 8) 2123 (0 0 9 9) 2124 2125 symbolically the Ellpack format can be written as 2126 2127 (2 3 4 |) (0 2 3 |) 2128 v = (5 6 0 |) colidx = (0 2 2 |) 2129 -------- --------- 2130 (7 8 |) (2 3 |) 2131 (9 9 |) (2 3 |) 2132 2133 The data for 2 contiguous rows of the matrix are stored together (in column-major format) (with any left-over rows handled as a special case). 2134 Any of the rows in a slice fewer columns than the rest of the slice (row 1 above) are padded with a previous valid column in their "extra" colidx[] locations and 2135 zeros in their "extra" v locations so that the matrix operations do not need special code to handle different length rows within the 2 rows in a slice. 2136 2137 The one-dimensional representation of v used in the code is (2 5 3 6 4 0 7 9 8 9) and for colidx is (0 0 2 2 3 2 2 2 3 3) 2138 2139 .ve 2140 2141 See MatMult_SeqSELL() for how this format is used with the SIMD operations to achieve high performance. 2142 2143 References: 2144 . * - Hong Zhang, Richard T. Mills, Karl Rupp, and Barry F. Smith, Vectorized Parallel Sparse Matrix-Vector Multiplication in {PETSc} Using {AVX-512}, 2145 Proceedings of the 47th International Conference on Parallel Processing, 2018. 2146 2147 .seealso: `Mat`, `MatCreateSeqSELL()`, `MatCreateSeqAIJ()`, `MatCreateSell()`, `MATSEQSELL`, `MATMPISELL`, `MATSEQAIJ`, `MATMPIAIJ`, `MATAIJ` 2148 M*/ 2149 2150 /*@C 2151 MatCreateSeqSELL - Creates a sparse matrix in `MATSEQSELL` format. 2152 2153 Collective 2154 2155 Input Parameters: 2156 + comm - MPI communicator, set to `PETSC_COMM_SELF` 2157 . m - number of rows 2158 . n - number of columns 2159 . rlenmax - maximum number of nonzeros in a row, ignored if `rlen` is provided 2160 - rlen - array containing the number of nonzeros in the various rows (possibly different for each row) or NULL 2161 2162 Output Parameter: 2163 . A - the matrix 2164 2165 Level: intermediate 2166 2167 Notes: 2168 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2169 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2170 [MatXXXXSetPreallocation() is, for example, `MatSeqSELLSetPreallocation()`] 2171 2172 Specify the preallocated storage with either `rlenmax` or `rlen` (not both). 2173 Set `rlenmax` = `PETSC_DEFAULT` and `rlen` = `NULL` for PETSc to control dynamic memory 2174 allocation. 2175 2176 .seealso: `Mat`, `MATSEQSELL`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MatSeqSELLSetPreallocation()`, `MATSELL`, `MATSEQSELL`, `MATMPISELL` 2177 @*/ 2178 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt rlenmax, const PetscInt rlen[], Mat *A) 2179 { 2180 PetscFunctionBegin; 2181 PetscCall(MatCreate(comm, A)); 2182 PetscCall(MatSetSizes(*A, m, n, m, n)); 2183 PetscCall(MatSetType(*A, MATSEQSELL)); 2184 PetscCall(MatSeqSELLSetPreallocation_SeqSELL(*A, rlenmax, rlen)); 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 PetscErrorCode MatEqual_SeqSELL(Mat A, Mat B, PetscBool *flg) 2189 { 2190 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data, *b = (Mat_SeqSELL *)B->data; 2191 PetscInt totalslices = a->totalslices; 2192 2193 PetscFunctionBegin; 2194 /* If the matrix dimensions are not equal,or no of nonzeros */ 2195 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) || (a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2196 *flg = PETSC_FALSE; 2197 PetscFunctionReturn(PETSC_SUCCESS); 2198 } 2199 /* if the a->colidx are the same */ 2200 PetscCall(PetscArraycmp(a->colidx, b->colidx, a->sliidx[totalslices], flg)); 2201 if (!*flg) PetscFunctionReturn(PETSC_SUCCESS); 2202 /* if a->val are the same */ 2203 PetscCall(PetscArraycmp(a->val, b->val, a->sliidx[totalslices], flg)); 2204 PetscFunctionReturn(PETSC_SUCCESS); 2205 } 2206 2207 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2208 { 2209 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 2210 2211 PetscFunctionBegin; 2212 a->idiagvalid = PETSC_FALSE; 2213 PetscFunctionReturn(PETSC_SUCCESS); 2214 } 2215 2216 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2217 { 2218 #if defined(PETSC_USE_COMPLEX) 2219 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 2220 PetscInt i; 2221 PetscScalar *val = a->val; 2222 2223 PetscFunctionBegin; 2224 for (i = 0; i < a->sliidx[a->totalslices]; i++) { val[i] = PetscConj(val[i]); } 2225 #if defined(PETSC_HAVE_CUDA) 2226 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED) A->offloadmask = PETSC_OFFLOAD_CPU; 2227 #endif 2228 #else 2229 PetscFunctionBegin; 2230 #endif 2231 PetscFunctionReturn(PETSC_SUCCESS); 2232 } 2233