1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) 9 #include <immintrin.h> 10 11 #if !defined(_MM_SCALE_8) 12 #define _MM_SCALE_8 8 13 #endif 14 15 #if defined(__AVX512F__) 16 /* these do not work 17 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 18 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 19 */ 20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 22 vec_idx = _mm256_load_si256((__m256i const*)acolidx); \ 23 vec_vals = _mm512_load_pd(aval); \ 24 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 26 #elif defined(__AVX2__) 27 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 28 vec_vals = _mm256_load_pd(aval); \ 29 vec_idx = _mm_load_si128((__m128i const*)acolidx); /* SSE2 */ \ 30 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 31 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 32 #endif 33 #endif /* PETSC_HAVE_IMMINTRIN_H */ 34 35 /*@C 36 MatSeqSELLSetPreallocation - For good matrix assembly performance 37 the user should preallocate the matrix storage by setting the parameter nz 38 (or the array nnz). By setting these parameters accurately, performance 39 during matrix assembly can be increased significantly. 40 41 Collective on MPI_Comm 42 43 Input Parameters: 44 + B - The matrix 45 . nz - number of nonzeros per row (same for all rows) 46 - nnz - array containing the number of nonzeros in the various rows 47 (possibly different for each row) or NULL 48 49 Notes: 50 If nnz is given then nz is ignored. 51 52 Specify the preallocated storage with either nz or nnz (not both). 53 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 54 allocation. For large problems you MUST preallocate memory or you 55 will get TERRIBLE performance, see the users' manual chapter on matrices. 56 57 You can call MatGetInfo() to get information on how effective the preallocation was; 58 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 59 You can also run with the option -info and look for messages with the string 60 malloc in them to see if additional memory allocation was needed. 61 62 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 63 entries or columns indices. 64 65 The maximum number of nonzeos in any row should be as accuate as possible. 66 If it is underesitmated, you will get bad performance due to reallocation 67 (MatSeqXSELLReallocateSELL). 68 69 Level: intermediate 70 71 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 72 73 @*/ 74 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 75 { 76 PetscErrorCode ierr; 77 78 PetscFunctionBegin; 79 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 80 PetscValidType(B,1); 81 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 86 { 87 Mat_SeqSELL *b; 88 PetscInt i,j,totalslices; 89 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 94 if (maxallocrow == MAT_SKIP_ALLOCATION) { 95 skipallocation = PETSC_TRUE; 96 maxallocrow = 0; 97 } 98 99 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 100 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 101 102 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 103 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 104 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 105 if (rlen) { 106 for (i=0; i<B->rmap->n; i++) { 107 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 108 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 109 } 110 } 111 112 B->preallocated = PETSC_TRUE; 113 114 b = (Mat_SeqSELL*)B->data; 115 116 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 117 b->totalslices = totalslices; 118 if (!skipallocation) { 119 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 120 121 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 122 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 123 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 124 } 125 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 126 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 127 else if (maxallocrow < 0) maxallocrow = 1; 128 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 129 } else { 130 maxallocrow = 0; 131 b->sliidx[0] = 0; 132 for (i=1; i<totalslices; i++) { 133 b->sliidx[i] = 0; 134 for (j=0;j<8;j++) { 135 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 136 } 137 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 138 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 139 } 140 /* last slice */ 141 b->sliidx[totalslices] = 0; 142 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 143 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 144 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 145 } 146 147 /* allocate space for val, colidx, rlen */ 148 /* FIXME: should B's old memory be unlogged? */ 149 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 150 /* FIXME: assuming an element of the bit array takes 8 bits */ 151 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 152 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 153 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 154 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 155 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 156 157 b->singlemalloc = PETSC_TRUE; 158 b->free_val = PETSC_TRUE; 159 b->free_colidx = PETSC_TRUE; 160 } else { 161 b->free_val = PETSC_FALSE; 162 b->free_colidx = PETSC_FALSE; 163 } 164 165 b->nz = 0; 166 b->maxallocrow = maxallocrow; 167 b->rlenmax = maxallocrow; 168 b->maxallocmat = b->sliidx[totalslices]; 169 B->info.nz_unneeded = (double)b->maxallocmat; 170 if (realalloc) { 171 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 177 { 178 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 179 PetscInt shift; 180 181 PetscFunctionBegin; 182 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 183 if (nz) *nz = a->rlen[row]; 184 shift = a->sliidx[row>>3]+(row&0x07); 185 if (!a->getrowcols) { 186 PetscErrorCode ierr; 187 188 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 189 } 190 if (idx) { 191 PetscInt j; 192 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 193 *idx = a->getrowcols; 194 } 195 if (v) { 196 PetscInt j; 197 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 198 *v = a->getrowvals; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 204 { 205 PetscFunctionBegin; 206 PetscFunctionReturn(0); 207 } 208 209 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 210 { 211 Mat B; 212 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 213 PetscInt i; 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 if (reuse == MAT_REUSE_MATRIX) { 218 B = *newmat; 219 ierr = MatZeroEntries(B);CHKERRQ(ierr); 220 } else { 221 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 222 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 223 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 225 } 226 227 for (i=0; i<A->rmap->n; i++) { 228 PetscInt nz,*cols; 229 PetscScalar *vals; 230 231 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 232 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 233 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 234 } 235 236 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 237 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 B->rmap->bs = A->rmap->bs; 239 240 if (reuse == MAT_INPLACE_MATRIX) { 241 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 242 } else { 243 *newmat = B; 244 } 245 PetscFunctionReturn(0); 246 } 247 248 #include <../src/mat/impls/aij/seq/aij.h> 249 250 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 251 { 252 Mat B; 253 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 254 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 255 const PetscInt *cols; 256 const PetscScalar *vals; 257 PetscErrorCode ierr; 258 259 PetscFunctionBegin; 260 if (A->rmap->bs > 1) { 261 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 262 PetscFunctionReturn(0); 263 } 264 265 if (reuse == MAT_REUSE_MATRIX) { 266 B = *newmat; 267 } else { 268 /* Can we just use ilen? */ 269 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 270 for (i=0; i<m; i++) { 271 rowlengths[i] = ai[i+1] - ai[i]; 272 } 273 274 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 275 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 276 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 277 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 278 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 279 } 280 281 for (row=0; row<m; row++) { 282 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 283 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 285 } 286 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 287 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288 B->rmap->bs = A->rmap->bs; 289 290 if (reuse == MAT_INPLACE_MATRIX) { 291 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 292 } else { 293 *newmat = B; 294 } 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 299 { 300 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 301 PetscScalar *y; 302 const PetscScalar *x; 303 const MatScalar *aval=a->val; 304 PetscInt totalslices=a->totalslices; 305 const PetscInt *acolidx=a->colidx; 306 PetscInt i,j; 307 PetscErrorCode ierr; 308 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 309 __m512d vec_x,vec_y,vec_vals; 310 __m256i vec_idx; 311 __mmask8 mask; 312 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 313 __m256i vec_idx2,vec_idx3,vec_idx4; 314 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 315 __m128i vec_idx; 316 __m256d vec_x,vec_y,vec_y2,vec_vals; 317 MatScalar yval; 318 PetscInt r,rows_left,row,nnz_in_row; 319 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 320 __m128d vec_x_tmp; 321 __m256d vec_x,vec_y,vec_y2,vec_vals; 322 MatScalar yval; 323 PetscInt r,rows_left,row,nnz_in_row; 324 #else 325 PetscScalar sum[8]; 326 #endif 327 328 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 329 #pragma disjoint(*x,*y,*aval) 330 #endif 331 332 PetscFunctionBegin; 333 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 334 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 335 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 336 for (i=0; i<totalslices; i++) { /* loop over slices */ 337 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 338 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 339 340 vec_y = _mm512_setzero_pd(); 341 vec_y2 = _mm512_setzero_pd(); 342 vec_y3 = _mm512_setzero_pd(); 343 vec_y4 = _mm512_setzero_pd(); 344 345 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 346 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 347 case 3: 348 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 349 acolidx += 8; aval += 8; 350 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 351 acolidx += 8; aval += 8; 352 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 353 acolidx += 8; aval += 8; 354 j += 3; 355 break; 356 case 2: 357 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 358 acolidx += 8; aval += 8; 359 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 360 acolidx += 8; aval += 8; 361 j += 2; 362 break; 363 case 1: 364 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 365 acolidx += 8; aval += 8; 366 j += 1; 367 break; 368 } 369 #pragma novector 370 for (; j<(a->sliidx[i+1]>>3); j+=4) { 371 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 372 acolidx += 8; aval += 8; 373 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 374 acolidx += 8; aval += 8; 375 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 376 acolidx += 8; aval += 8; 377 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 378 acolidx += 8; aval += 8; 379 } 380 381 vec_y = _mm512_add_pd(vec_y,vec_y2); 382 vec_y = _mm512_add_pd(vec_y,vec_y3); 383 vec_y = _mm512_add_pd(vec_y,vec_y4); 384 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 385 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 386 _mm512_mask_store_pd(&y[8*i],mask,vec_y); 387 } else { 388 _mm512_store_pd(&y[8*i],vec_y); 389 } 390 } 391 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 392 for (i=0; i<totalslices; i++) { /* loop over full slices */ 393 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 394 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 395 396 /* last slice may have padding rows. Don't use vectorization. */ 397 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 398 rows_left = A->rmap->n - 8*i; 399 for (r=0; r<rows_left; ++r) { 400 yval = (MatScalar)0; 401 row = 8*i + r; 402 nnz_in_row = a->rlen[row]; 403 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 404 y[row] = yval; 405 } 406 break; 407 } 408 409 vec_y = _mm256_setzero_pd(); 410 vec_y2 = _mm256_setzero_pd(); 411 412 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 413 #pragma novector 414 #pragma unroll(2) 415 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 416 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 417 aval += 4; acolidx += 4; 418 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 419 aval += 4; acolidx += 4; 420 } 421 422 _mm256_store_pd(y+i*8,vec_y); 423 _mm256_store_pd(y+i*8+4,vec_y2); 424 } 425 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 426 for (i=0; i<totalslices; i++) { /* loop over full slices */ 427 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 428 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 429 430 vec_y = _mm256_setzero_pd(); 431 vec_y2 = _mm256_setzero_pd(); 432 433 /* last slice may have padding rows. Don't use vectorization. */ 434 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 435 rows_left = A->rmap->n - 8*i; 436 for (r=0; r<rows_left; ++r) { 437 yval = (MatScalar)0; 438 row = 8*i + r; 439 nnz_in_row = a->rlen[row]; 440 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 441 y[row] = yval; 442 } 443 break; 444 } 445 446 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 447 #pragma novector 448 #pragma unroll(2) 449 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 450 vec_vals = _mm256_loadu_pd(aval); 451 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 452 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 453 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 454 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 455 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 456 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 457 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 458 aval += 4; 459 460 vec_vals = _mm256_loadu_pd(aval); 461 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 462 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 463 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 464 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 465 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 466 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 467 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 468 aval += 4; 469 } 470 471 _mm256_storeu_pd(y + i*8, vec_y); 472 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 473 } 474 #else 475 for (i=0; i<totalslices; i++) { /* loop over slices */ 476 for (j=0; j<8; j++) sum[j] = 0.0; 477 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 478 sum[0] += aval[j] * x[acolidx[j]]; 479 sum[1] += aval[j+1] * x[acolidx[j+1]]; 480 sum[2] += aval[j+2] * x[acolidx[j+2]]; 481 sum[3] += aval[j+3] * x[acolidx[j+3]]; 482 sum[4] += aval[j+4] * x[acolidx[j+4]]; 483 sum[5] += aval[j+5] * x[acolidx[j+5]]; 484 sum[6] += aval[j+6] * x[acolidx[j+6]]; 485 sum[7] += aval[j+7] * x[acolidx[j+7]]; 486 } 487 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 488 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 489 } else { 490 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 491 } 492 } 493 #endif 494 495 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 496 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 497 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 502 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 505 PetscScalar *y,*z; 506 const PetscScalar *x; 507 const MatScalar *aval=a->val; 508 PetscInt totalslices=a->totalslices; 509 const PetscInt *acolidx=a->colidx; 510 PetscInt i,j; 511 PetscErrorCode ierr; 512 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 513 __m512d vec_x,vec_y,vec_vals; 514 __m256i vec_idx; 515 __mmask8 mask; 516 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 517 __m256i vec_idx2,vec_idx3,vec_idx4; 518 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 519 __m128d vec_x_tmp; 520 __m256d vec_x,vec_y,vec_y2,vec_vals; 521 MatScalar yval; 522 PetscInt r,row,nnz_in_row; 523 #else 524 PetscScalar sum[8]; 525 #endif 526 527 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 528 #pragma disjoint(*x,*y,*aval) 529 #endif 530 531 PetscFunctionBegin; 532 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 533 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 534 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 535 for (i=0; i<totalslices; i++) { /* loop over slices */ 536 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 537 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 538 539 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 540 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 541 vec_y = _mm512_mask_load_pd(vec_y,mask,&y[8*i]); 542 } else { 543 vec_y = _mm512_load_pd(&y[8*i]); 544 } 545 vec_y2 = _mm512_setzero_pd(); 546 vec_y3 = _mm512_setzero_pd(); 547 vec_y4 = _mm512_setzero_pd(); 548 549 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 550 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 551 case 3: 552 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 553 acolidx += 8; aval += 8; 554 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 555 acolidx += 8; aval += 8; 556 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 557 acolidx += 8; aval += 8; 558 j += 3; 559 break; 560 case 2: 561 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 562 acolidx += 8; aval += 8; 563 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 564 acolidx += 8; aval += 8; 565 j += 2; 566 break; 567 case 1: 568 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 569 acolidx += 8; aval += 8; 570 j += 1; 571 break; 572 } 573 #pragma novector 574 for (; j<(a->sliidx[i+1]>>3); j+=4) { 575 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 576 acolidx += 8; aval += 8; 577 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 578 acolidx += 8; aval += 8; 579 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 580 acolidx += 8; aval += 8; 581 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 582 acolidx += 8; aval += 8; 583 } 584 585 vec_y = _mm512_add_pd(vec_y,vec_y2); 586 vec_y = _mm512_add_pd(vec_y,vec_y3); 587 vec_y = _mm512_add_pd(vec_y,vec_y4); 588 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 589 _mm512_mask_store_pd(&z[8*i],mask,vec_y); 590 } else { 591 _mm512_store_pd(&z[8*i],vec_y); 592 } 593 } 594 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 595 for (i=0; i<totalslices; i++) { /* loop over full slices */ 596 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 597 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 598 599 /* last slice may have padding rows. Don't use vectorization. */ 600 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 601 for (r=0; r<(A->rmap->n & 0x07); ++r) { 602 row = 8*i + r; 603 yval = (MatScalar)0.0; 604 nnz_in_row = a->rlen[row]; 605 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 606 z[row] = y[row] + yval; 607 } 608 break; 609 } 610 611 vec_y = _mm256_loadu_pd(y+8*i); 612 vec_y2 = _mm256_loadu_pd(y+8*i+4); 613 614 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 615 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 616 vec_vals = _mm256_loadu_pd(aval); 617 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 618 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 619 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 620 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 621 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 622 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 623 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 624 aval += 4; 625 626 vec_vals = _mm256_loadu_pd(aval); 627 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 628 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 629 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 630 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 631 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 632 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 633 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 634 aval += 4; 635 } 636 637 _mm256_storeu_pd(z+i*8,vec_y); 638 _mm256_storeu_pd(z+i*8+4,vec_y2); 639 } 640 #else 641 for (i=0; i<totalslices; i++) { /* loop over slices */ 642 for (j=0; j<8; j++) sum[j] = 0.0; 643 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 644 sum[0] += aval[j] * x[acolidx[j]]; 645 sum[1] += aval[j+1] * x[acolidx[j+1]]; 646 sum[2] += aval[j+2] * x[acolidx[j+2]]; 647 sum[3] += aval[j+3] * x[acolidx[j+3]]; 648 sum[4] += aval[j+4] * x[acolidx[j+4]]; 649 sum[5] += aval[j+5] * x[acolidx[j+5]]; 650 sum[6] += aval[j+6] * x[acolidx[j+6]]; 651 sum[7] += aval[j+7] * x[acolidx[j+7]]; 652 } 653 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 654 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 655 } else { 656 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 657 } 658 } 659 #endif 660 661 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 662 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 663 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 668 { 669 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 670 PetscScalar *y; 671 const PetscScalar *x; 672 const MatScalar *aval=a->val; 673 const PetscInt *acolidx=a->colidx; 674 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 675 PetscErrorCode ierr; 676 677 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 678 #pragma disjoint(*x,*y,*aval) 679 #endif 680 681 PetscFunctionBegin; 682 if (A->symmetric) { 683 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 687 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 688 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 689 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 690 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 691 for (r=0; r<(A->rmap->n & 0x07); ++r) { 692 row = 8*i + r; 693 nnz_in_row = a->rlen[row]; 694 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 695 } 696 break; 697 } 698 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 699 y[acolidx[j]] += aval[j] * x[8*i]; 700 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 701 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 702 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 703 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 704 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 705 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 706 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 707 } 708 } 709 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 710 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 711 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 716 { 717 PetscErrorCode ierr; 718 719 PetscFunctionBegin; 720 if (A->symmetric) { 721 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr); 722 } else { 723 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 724 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 725 } 726 PetscFunctionReturn(0); 727 } 728 729 /* 730 Checks for missing diagonals 731 */ 732 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 733 { 734 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 735 PetscInt *diag,i; 736 737 PetscFunctionBegin; 738 *missing = PETSC_FALSE; 739 if (A->rmap->n > 0 && !(a->colidx)) { 740 *missing = PETSC_TRUE; 741 if (d) *d = 0; 742 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 743 } else { 744 diag = a->diag; 745 for (i=0; i<A->rmap->n; i++) { 746 if (diag[i] == -1) { 747 *missing = PETSC_TRUE; 748 if (d) *d = i; 749 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 750 break; 751 } 752 } 753 } 754 PetscFunctionReturn(0); 755 } 756 757 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 758 { 759 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 760 PetscInt i,j,m=A->rmap->n,shift; 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 if (!a->diag) { 765 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 766 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 767 a->free_diag = PETSC_TRUE; 768 } 769 for (i=0; i<m; i++) { /* loop over rows */ 770 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 771 a->diag[i] = -1; 772 for (j=0; j<a->rlen[i]; j++) { 773 if (a->colidx[shift+j*8] == i) { 774 a->diag[i] = shift+j*8; 775 break; 776 } 777 } 778 } 779 PetscFunctionReturn(0); 780 } 781 782 /* 783 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 784 */ 785 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 786 { 787 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 788 PetscInt i,*diag,m = A->rmap->n; 789 MatScalar *val = a->val; 790 PetscScalar *idiag,*mdiag; 791 PetscErrorCode ierr; 792 793 PetscFunctionBegin; 794 if (a->idiagvalid) PetscFunctionReturn(0); 795 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 796 diag = a->diag; 797 if (!a->idiag) { 798 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 799 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 800 val = a->val; 801 } 802 mdiag = a->mdiag; 803 idiag = a->idiag; 804 805 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 806 for (i=0; i<m; i++) { 807 mdiag[i] = val[diag[i]]; 808 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 809 if (PetscRealPart(fshift)) { 810 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 811 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 812 A->factorerror_zeropivot_value = 0.0; 813 A->factorerror_zeropivot_row = i; 814 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 815 } 816 idiag[i] = 1.0/val[diag[i]]; 817 } 818 ierr = PetscLogFlops(m);CHKERRQ(ierr); 819 } else { 820 for (i=0; i<m; i++) { 821 mdiag[i] = val[diag[i]]; 822 idiag[i] = omega/(fshift + val[diag[i]]); 823 } 824 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 825 } 826 a->idiagvalid = PETSC_TRUE; 827 PetscFunctionReturn(0); 828 } 829 830 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 831 { 832 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 833 PetscErrorCode ierr; 834 835 PetscFunctionBegin; 836 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 837 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 PetscErrorCode MatDestroy_SeqSELL(Mat A) 842 { 843 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 844 PetscErrorCode ierr; 845 846 PetscFunctionBegin; 847 #if defined(PETSC_USE_LOG) 848 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 849 #endif 850 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 851 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 852 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 853 ierr = PetscFree(a->diag);CHKERRQ(ierr); 854 ierr = PetscFree(a->ibdiag);CHKERRQ(ierr); 855 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 856 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 857 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 858 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 859 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 860 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 861 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 862 863 ierr = PetscFree(A->data);CHKERRQ(ierr); 864 865 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 866 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 867 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 868 #if defined(PETSC_HAVE_ELEMENTAL) 869 #endif 870 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 875 { 876 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 877 PetscErrorCode ierr; 878 879 PetscFunctionBegin; 880 switch (op) { 881 case MAT_ROW_ORIENTED: 882 a->roworiented = flg; 883 break; 884 case MAT_KEEP_NONZERO_PATTERN: 885 a->keepnonzeropattern = flg; 886 break; 887 case MAT_NEW_NONZERO_LOCATIONS: 888 a->nonew = (flg ? 0 : 1); 889 break; 890 case MAT_NEW_NONZERO_LOCATION_ERR: 891 a->nonew = (flg ? -1 : 0); 892 break; 893 case MAT_NEW_NONZERO_ALLOCATION_ERR: 894 a->nonew = (flg ? -2 : 0); 895 break; 896 case MAT_UNUSED_NONZERO_LOCATION_ERR: 897 a->nounused = (flg ? -1 : 0); 898 break; 899 case MAT_NEW_DIAGONALS: 900 case MAT_IGNORE_OFF_PROC_ENTRIES: 901 case MAT_USE_HASH_TABLE: 902 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 903 break; 904 case MAT_SPD: 905 case MAT_SYMMETRIC: 906 case MAT_STRUCTURALLY_SYMMETRIC: 907 case MAT_HERMITIAN: 908 case MAT_SYMMETRY_ETERNAL: 909 /* These options are handled directly by MatSetOption() */ 910 break; 911 default: 912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 913 } 914 PetscFunctionReturn(0); 915 } 916 917 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 918 { 919 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 920 PetscInt i,j,n,shift; 921 PetscScalar *x,zero=0.0; 922 PetscErrorCode ierr; 923 924 PetscFunctionBegin; 925 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 926 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 927 928 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 929 PetscInt *diag=a->diag; 930 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 931 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 932 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 936 ierr = VecSet(v,zero);CHKERRQ(ierr); 937 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 938 for (i=0; i<n; i++) { /* loop over rows */ 939 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 940 x[i] = 0; 941 for (j=0; j<a->rlen[i]; j++) { 942 if (a->colidx[shift+j*8] == i) { 943 x[i] = a->val[shift+j*8]; 944 break; 945 } 946 } 947 } 948 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 953 { 954 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 955 const PetscScalar *l,*r; 956 PetscInt i,j,m,n,row; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 if (ll) { 961 /* The local size is used so that VecMPI can be passed to this routine 962 by MatDiagonalScale_MPISELL */ 963 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 964 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 965 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 966 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 967 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 968 a->val[j] *= l[8*i+row]; 969 } 970 } 971 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 972 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 973 } 974 if (rr) { 975 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 976 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 977 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 978 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 979 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 980 a->val[j] *= r[a->colidx[j]]; 981 } 982 } 983 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 984 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 985 } 986 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 991 992 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 993 { 994 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 995 PetscInt *cp,i,k,low,high,t,row,col,l; 996 PetscInt shift; 997 MatScalar *vp; 998 999 PetscFunctionBegin; 1000 for (k=0; k<m; k++) { /* loop over requested rows */ 1001 row = im[k]; 1002 if (row<0) continue; 1003 #if defined(PETSC_USE_DEBUG) 1004 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1005 #endif 1006 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1007 cp = a->colidx+shift; /* pointer to the row */ 1008 vp = a->val+shift; /* pointer to the row */ 1009 for (l=0; l<n; l++) { /* loop over requested columns */ 1010 col = in[l]; 1011 if (col<0) continue; 1012 #if defined(PETSC_USE_DEBUG) 1013 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 1014 #endif 1015 high = a->rlen[row]; low = 0; /* assume unsorted */ 1016 while (high-low > 5) { 1017 t = (low+high)/2; 1018 if (*(cp+t*8) > col) high = t; 1019 else low = t; 1020 } 1021 for (i=low; i<high; i++) { 1022 if (*(cp+8*i) > col) break; 1023 if (*(cp+8*i) == col) { 1024 *v++ = *(vp+8*i); 1025 goto finished; 1026 } 1027 } 1028 *v++ = 0.0; 1029 finished:; 1030 } 1031 } 1032 PetscFunctionReturn(0); 1033 } 1034 1035 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1036 { 1037 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1038 PetscInt i,j,m=A->rmap->n,shift; 1039 const char *name; 1040 PetscViewerFormat format; 1041 PetscErrorCode ierr; 1042 1043 PetscFunctionBegin; 1044 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1045 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1046 PetscInt nofinalvalue = 0; 1047 /* 1048 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1049 nofinalvalue = 1; 1050 } 1051 */ 1052 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1053 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1054 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1055 #if defined(PETSC_USE_COMPLEX) 1056 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1057 #else 1058 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1059 #endif 1060 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1061 1062 for (i=0; i<m; i++) { 1063 shift = a->sliidx[i>>3]+(i&0x07); 1064 for (j=0; j<a->rlen[i]; j++) { 1065 #if defined(PETSC_USE_COMPLEX) 1066 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1067 #else 1068 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1069 #endif 1070 } 1071 } 1072 /* 1073 if (nofinalvalue) { 1074 #if defined(PETSC_USE_COMPLEX) 1075 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1076 #else 1077 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1078 #endif 1079 } 1080 */ 1081 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1082 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1083 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1084 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1085 PetscFunctionReturn(0); 1086 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1087 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1088 for (i=0; i<m; i++) { 1089 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1090 shift = a->sliidx[i>>3]+(i&0x07); 1091 for (j=0; j<a->rlen[i]; j++) { 1092 #if defined(PETSC_USE_COMPLEX) 1093 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1094 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1095 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1096 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1097 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1098 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1099 } 1100 #else 1101 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1102 #endif 1103 } 1104 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1105 } 1106 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1107 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1108 PetscInt cnt=0,jcnt; 1109 PetscScalar value; 1110 #if defined(PETSC_USE_COMPLEX) 1111 PetscBool realonly=PETSC_TRUE; 1112 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1113 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1114 realonly = PETSC_FALSE; 1115 break; 1116 } 1117 } 1118 #endif 1119 1120 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1121 for (i=0; i<m; i++) { 1122 jcnt = 0; 1123 shift = a->sliidx[i>>3]+(i&0x07); 1124 for (j=0; j<A->cmap->n; j++) { 1125 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1126 value = a->val[cnt++]; 1127 jcnt++; 1128 } else { 1129 value = 0.0; 1130 } 1131 #if defined(PETSC_USE_COMPLEX) 1132 if (realonly) { 1133 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1134 } else { 1135 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1136 } 1137 #else 1138 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1139 #endif 1140 } 1141 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1142 } 1143 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1144 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1145 PetscInt fshift=1; 1146 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1147 #if defined(PETSC_USE_COMPLEX) 1148 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1149 #else 1150 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1151 #endif 1152 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1153 for (i=0; i<m; i++) { 1154 shift = a->sliidx[i>>3]+(i&0x07); 1155 for (j=0; j<a->rlen[i]; j++) { 1156 #if defined(PETSC_USE_COMPLEX) 1157 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1158 #else 1159 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1160 #endif 1161 } 1162 } 1163 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1164 } else if (format == PETSC_VIEWER_NATIVE) { 1165 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1166 PetscInt row; 1167 ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1168 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1169 #if defined(PETSC_USE_COMPLEX) 1170 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1171 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1172 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1173 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1174 } else { 1175 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1176 } 1177 #else 1178 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1179 #endif 1180 } 1181 } 1182 } else { 1183 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1184 if (A->factortype) { 1185 for (i=0; i<m; i++) { 1186 shift = a->sliidx[i>>3]+(i&0x07); 1187 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1188 /* L part */ 1189 for (j=shift; j<a->diag[i]; j+=8) { 1190 #if defined(PETSC_USE_COMPLEX) 1191 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1192 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1193 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1194 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1195 } else { 1196 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1197 } 1198 #else 1199 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1200 #endif 1201 } 1202 /* diagonal */ 1203 j = a->diag[i]; 1204 #if defined(PETSC_USE_COMPLEX) 1205 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1206 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1207 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1208 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1209 } else { 1210 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1211 } 1212 #else 1213 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1214 #endif 1215 1216 /* U part */ 1217 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1218 #if defined(PETSC_USE_COMPLEX) 1219 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1220 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1221 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1222 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1223 } else { 1224 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1225 } 1226 #else 1227 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1228 #endif 1229 } 1230 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1231 } 1232 } else { 1233 for (i=0; i<m; i++) { 1234 shift = a->sliidx[i>>3]+(i&0x07); 1235 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1236 for (j=0; j<a->rlen[i]; j++) { 1237 #if defined(PETSC_USE_COMPLEX) 1238 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1239 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1240 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1241 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1244 } 1245 #else 1246 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1247 #endif 1248 } 1249 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1250 } 1251 } 1252 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1253 } 1254 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #include <petscdraw.h> 1259 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1260 { 1261 Mat A=(Mat)Aa; 1262 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1263 PetscInt i,j,m=A->rmap->n,shift; 1264 int color; 1265 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1266 PetscViewer viewer; 1267 PetscViewerFormat format; 1268 PetscErrorCode ierr; 1269 1270 PetscFunctionBegin; 1271 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1272 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1273 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1274 1275 /* loop over matrix elements drawing boxes */ 1276 1277 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1278 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1279 /* Blue for negative, Cyan for zero and Red for positive */ 1280 color = PETSC_DRAW_BLUE; 1281 for (i=0; i<m; i++) { 1282 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1283 y_l = m - i - 1.0; y_r = y_l + 1.0; 1284 for (j=0; j<a->rlen[i]; j++) { 1285 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1286 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1287 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1288 } 1289 } 1290 color = PETSC_DRAW_CYAN; 1291 for (i=0; i<m; i++) { 1292 shift = a->sliidx[i>>3]+(i&0x07); 1293 y_l = m - i - 1.0; y_r = y_l + 1.0; 1294 for (j=0; j<a->rlen[i]; j++) { 1295 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1296 if (a->val[shift+8*j] != 0.) continue; 1297 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1298 } 1299 } 1300 color = PETSC_DRAW_RED; 1301 for (i=0; i<m; i++) { 1302 shift = a->sliidx[i>>3]+(i&0x07); 1303 y_l = m - i - 1.0; y_r = y_l + 1.0; 1304 for (j=0; j<a->rlen[i]; j++) { 1305 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1306 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1307 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1308 } 1309 } 1310 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1311 } else { 1312 /* use contour shading to indicate magnitude of values */ 1313 /* first determine max of all nonzero values */ 1314 PetscReal minv=0.0,maxv=0.0; 1315 PetscInt count=0; 1316 PetscDraw popup; 1317 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1318 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1319 } 1320 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1321 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1322 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1323 1324 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1325 for (i=0; i<m; i++) { 1326 shift = a->sliidx[i>>3]+(i&0x07); 1327 y_l = m - i - 1.0; 1328 y_r = y_l + 1.0; 1329 for (j=0; j<a->rlen[i]; j++) { 1330 x_l = a->colidx[shift+j*8]; 1331 x_r = x_l + 1.0; 1332 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1333 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1334 count++; 1335 } 1336 } 1337 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1338 } 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #include <petscdraw.h> 1343 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1344 { 1345 PetscDraw draw; 1346 PetscReal xr,yr,xl,yl,h,w; 1347 PetscBool isnull; 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1352 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1353 if (isnull) PetscFunctionReturn(0); 1354 1355 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1356 xr += w; yr += h; xl = -w; yl = -h; 1357 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1358 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1359 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1360 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1361 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1366 { 1367 PetscBool iascii,isbinary,isdraw; 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1372 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1373 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1374 if (iascii) { 1375 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1376 } else if (isbinary) { 1377 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1378 } else if (isdraw) { 1379 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1380 } 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1385 { 1386 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1387 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1388 MatScalar *vp; 1389 PetscErrorCode ierr; 1390 1391 PetscFunctionBegin; 1392 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1393 /* To do: compress out the unused elements */ 1394 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1395 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1396 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1397 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1398 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1399 for (i=0; i<a->totalslices; ++i) { 1400 shift = a->sliidx[i]; /* starting index of the slice */ 1401 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1402 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1403 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1404 row = 8*i + row_in_slice; 1405 nrow = a->rlen[row]; /* number of nonzeros in row */ 1406 /* 1407 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1408 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1409 */ 1410 lastcol = 0; 1411 if (nrow>0) { /* nonempty row */ 1412 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1413 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1414 for (j=1;j<8;j++) { 1415 if (a->rlen[8*i+j]) { 1416 lastcol = cp[j]; 1417 break; 1418 } 1419 } 1420 } else { 1421 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1422 } 1423 1424 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1425 cp[8*k+row_in_slice] = lastcol; 1426 vp[8*k+row_in_slice] = (MatScalar)0; 1427 } 1428 } 1429 } 1430 1431 A->info.mallocs += a->reallocs; 1432 a->reallocs = 0; 1433 1434 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1439 { 1440 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1441 1442 PetscFunctionBegin; 1443 info->block_size = 1.0; 1444 info->nz_allocated = (double)a->maxallocmat; 1445 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1446 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1447 info->assemblies = (double)A->num_ass; 1448 info->mallocs = (double)A->info.mallocs; 1449 info->memory = ((PetscObject)A)->mem; 1450 if (A->factortype) { 1451 info->fill_ratio_given = A->info.fill_ratio_given; 1452 info->fill_ratio_needed = A->info.fill_ratio_needed; 1453 info->factor_mallocs = A->info.factor_mallocs; 1454 } else { 1455 info->fill_ratio_given = 0; 1456 info->fill_ratio_needed = 0; 1457 info->factor_mallocs = 0; 1458 } 1459 PetscFunctionReturn(0); 1460 } 1461 1462 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1463 { 1464 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1465 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1466 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1467 MatScalar *vp,value; 1468 PetscErrorCode ierr; 1469 1470 PetscFunctionBegin; 1471 for (k=0; k<m; k++) { /* loop over added rows */ 1472 row = im[k]; 1473 if (row < 0) continue; 1474 #if defined(PETSC_USE_DEBUG) 1475 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1476 #endif 1477 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1478 cp = a->colidx+shift; /* pointer to the row */ 1479 vp = a->val+shift; /* pointer to the row */ 1480 nrow = a->rlen[row]; 1481 low = 0; 1482 high = nrow; 1483 1484 for (l=0; l<n; l++) { /* loop over added columns */ 1485 col = in[l]; 1486 if (col<0) continue; 1487 #if defined(PETSC_USE_DEBUG) 1488 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1489 #endif 1490 if (a->roworiented) { 1491 value = v[l+k*n]; 1492 } else { 1493 value = v[k+l*m]; 1494 } 1495 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1496 1497 /* search in this row for the specified colmun, i indicates the column to be set */ 1498 if (col <= lastcol) low = 0; 1499 else high = nrow; 1500 lastcol = col; 1501 while (high-low > 5) { 1502 t = (low+high)/2; 1503 if (*(cp+t*8) > col) high = t; 1504 else low = t; 1505 } 1506 for (i=low; i<high; i++) { 1507 if (*(cp+i*8) > col) break; 1508 if (*(cp+i*8) == col) { 1509 if (is == ADD_VALUES) *(vp+i*8) += value; 1510 else *(vp+i*8) = value; 1511 low = i + 1; 1512 goto noinsert; 1513 } 1514 } 1515 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1516 if (nonew == 1) goto noinsert; 1517 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1518 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1519 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1520 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1521 for (ii=nrow-1; ii>=i; ii--) { 1522 *(cp+(ii+1)*8) = *(cp+ii*8); 1523 *(vp+(ii+1)*8) = *(vp+ii*8); 1524 } 1525 a->rlen[row]++; 1526 *(cp+i*8) = col; 1527 *(vp+i*8) = value; 1528 a->nz++; 1529 A->nonzerostate++; 1530 low = i+1; high++; nrow++; 1531 noinsert:; 1532 } 1533 a->rlen[row] = nrow; 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1539 { 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 /* If the two matrices have the same copy implementation, use fast copy. */ 1544 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1545 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1546 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1547 1548 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1549 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1550 } else { 1551 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1552 } 1553 PetscFunctionReturn(0); 1554 } 1555 1556 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1557 { 1558 PetscErrorCode ierr; 1559 1560 PetscFunctionBegin; 1561 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1562 PetscFunctionReturn(0); 1563 } 1564 1565 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1566 { 1567 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1568 1569 PetscFunctionBegin; 1570 *array = a->val; 1571 PetscFunctionReturn(0); 1572 } 1573 1574 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1575 { 1576 PetscFunctionBegin; 1577 PetscFunctionReturn(0); 1578 } 1579 1580 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1581 { 1582 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1583 PetscInt i; 1584 MatScalar *aval=a->val; 1585 1586 PetscFunctionBegin; 1587 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1588 PetscFunctionReturn(0); 1589 } 1590 1591 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1592 { 1593 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1594 PetscInt i; 1595 MatScalar *aval=a->val; 1596 PetscErrorCode ierr; 1597 1598 PetscFunctionBegin; 1599 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1600 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1605 { 1606 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1607 MatScalar *aval=a->val; 1608 PetscScalar oalpha=alpha; 1609 PetscBLASInt one=1,size; 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1614 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1615 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1616 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1617 PetscFunctionReturn(0); 1618 } 1619 1620 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1621 { 1622 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 if (!Y->preallocated || !y->nz) { 1627 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1628 } 1629 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1630 PetscFunctionReturn(0); 1631 } 1632 1633 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1634 { 1635 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1636 PetscScalar *x,sum,*t; 1637 const MatScalar *idiag=0,*mdiag; 1638 const PetscScalar *b,*xb; 1639 PetscInt n,m=A->rmap->n,i,j,shift; 1640 const PetscInt *diag; 1641 PetscErrorCode ierr; 1642 1643 PetscFunctionBegin; 1644 its = its*lits; 1645 1646 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1647 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1648 a->fshift = fshift; 1649 a->omega = omega; 1650 1651 diag = a->diag; 1652 t = a->ssor_work; 1653 idiag = a->idiag; 1654 mdiag = a->mdiag; 1655 1656 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1657 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1658 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1659 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1660 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1661 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1662 1663 if (flag & SOR_ZERO_INITIAL_GUESS) { 1664 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1665 for (i=0; i<m; i++) { 1666 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1667 sum = b[i]; 1668 n = (diag[i]-shift)/8; 1669 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1670 t[i] = sum; 1671 x[i] = sum*idiag[i]; 1672 } 1673 xb = t; 1674 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1675 } else xb = b; 1676 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1677 for (i=m-1; i>=0; i--) { 1678 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1679 sum = xb[i]; 1680 n = a->rlen[i]-(diag[i]-shift)/8-1; 1681 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1682 if (xb == b) { 1683 x[i] = sum*idiag[i]; 1684 } else { 1685 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1686 } 1687 } 1688 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1689 } 1690 its--; 1691 } 1692 while (its--) { 1693 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1694 for (i=0; i<m; i++) { 1695 /* lower */ 1696 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1697 sum = b[i]; 1698 n = (diag[i]-shift)/8; 1699 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1700 t[i] = sum; /* save application of the lower-triangular part */ 1701 /* upper */ 1702 n = a->rlen[i]-(diag[i]-shift)/8-1; 1703 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1704 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1705 } 1706 xb = t; 1707 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1708 } else xb = b; 1709 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1710 for (i=m-1; i>=0; i--) { 1711 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1712 sum = xb[i]; 1713 if (xb == b) { 1714 /* whole matrix (no checkpointing available) */ 1715 n = a->rlen[i]; 1716 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1717 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1718 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1719 n = a->rlen[i]-(diag[i]-shift)/8-1; 1720 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1721 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1722 } 1723 } 1724 if (xb == b) { 1725 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1726 } else { 1727 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1728 } 1729 } 1730 } 1731 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1732 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 /* -------------------------------------------------------------------*/ 1737 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1738 MatGetRow_SeqSELL, 1739 MatRestoreRow_SeqSELL, 1740 MatMult_SeqSELL, 1741 /* 4*/ MatMultAdd_SeqSELL, 1742 MatMultTranspose_SeqSELL, 1743 MatMultTransposeAdd_SeqSELL, 1744 0, 1745 0, 1746 0, 1747 /* 10*/ 0, 1748 0, 1749 0, 1750 MatSOR_SeqSELL, 1751 0, 1752 /* 15*/ MatGetInfo_SeqSELL, 1753 MatEqual_SeqSELL, 1754 MatGetDiagonal_SeqSELL, 1755 MatDiagonalScale_SeqSELL, 1756 0, 1757 /* 20*/ 0, 1758 MatAssemblyEnd_SeqSELL, 1759 MatSetOption_SeqSELL, 1760 MatZeroEntries_SeqSELL, 1761 /* 24*/ 0, 1762 0, 1763 0, 1764 0, 1765 0, 1766 /* 29*/ MatSetUp_SeqSELL, 1767 0, 1768 0, 1769 0, 1770 0, 1771 /* 34*/ MatDuplicate_SeqSELL, 1772 0, 1773 0, 1774 0, 1775 0, 1776 /* 39*/ 0, 1777 0, 1778 0, 1779 MatGetValues_SeqSELL, 1780 MatCopy_SeqSELL, 1781 /* 44*/ 0, 1782 MatScale_SeqSELL, 1783 MatShift_SeqSELL, 1784 0, 1785 0, 1786 /* 49*/ 0, 1787 0, 1788 0, 1789 0, 1790 0, 1791 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1792 0, 1793 0, 1794 0, 1795 0, 1796 /* 59*/ 0, 1797 MatDestroy_SeqSELL, 1798 MatView_SeqSELL, 1799 0, 1800 0, 1801 /* 64*/ 0, 1802 0, 1803 0, 1804 0, 1805 0, 1806 /* 69*/ 0, 1807 0, 1808 0, 1809 0, 1810 0, 1811 /* 74*/ 0, 1812 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1813 0, 1814 0, 1815 0, 1816 /* 79*/ 0, 1817 0, 1818 0, 1819 0, 1820 0, 1821 /* 84*/ 0, 1822 0, 1823 0, 1824 0, 1825 0, 1826 /* 89*/ 0, 1827 0, 1828 0, 1829 0, 1830 0, 1831 /* 94*/ 0, 1832 0, 1833 0, 1834 0, 1835 0, 1836 /* 99*/ 0, 1837 0, 1838 0, 1839 MatConjugate_SeqSELL, 1840 0, 1841 /*104*/ 0, 1842 0, 1843 0, 1844 0, 1845 0, 1846 /*109*/ 0, 1847 0, 1848 0, 1849 0, 1850 MatMissingDiagonal_SeqSELL, 1851 /*114*/ 0, 1852 0, 1853 0, 1854 0, 1855 0, 1856 /*119*/ 0, 1857 0, 1858 0, 1859 0, 1860 0, 1861 /*124*/ 0, 1862 0, 1863 0, 1864 0, 1865 0, 1866 /*129*/ 0, 1867 0, 1868 0, 1869 0, 1870 0, 1871 /*134*/ 0, 1872 0, 1873 0, 1874 0, 1875 0, 1876 /*139*/ 0, 1877 0, 1878 0, 1879 MatFDColoringSetUp_SeqXAIJ, 1880 0, 1881 /*144*/0 1882 }; 1883 1884 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1885 { 1886 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1891 1892 /* allocate space for values if not already there */ 1893 if (!a->saved_values) { 1894 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1895 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1896 } 1897 1898 /* copy values over */ 1899 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1904 { 1905 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1906 PetscErrorCode ierr; 1907 1908 PetscFunctionBegin; 1909 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1910 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1911 /* copy values over */ 1912 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 /*@C 1917 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1918 1919 Not Collective 1920 1921 Input Parameters: 1922 . mat - a MATSEQSELL matrix 1923 . array - pointer to the data 1924 1925 Level: intermediate 1926 1927 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1928 @*/ 1929 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1930 { 1931 PetscErrorCode ierr; 1932 1933 PetscFunctionBegin; 1934 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1935 PetscFunctionReturn(0); 1936 } 1937 1938 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1939 { 1940 Mat_SeqSELL *b; 1941 PetscMPIInt size; 1942 PetscErrorCode ierr; 1943 1944 PetscFunctionBegin; 1945 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1946 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1947 1948 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1949 1950 B->data = (void*)b; 1951 1952 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1953 1954 b->row = 0; 1955 b->col = 0; 1956 b->icol = 0; 1957 b->reallocs = 0; 1958 b->ignorezeroentries = PETSC_FALSE; 1959 b->roworiented = PETSC_TRUE; 1960 b->nonew = 0; 1961 b->diag = 0; 1962 b->solve_work = 0; 1963 B->spptr = 0; 1964 b->saved_values = 0; 1965 b->idiag = 0; 1966 b->mdiag = 0; 1967 b->ssor_work = 0; 1968 b->omega = 1.0; 1969 b->fshift = 0.0; 1970 b->idiagvalid = PETSC_FALSE; 1971 b->ibdiagvalid = PETSC_FALSE; 1972 b->keepnonzeropattern = PETSC_FALSE; 1973 1974 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1976 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1977 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1978 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1979 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1980 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1981 PetscFunctionReturn(0); 1982 } 1983 1984 /* 1985 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1986 */ 1987 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1988 { 1989 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1990 PetscInt i,m=A->rmap->n; 1991 PetscInt totalslices=a->totalslices; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 c = (Mat_SeqSELL*)C->data; 1996 1997 C->factortype = A->factortype; 1998 c->row = 0; 1999 c->col = 0; 2000 c->icol = 0; 2001 c->reallocs = 0; 2002 2003 C->assembled = PETSC_TRUE; 2004 2005 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 2006 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 2007 2008 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2009 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2010 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2011 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2012 2013 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2014 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2015 2016 /* allocate the matrix space */ 2017 if (mallocmatspace) { 2018 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2019 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2020 2021 c->singlemalloc = PETSC_TRUE; 2022 2023 if (m > 0) { 2024 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 2025 if (cpvalues == MAT_COPY_VALUES) { 2026 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2027 } else { 2028 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2029 } 2030 } 2031 } 2032 2033 c->ignorezeroentries = a->ignorezeroentries; 2034 c->roworiented = a->roworiented; 2035 c->nonew = a->nonew; 2036 if (a->diag) { 2037 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2038 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2039 for (i=0; i<m; i++) { 2040 c->diag[i] = a->diag[i]; 2041 } 2042 } else c->diag = 0; 2043 2044 c->solve_work = 0; 2045 c->saved_values = 0; 2046 c->idiag = 0; 2047 c->ssor_work = 0; 2048 c->keepnonzeropattern = a->keepnonzeropattern; 2049 c->free_val = PETSC_TRUE; 2050 c->free_colidx = PETSC_TRUE; 2051 2052 c->maxallocmat = a->maxallocmat; 2053 c->maxallocrow = a->maxallocrow; 2054 c->rlenmax = a->rlenmax; 2055 c->nz = a->nz; 2056 C->preallocated = PETSC_TRUE; 2057 2058 c->nonzerorowcnt = a->nonzerorowcnt; 2059 C->nonzerostate = A->nonzerostate; 2060 2061 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2062 PetscFunctionReturn(0); 2063 } 2064 2065 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2066 { 2067 PetscErrorCode ierr; 2068 2069 PetscFunctionBegin; 2070 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2071 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2072 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2073 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2074 } 2075 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2076 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 /*@C 2081 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2082 2083 Collective on MPI_Comm 2084 2085 Input Parameters: 2086 + comm - MPI communicator, set to PETSC_COMM_SELF 2087 . m - number of rows 2088 . n - number of columns 2089 . rlenmax - maximum number of nonzeros in a row 2090 - rlen - array containing the number of nonzeros in the various rows 2091 (possibly different for each row) or NULL 2092 2093 Output Parameter: 2094 . A - the matrix 2095 2096 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2097 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2098 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2099 2100 Notes: 2101 If nnz is given then nz is ignored 2102 2103 Specify the preallocated storage with either rlenmax or rlen (not both). 2104 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2105 allocation. For large problems you MUST preallocate memory or you 2106 will get TERRIBLE performance, see the users' manual chapter on matrices. 2107 2108 Level: intermediate 2109 2110 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2111 2112 @*/ 2113 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2114 { 2115 PetscErrorCode ierr; 2116 2117 PetscFunctionBegin; 2118 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2119 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2120 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2121 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2122 PetscFunctionReturn(0); 2123 } 2124 2125 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2126 { 2127 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2128 PetscInt totalslices=a->totalslices; 2129 PetscErrorCode ierr; 2130 2131 PetscFunctionBegin; 2132 /* If the matrix dimensions are not equal,or no of nonzeros */ 2133 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2134 *flg = PETSC_FALSE; 2135 PetscFunctionReturn(0); 2136 } 2137 /* if the a->colidx are the same */ 2138 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2139 if (!*flg) PetscFunctionReturn(0); 2140 /* if a->val are the same */ 2141 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2146 { 2147 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2148 2149 PetscFunctionBegin; 2150 a->idiagvalid = PETSC_FALSE; 2151 a->ibdiagvalid = PETSC_FALSE; 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2156 { 2157 #if defined(PETSC_USE_COMPLEX) 2158 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2159 PetscInt i; 2160 PetscScalar *val = a->val; 2161 2162 PetscFunctionBegin; 2163 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2164 val[i] = PetscConj(val[i]); 2165 } 2166 #else 2167 PetscFunctionBegin; 2168 #endif 2169 PetscFunctionReturn(0); 2170 } 2171