1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) 9 #include <immintrin.h> 10 11 #if !defined(_MM_SCALE_8) 12 #define _MM_SCALE_8 8 13 #endif 14 15 #if defined(__AVX512F__) 16 /* these do not work 17 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 18 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 19 */ 20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 22 vec_idx = _mm256_load_si256((__m256i const*)acolidx); \ 23 vec_vals = _mm512_load_pd(aval); \ 24 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 26 #elif defined(__AVX2__) 27 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 28 vec_vals = _mm256_load_pd(aval); \ 29 vec_idx = _mm_load_si128((__m128i const*)acolidx); /* SSE2 */ \ 30 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 31 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 32 #endif 33 #endif /* PETSC_HAVE_IMMINTRIN_H */ 34 35 /*@C 36 MatSeqSELLSetPreallocation - For good matrix assembly performance 37 the user should preallocate the matrix storage by setting the parameter nz 38 (or the array nnz). By setting these parameters accurately, performance 39 during matrix assembly can be increased significantly. 40 41 Collective on MPI_Comm 42 43 Input Parameters: 44 + B - The matrix 45 . nz - number of nonzeros per row (same for all rows) 46 - nnz - array containing the number of nonzeros in the various rows 47 (possibly different for each row) or NULL 48 49 Notes: 50 If nnz is given then nz is ignored. 51 52 Specify the preallocated storage with either nz or nnz (not both). 53 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 54 allocation. For large problems you MUST preallocate memory or you 55 will get TERRIBLE performance, see the users' manual chapter on matrices. 56 57 You can call MatGetInfo() to get information on how effective the preallocation was; 58 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 59 You can also run with the option -info and look for messages with the string 60 malloc in them to see if additional memory allocation was needed. 61 62 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 63 entries or columns indices. 64 65 The maximum number of nonzeos in any row should be as accuate as possible. 66 If it is underesitmated, you will get bad performance due to reallocation 67 (MatSeqXSELLReallocateSELL). 68 69 Level: intermediate 70 71 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 72 73 @*/ 74 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 75 { 76 PetscErrorCode ierr; 77 78 PetscFunctionBegin; 79 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 80 PetscValidType(B,1); 81 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 86 { 87 Mat_SeqSELL *b; 88 PetscInt i,j,totalslices; 89 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 94 if (maxallocrow == MAT_SKIP_ALLOCATION) { 95 skipallocation = PETSC_TRUE; 96 maxallocrow = 0; 97 } 98 99 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 100 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 101 102 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 103 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 104 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 105 if (rlen) { 106 for (i=0; i<B->rmap->n; i++) { 107 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 108 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 109 } 110 } 111 112 B->preallocated = PETSC_TRUE; 113 114 b = (Mat_SeqSELL*)B->data; 115 116 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 117 b->totalslices = totalslices; 118 if (!skipallocation) { 119 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 120 121 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 122 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 123 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 124 } 125 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 126 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 127 else if (maxallocrow < 0) maxallocrow = 1; 128 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 129 } else { 130 maxallocrow = 0; 131 b->sliidx[0] = 0; 132 for (i=1; i<totalslices; i++) { 133 b->sliidx[i] = 0; 134 for (j=0;j<8;j++) { 135 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 136 } 137 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 138 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 139 } 140 /* last slice */ 141 b->sliidx[totalslices] = 0; 142 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 143 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 144 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 145 } 146 147 /* allocate space for val, colidx, rlen */ 148 /* FIXME: should B's old memory be unlogged? */ 149 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 150 /* FIXME: assuming an element of the bit array takes 8 bits */ 151 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 152 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 153 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 154 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 155 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 156 157 b->singlemalloc = PETSC_TRUE; 158 b->free_val = PETSC_TRUE; 159 b->free_colidx = PETSC_TRUE; 160 } else { 161 b->free_val = PETSC_FALSE; 162 b->free_colidx = PETSC_FALSE; 163 } 164 165 b->nz = 0; 166 b->maxallocrow = maxallocrow; 167 b->rlenmax = maxallocrow; 168 b->maxallocmat = b->sliidx[totalslices]; 169 B->info.nz_unneeded = (double)b->maxallocmat; 170 if (realalloc) { 171 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 177 { 178 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 179 PetscInt shift; 180 181 PetscFunctionBegin; 182 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 183 if (nz) *nz = a->rlen[row]; 184 shift = a->sliidx[row>>3]+(row&0x07); 185 if (!a->getrowcols) { 186 PetscErrorCode ierr; 187 188 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 189 } 190 if (idx) { 191 PetscInt j; 192 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 193 *idx = a->getrowcols; 194 } 195 if (v) { 196 PetscInt j; 197 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 198 *v = a->getrowvals; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 204 { 205 PetscFunctionBegin; 206 PetscFunctionReturn(0); 207 } 208 209 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 210 { 211 Mat B; 212 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 213 PetscInt i; 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 if (reuse == MAT_REUSE_MATRIX) { 218 B = *newmat; 219 ierr = MatZeroEntries(B);CHKERRQ(ierr); 220 } else { 221 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 222 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 223 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 225 } 226 227 for (i=0; i<A->rmap->n; i++) { 228 PetscInt nz,*cols; 229 PetscScalar *vals; 230 231 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 232 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 233 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 234 } 235 236 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 237 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 B->rmap->bs = A->rmap->bs; 239 240 if (reuse == MAT_INPLACE_MATRIX) { 241 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 242 } else { 243 *newmat = B; 244 } 245 PetscFunctionReturn(0); 246 } 247 248 #include <../src/mat/impls/aij/seq/aij.h> 249 250 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 251 { 252 Mat B; 253 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 254 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 255 const PetscInt *cols; 256 const PetscScalar *vals; 257 PetscErrorCode ierr; 258 259 PetscFunctionBegin; 260 if (A->rmap->bs > 1) { 261 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 262 PetscFunctionReturn(0); 263 } 264 265 if (reuse == MAT_REUSE_MATRIX) { 266 B = *newmat; 267 } else { 268 /* Can we just use ilen? */ 269 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 270 for (i=0; i<m; i++) { 271 rowlengths[i] = ai[i+1] - ai[i]; 272 } 273 274 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 275 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 276 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 277 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 278 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 279 } 280 281 for (row=0; row<m; row++) { 282 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 283 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 285 } 286 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 287 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288 B->rmap->bs = A->rmap->bs; 289 290 if (reuse == MAT_INPLACE_MATRIX) { 291 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 292 } else { 293 *newmat = B; 294 } 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 299 { 300 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 301 PetscScalar *y; 302 const PetscScalar *x; 303 const MatScalar *aval=a->val; 304 PetscInt totalslices=a->totalslices; 305 const PetscInt *acolidx=a->colidx; 306 PetscInt i,j; 307 PetscErrorCode ierr; 308 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 309 __m512d vec_x,vec_y,vec_vals; 310 __m256i vec_idx; 311 __mmask8 mask; 312 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 313 __m256i vec_idx2,vec_idx3,vec_idx4; 314 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 315 __m128i vec_idx; 316 __m256d vec_x,vec_y,vec_y2,vec_vals; 317 MatScalar yval; 318 PetscInt r,rows_left,row,nnz_in_row; 319 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 320 __m128d vec_x_tmp; 321 __m256d vec_x,vec_y,vec_y2,vec_vals; 322 MatScalar yval; 323 PetscInt r,rows_left,row,nnz_in_row; 324 #else 325 PetscScalar sum[8]; 326 #endif 327 328 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 329 #pragma disjoint(*x,*y,*aval) 330 #endif 331 332 PetscFunctionBegin; 333 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 334 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 335 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 336 for (i=0; i<totalslices; i++) { /* loop over slices */ 337 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 338 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 339 340 vec_y = _mm512_setzero_pd(); 341 vec_y2 = _mm512_setzero_pd(); 342 vec_y3 = _mm512_setzero_pd(); 343 vec_y4 = _mm512_setzero_pd(); 344 345 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 346 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 347 case 3: 348 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 349 acolidx += 8; aval += 8; 350 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 351 acolidx += 8; aval += 8; 352 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 353 acolidx += 8; aval += 8; 354 j += 3; 355 break; 356 case 2: 357 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 358 acolidx += 8; aval += 8; 359 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 360 acolidx += 8; aval += 8; 361 j += 2; 362 break; 363 case 1: 364 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 365 acolidx += 8; aval += 8; 366 j += 1; 367 break; 368 } 369 #pragma novector 370 for (; j<(a->sliidx[i+1]>>3); j+=4) { 371 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 372 acolidx += 8; aval += 8; 373 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 374 acolidx += 8; aval += 8; 375 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 376 acolidx += 8; aval += 8; 377 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 378 acolidx += 8; aval += 8; 379 } 380 381 vec_y = _mm512_add_pd(vec_y,vec_y2); 382 vec_y = _mm512_add_pd(vec_y,vec_y3); 383 vec_y = _mm512_add_pd(vec_y,vec_y4); 384 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 385 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 386 _mm512_mask_store_pd(&y[8*i],mask,vec_y); 387 } else { 388 _mm512_store_pd(&y[8*i],vec_y); 389 } 390 } 391 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 392 for (i=0; i<totalslices; i++) { /* loop over full slices */ 393 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 394 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 395 396 /* last slice may have padding rows. Don't use vectorization. */ 397 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 398 rows_left = A->rmap->n - 8*i; 399 for (r=0; r<rows_left; ++r) { 400 yval = (MatScalar)0; 401 row = 8*i + r; 402 nnz_in_row = a->rlen[row]; 403 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 404 y[row] = yval; 405 } 406 break; 407 } 408 409 vec_y = _mm256_setzero_pd(); 410 vec_y2 = _mm256_setzero_pd(); 411 412 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 413 #pragma novector 414 #pragma unroll(2) 415 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 416 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 417 aval += 4; acolidx += 4; 418 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 419 aval += 4; acolidx += 4; 420 } 421 422 _mm256_store_pd(y+i*8,vec_y); 423 _mm256_store_pd(y+i*8+4,vec_y2); 424 } 425 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 426 for (i=0; i<totalslices; i++) { /* loop over full slices */ 427 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 428 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 429 430 vec_y = _mm256_setzero_pd(); 431 vec_y2 = _mm256_setzero_pd(); 432 433 /* last slice may have padding rows. Don't use vectorization. */ 434 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 435 rows_left = A->rmap->n - 8*i; 436 for (r=0; r<rows_left; ++r) { 437 yval = (MatScalar)0; 438 row = 8*i + r; 439 nnz_in_row = a->rlen[row]; 440 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 441 y[row] = yval; 442 } 443 break; 444 } 445 446 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 447 #pragma novector 448 #pragma unroll(2) 449 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 450 vec_vals = _mm256_loadu_pd(aval); 451 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 452 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 453 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 454 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 455 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 456 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 457 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 458 aval += 4; 459 460 vec_vals = _mm256_loadu_pd(aval); 461 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 462 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 463 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 464 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 465 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 466 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 467 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 468 aval += 4; 469 } 470 471 _mm256_storeu_pd(y + i*8, vec_y); 472 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 473 } 474 #else 475 for (i=0; i<totalslices; i++) { /* loop over slices */ 476 for (j=0; j<8; j++) sum[j] = 0.0; 477 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 478 sum[0] += aval[j] * x[acolidx[j]]; 479 sum[1] += aval[j+1] * x[acolidx[j+1]]; 480 sum[2] += aval[j+2] * x[acolidx[j+2]]; 481 sum[3] += aval[j+3] * x[acolidx[j+3]]; 482 sum[4] += aval[j+4] * x[acolidx[j+4]]; 483 sum[5] += aval[j+5] * x[acolidx[j+5]]; 484 sum[6] += aval[j+6] * x[acolidx[j+6]]; 485 sum[7] += aval[j+7] * x[acolidx[j+7]]; 486 } 487 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 488 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 489 } else { 490 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 491 } 492 } 493 #endif 494 495 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 496 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 497 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 502 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 505 PetscScalar *y,*z; 506 const PetscScalar *x; 507 const MatScalar *aval=a->val; 508 PetscInt totalslices=a->totalslices; 509 const PetscInt *acolidx=a->colidx; 510 PetscInt i,j; 511 PetscErrorCode ierr; 512 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 513 __m512d vec_x,vec_y,vec_vals; 514 __m256i vec_idx; 515 __mmask8 mask; 516 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 517 __m256i vec_idx2,vec_idx3,vec_idx4; 518 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 519 __m128d vec_x_tmp; 520 __m256d vec_x,vec_y,vec_y2,vec_vals; 521 MatScalar yval; 522 PetscInt r,row,nnz_in_row; 523 #else 524 PetscScalar sum[8]; 525 #endif 526 527 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 528 #pragma disjoint(*x,*y,*aval) 529 #endif 530 531 PetscFunctionBegin; 532 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 533 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 534 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 535 for (i=0; i<totalslices; i++) { /* loop over slices */ 536 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 537 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 538 539 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 540 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 541 vec_y = _mm512_mask_load_pd(vec_y,mask,&y[8*i]); 542 } else { 543 vec_y = _mm512_load_pd(&y[8*i]); 544 } 545 vec_y2 = _mm512_setzero_pd(); 546 vec_y3 = _mm512_setzero_pd(); 547 vec_y4 = _mm512_setzero_pd(); 548 549 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 550 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 551 case 3: 552 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 553 acolidx += 8; aval += 8; 554 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 555 acolidx += 8; aval += 8; 556 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 557 acolidx += 8; aval += 8; 558 j += 3; 559 break; 560 case 2: 561 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 562 acolidx += 8; aval += 8; 563 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 564 acolidx += 8; aval += 8; 565 j += 2; 566 break; 567 case 1: 568 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 569 acolidx += 8; aval += 8; 570 j += 1; 571 break; 572 } 573 #pragma novector 574 for (; j<(a->sliidx[i+1]>>3); j+=4) { 575 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 576 acolidx += 8; aval += 8; 577 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 578 acolidx += 8; aval += 8; 579 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 580 acolidx += 8; aval += 8; 581 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 582 acolidx += 8; aval += 8; 583 } 584 585 vec_y = _mm512_add_pd(vec_y,vec_y2); 586 vec_y = _mm512_add_pd(vec_y,vec_y3); 587 vec_y = _mm512_add_pd(vec_y,vec_y4); 588 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 589 _mm512_mask_store_pd(&z[8*i],mask,vec_y); 590 } else { 591 _mm512_store_pd(&z[8*i],vec_y); 592 } 593 } 594 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 595 for (i=0; i<totalslices; i++) { /* loop over full slices */ 596 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 597 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 598 599 /* last slice may have padding rows. Don't use vectorization. */ 600 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 601 for (r=0; r<(A->rmap->n & 0x07); ++r) { 602 row = 8*i + r; 603 yval = (MatScalar)0.0; 604 nnz_in_row = a->rlen[row]; 605 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 606 z[row] = y[row] + yval; 607 } 608 break; 609 } 610 611 vec_y = _mm256_loadu_pd(y+8*i); 612 vec_y2 = _mm256_loadu_pd(y+8*i+4); 613 614 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 615 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 616 vec_vals = _mm256_loadu_pd(aval); 617 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 618 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 619 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 620 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 621 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 622 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 623 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 624 aval += 4; 625 626 vec_vals = _mm256_loadu_pd(aval); 627 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 628 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 629 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 630 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 631 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 632 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 633 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 634 aval += 4; 635 } 636 637 _mm256_storeu_pd(z+i*8,vec_y); 638 _mm256_storeu_pd(z+i*8+4,vec_y2); 639 } 640 #else 641 for (i=0; i<totalslices; i++) { /* loop over slices */ 642 for (j=0; j<8; j++) sum[j] = 0.0; 643 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 644 sum[0] += aval[j] * x[acolidx[j]]; 645 sum[1] += aval[j+1] * x[acolidx[j+1]]; 646 sum[2] += aval[j+2] * x[acolidx[j+2]]; 647 sum[3] += aval[j+3] * x[acolidx[j+3]]; 648 sum[4] += aval[j+4] * x[acolidx[j+4]]; 649 sum[5] += aval[j+5] * x[acolidx[j+5]]; 650 sum[6] += aval[j+6] * x[acolidx[j+6]]; 651 sum[7] += aval[j+7] * x[acolidx[j+7]]; 652 } 653 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 654 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 655 } else { 656 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 657 } 658 } 659 #endif 660 661 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 662 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 663 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 668 { 669 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 670 PetscScalar *y; 671 const PetscScalar *x; 672 const MatScalar *aval=a->val; 673 const PetscInt *acolidx=a->colidx; 674 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 675 PetscErrorCode ierr; 676 677 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 678 #pragma disjoint(*x,*y,*aval) 679 #endif 680 681 PetscFunctionBegin; 682 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 683 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 684 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 685 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 686 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 687 for (r=0; r<(A->rmap->n & 0x07); ++r) { 688 row = 8*i + r; 689 nnz_in_row = a->rlen[row]; 690 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 691 } 692 break; 693 } 694 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 695 y[acolidx[j]] += aval[j] * x[8*i]; 696 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 697 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 698 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 699 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 700 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 701 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 702 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 703 } 704 } 705 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 706 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 707 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 712 { 713 PetscErrorCode ierr; 714 715 PetscFunctionBegin; 716 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 717 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 718 PetscFunctionReturn(0); 719 } 720 721 /* 722 Checks for missing diagonals 723 */ 724 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 725 { 726 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 727 PetscInt *diag,i; 728 729 PetscFunctionBegin; 730 *missing = PETSC_FALSE; 731 if (A->rmap->n > 0 && !(a->colidx)) { 732 *missing = PETSC_TRUE; 733 if (d) *d = 0; 734 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 735 } else { 736 diag = a->diag; 737 for (i=0; i<A->rmap->n; i++) { 738 if (diag[i] == -1) { 739 *missing = PETSC_TRUE; 740 if (d) *d = i; 741 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 742 break; 743 } 744 } 745 } 746 PetscFunctionReturn(0); 747 } 748 749 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 750 { 751 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 752 PetscInt i,j,m=A->rmap->n,shift; 753 PetscErrorCode ierr; 754 755 PetscFunctionBegin; 756 if (!a->diag) { 757 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 758 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 759 a->free_diag = PETSC_TRUE; 760 } 761 for (i=0; i<m; i++) { /* loop over rows */ 762 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 763 a->diag[i] = -1; 764 for (j=0; j<a->rlen[i]; j++) { 765 if (a->colidx[shift+j*8] == i) { 766 a->diag[i] = shift+j*8; 767 break; 768 } 769 } 770 } 771 PetscFunctionReturn(0); 772 } 773 774 /* 775 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 776 */ 777 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 778 { 779 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 780 PetscInt i,*diag,m = A->rmap->n; 781 MatScalar *val = a->val; 782 PetscScalar *idiag,*mdiag; 783 PetscErrorCode ierr; 784 785 PetscFunctionBegin; 786 if (a->idiagvalid) PetscFunctionReturn(0); 787 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 788 diag = a->diag; 789 if (!a->idiag) { 790 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 791 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 792 val = a->val; 793 } 794 mdiag = a->mdiag; 795 idiag = a->idiag; 796 797 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 798 for (i=0; i<m; i++) { 799 mdiag[i] = val[diag[i]]; 800 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 801 if (PetscRealPart(fshift)) { 802 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 803 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 804 A->factorerror_zeropivot_value = 0.0; 805 A->factorerror_zeropivot_row = i; 806 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 807 } 808 idiag[i] = 1.0/val[diag[i]]; 809 } 810 ierr = PetscLogFlops(m);CHKERRQ(ierr); 811 } else { 812 for (i=0; i<m; i++) { 813 mdiag[i] = val[diag[i]]; 814 idiag[i] = omega/(fshift + val[diag[i]]); 815 } 816 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 817 } 818 a->idiagvalid = PETSC_TRUE; 819 PetscFunctionReturn(0); 820 } 821 822 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 823 { 824 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 825 PetscErrorCode ierr; 826 827 PetscFunctionBegin; 828 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 829 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 830 PetscFunctionReturn(0); 831 } 832 833 PetscErrorCode MatDestroy_SeqSELL(Mat A) 834 { 835 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 836 PetscErrorCode ierr; 837 838 PetscFunctionBegin; 839 #if defined(PETSC_USE_LOG) 840 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 841 #endif 842 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 843 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 844 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 845 ierr = PetscFree(a->diag);CHKERRQ(ierr); 846 ierr = PetscFree(a->ibdiag);CHKERRQ(ierr); 847 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 848 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 849 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 850 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 851 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 852 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 853 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 854 855 ierr = PetscFree(A->data);CHKERRQ(ierr); 856 857 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 860 #if defined(PETSC_HAVE_ELEMENTAL) 861 #endif 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 867 { 868 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 869 PetscErrorCode ierr; 870 871 PetscFunctionBegin; 872 switch (op) { 873 case MAT_ROW_ORIENTED: 874 a->roworiented = flg; 875 break; 876 case MAT_KEEP_NONZERO_PATTERN: 877 a->keepnonzeropattern = flg; 878 break; 879 case MAT_NEW_NONZERO_LOCATIONS: 880 a->nonew = (flg ? 0 : 1); 881 break; 882 case MAT_NEW_NONZERO_LOCATION_ERR: 883 a->nonew = (flg ? -1 : 0); 884 break; 885 case MAT_NEW_NONZERO_ALLOCATION_ERR: 886 a->nonew = (flg ? -2 : 0); 887 break; 888 case MAT_UNUSED_NONZERO_LOCATION_ERR: 889 a->nounused = (flg ? -1 : 0); 890 break; 891 case MAT_NEW_DIAGONALS: 892 case MAT_IGNORE_OFF_PROC_ENTRIES: 893 case MAT_USE_HASH_TABLE: 894 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 895 break; 896 case MAT_SPD: 897 case MAT_SYMMETRIC: 898 case MAT_STRUCTURALLY_SYMMETRIC: 899 case MAT_HERMITIAN: 900 case MAT_SYMMETRY_ETERNAL: 901 /* These options are handled directly by MatSetOption() */ 902 break; 903 default: 904 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 905 } 906 PetscFunctionReturn(0); 907 } 908 909 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 910 { 911 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 912 PetscInt i,j,n,shift; 913 PetscScalar *x,zero=0.0; 914 PetscErrorCode ierr; 915 916 PetscFunctionBegin; 917 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 918 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 919 920 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 921 PetscInt *diag=a->diag; 922 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 923 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 924 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 ierr = VecSet(v,zero);CHKERRQ(ierr); 929 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 930 for (i=0; i<n; i++) { /* loop over rows */ 931 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 932 x[i] = 0; 933 for (j=0; j<a->rlen[i]; j++) { 934 if (a->colidx[shift+j*8] == i) { 935 x[i] = a->val[shift+j*8]; 936 break; 937 } 938 } 939 } 940 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 944 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 945 { 946 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 947 const PetscScalar *l,*r; 948 PetscInt i,j,m,n,row; 949 PetscErrorCode ierr; 950 951 PetscFunctionBegin; 952 if (ll) { 953 /* The local size is used so that VecMPI can be passed to this routine 954 by MatDiagonalScale_MPISELL */ 955 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 956 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 957 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 958 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 959 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 960 a->val[j] *= l[8*i+row]; 961 } 962 } 963 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 964 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 965 } 966 if (rr) { 967 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 968 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 969 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 970 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 971 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 972 a->val[j] *= r[a->colidx[j]]; 973 } 974 } 975 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 976 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 977 } 978 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 979 PetscFunctionReturn(0); 980 } 981 982 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 983 984 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 985 { 986 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 987 PetscInt *cp,i,k,low,high,t,row,col,l; 988 PetscInt shift; 989 MatScalar *vp; 990 991 PetscFunctionBegin; 992 for (k=0; k<m; k++) { /* loop over requested rows */ 993 row = im[k]; 994 if (row<0) continue; 995 #if defined(PETSC_USE_DEBUG) 996 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 997 #endif 998 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 999 cp = a->colidx+shift; /* pointer to the row */ 1000 vp = a->val+shift; /* pointer to the row */ 1001 for (l=0; l<n; l++) { /* loop over requested columns */ 1002 col = in[l]; 1003 if (col<0) continue; 1004 #if defined(PETSC_USE_DEBUG) 1005 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 1006 #endif 1007 high = a->rlen[row]; low = 0; /* assume unsorted */ 1008 while (high-low > 5) { 1009 t = (low+high)/2; 1010 if (*(cp+t*8) > col) high = t; 1011 else low = t; 1012 } 1013 for (i=low; i<high; i++) { 1014 if (*(cp+8*i) > col) break; 1015 if (*(cp+8*i) == col) { 1016 *v++ = *(vp+8*i); 1017 goto finished; 1018 } 1019 } 1020 *v++ = 0.0; 1021 finished:; 1022 } 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1028 { 1029 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1030 PetscInt i,j,m=A->rmap->n,shift; 1031 const char *name; 1032 PetscViewerFormat format; 1033 PetscErrorCode ierr; 1034 1035 PetscFunctionBegin; 1036 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1037 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1038 PetscInt nofinalvalue = 0; 1039 /* 1040 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1041 nofinalvalue = 1; 1042 } 1043 */ 1044 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1045 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1046 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1047 #if defined(PETSC_USE_COMPLEX) 1048 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1049 #else 1050 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1051 #endif 1052 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1053 1054 for (i=0; i<m; i++) { 1055 shift = a->sliidx[i>>3]+(i&0x07); 1056 for (j=0; j<a->rlen[i]; j++) { 1057 #if defined(PETSC_USE_COMPLEX) 1058 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1059 #else 1060 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1061 #endif 1062 } 1063 } 1064 /* 1065 if (nofinalvalue) { 1066 #if defined(PETSC_USE_COMPLEX) 1067 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1068 #else 1069 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1070 #endif 1071 } 1072 */ 1073 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1074 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1075 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1076 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscFunctionReturn(0); 1078 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1079 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1080 for (i=0; i<m; i++) { 1081 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1082 shift = a->sliidx[i>>3]+(i&0x07); 1083 for (j=0; j<a->rlen[i]; j++) { 1084 #if defined(PETSC_USE_COMPLEX) 1085 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1086 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1087 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1088 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1089 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1090 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1091 } 1092 #else 1093 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1094 #endif 1095 } 1096 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1097 } 1098 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1099 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1100 PetscInt cnt=0,jcnt; 1101 PetscScalar value; 1102 #if defined(PETSC_USE_COMPLEX) 1103 PetscBool realonly=PETSC_TRUE; 1104 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1105 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1106 realonly = PETSC_FALSE; 1107 break; 1108 } 1109 } 1110 #endif 1111 1112 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1113 for (i=0; i<m; i++) { 1114 jcnt = 0; 1115 shift = a->sliidx[i>>3]+(i&0x07); 1116 for (j=0; j<A->cmap->n; j++) { 1117 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1118 value = a->val[cnt++]; 1119 jcnt++; 1120 } else { 1121 value = 0.0; 1122 } 1123 #if defined(PETSC_USE_COMPLEX) 1124 if (realonly) { 1125 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1126 } else { 1127 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1128 } 1129 #else 1130 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1131 #endif 1132 } 1133 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1134 } 1135 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1136 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1137 PetscInt fshift=1; 1138 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1139 #if defined(PETSC_USE_COMPLEX) 1140 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1141 #else 1142 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1143 #endif 1144 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1145 for (i=0; i<m; i++) { 1146 shift = a->sliidx[i>>3]+(i&0x07); 1147 for (j=0; j<a->rlen[i]; j++) { 1148 #if defined(PETSC_USE_COMPLEX) 1149 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1150 #else 1151 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1152 #endif 1153 } 1154 } 1155 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1156 } else if (format == PETSC_VIEWER_NATIVE) { 1157 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1158 PetscInt row; 1159 ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1160 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1161 #if defined(PETSC_USE_COMPLEX) 1162 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1163 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1164 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1165 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1166 } else { 1167 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1168 } 1169 #else 1170 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1171 #endif 1172 } 1173 } 1174 } else { 1175 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1176 if (A->factortype) { 1177 for (i=0; i<m; i++) { 1178 shift = a->sliidx[i>>3]+(i&0x07); 1179 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1180 /* L part */ 1181 for (j=shift; j<a->diag[i]; j+=8) { 1182 #if defined(PETSC_USE_COMPLEX) 1183 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1184 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1185 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1186 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1187 } else { 1188 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1189 } 1190 #else 1191 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1192 #endif 1193 } 1194 /* diagonal */ 1195 j = a->diag[i]; 1196 #if defined(PETSC_USE_COMPLEX) 1197 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1198 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1199 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1200 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1201 } else { 1202 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1203 } 1204 #else 1205 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1206 #endif 1207 1208 /* U part */ 1209 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1210 #if defined(PETSC_USE_COMPLEX) 1211 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1212 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1213 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1214 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1215 } else { 1216 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1217 } 1218 #else 1219 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1220 #endif 1221 } 1222 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1223 } 1224 } else { 1225 for (i=0; i<m; i++) { 1226 shift = a->sliidx[i>>3]+(i&0x07); 1227 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1228 for (j=0; j<a->rlen[i]; j++) { 1229 #if defined(PETSC_USE_COMPLEX) 1230 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1231 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1232 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1233 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1234 } else { 1235 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1236 } 1237 #else 1238 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1239 #endif 1240 } 1241 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1242 } 1243 } 1244 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1245 } 1246 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #include <petscdraw.h> 1251 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1252 { 1253 Mat A=(Mat)Aa; 1254 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1255 PetscInt i,j,m=A->rmap->n,shift; 1256 int color; 1257 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1258 PetscViewer viewer; 1259 PetscViewerFormat format; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1264 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1265 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1266 1267 /* loop over matrix elements drawing boxes */ 1268 1269 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1270 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1271 /* Blue for negative, Cyan for zero and Red for positive */ 1272 color = PETSC_DRAW_BLUE; 1273 for (i=0; i<m; i++) { 1274 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1275 y_l = m - i - 1.0; y_r = y_l + 1.0; 1276 for (j=0; j<a->rlen[i]; j++) { 1277 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1278 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1279 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1280 } 1281 } 1282 color = PETSC_DRAW_CYAN; 1283 for (i=0; i<m; i++) { 1284 shift = a->sliidx[i>>3]+(i&0x07); 1285 y_l = m - i - 1.0; y_r = y_l + 1.0; 1286 for (j=0; j<a->rlen[i]; j++) { 1287 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1288 if (a->val[shift+8*j] != 0.) continue; 1289 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1290 } 1291 } 1292 color = PETSC_DRAW_RED; 1293 for (i=0; i<m; i++) { 1294 shift = a->sliidx[i>>3]+(i&0x07); 1295 y_l = m - i - 1.0; y_r = y_l + 1.0; 1296 for (j=0; j<a->rlen[i]; j++) { 1297 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1298 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1299 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1300 } 1301 } 1302 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1303 } else { 1304 /* use contour shading to indicate magnitude of values */ 1305 /* first determine max of all nonzero values */ 1306 PetscReal minv=0.0,maxv=0.0; 1307 PetscInt count=0; 1308 PetscDraw popup; 1309 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1310 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1311 } 1312 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1313 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1314 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1315 1316 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1317 for (i=0; i<m; i++) { 1318 shift = a->sliidx[i>>3]+(i&0x07); 1319 y_l = m - i - 1.0; 1320 y_r = y_l + 1.0; 1321 for (j=0; j<a->rlen[i]; j++) { 1322 x_l = a->colidx[shift+j*8]; 1323 x_r = x_l + 1.0; 1324 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1325 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1326 count++; 1327 } 1328 } 1329 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1330 } 1331 PetscFunctionReturn(0); 1332 } 1333 1334 #include <petscdraw.h> 1335 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1336 { 1337 PetscDraw draw; 1338 PetscReal xr,yr,xl,yl,h,w; 1339 PetscBool isnull; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1344 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1345 if (isnull) PetscFunctionReturn(0); 1346 1347 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1348 xr += w; yr += h; xl = -w; yl = -h; 1349 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1350 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1351 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1352 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1353 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1358 { 1359 PetscBool iascii,isbinary,isdraw; 1360 PetscErrorCode ierr; 1361 1362 PetscFunctionBegin; 1363 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1364 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1365 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1366 if (iascii) { 1367 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1368 } else if (isbinary) { 1369 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1370 } else if (isdraw) { 1371 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1372 } 1373 PetscFunctionReturn(0); 1374 } 1375 1376 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1377 { 1378 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1379 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1380 MatScalar *vp; 1381 PetscErrorCode ierr; 1382 1383 PetscFunctionBegin; 1384 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1385 /* To do: compress out the unused elements */ 1386 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1387 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1388 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1389 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1390 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1391 for (i=0; i<a->totalslices; ++i) { 1392 shift = a->sliidx[i]; /* starting index of the slice */ 1393 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1394 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1395 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1396 row = 8*i + row_in_slice; 1397 nrow = a->rlen[row]; /* number of nonzeros in row */ 1398 /* 1399 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1400 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1401 */ 1402 lastcol = 0; 1403 if (nrow>0) { /* nonempty row */ 1404 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1405 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1406 for (j=1;j<8;j++) { 1407 if (a->rlen[8*i+j]) { 1408 lastcol = cp[j]; 1409 break; 1410 } 1411 } 1412 } else { 1413 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1414 } 1415 1416 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1417 cp[8*k+row_in_slice] = lastcol; 1418 vp[8*k+row_in_slice] = (MatScalar)0; 1419 } 1420 } 1421 } 1422 1423 A->info.mallocs += a->reallocs; 1424 a->reallocs = 0; 1425 1426 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 } 1429 1430 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1431 { 1432 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1433 1434 PetscFunctionBegin; 1435 info->block_size = 1.0; 1436 info->nz_allocated = (double)a->maxallocmat; 1437 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1438 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1439 info->assemblies = (double)A->num_ass; 1440 info->mallocs = (double)A->info.mallocs; 1441 info->memory = ((PetscObject)A)->mem; 1442 if (A->factortype) { 1443 info->fill_ratio_given = A->info.fill_ratio_given; 1444 info->fill_ratio_needed = A->info.fill_ratio_needed; 1445 info->factor_mallocs = A->info.factor_mallocs; 1446 } else { 1447 info->fill_ratio_given = 0; 1448 info->fill_ratio_needed = 0; 1449 info->factor_mallocs = 0; 1450 } 1451 PetscFunctionReturn(0); 1452 } 1453 1454 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1455 { 1456 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1457 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1458 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1459 MatScalar *vp,value; 1460 PetscErrorCode ierr; 1461 1462 PetscFunctionBegin; 1463 for (k=0; k<m; k++) { /* loop over added rows */ 1464 row = im[k]; 1465 if (row < 0) continue; 1466 #if defined(PETSC_USE_DEBUG) 1467 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1468 #endif 1469 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1470 cp = a->colidx+shift; /* pointer to the row */ 1471 vp = a->val+shift; /* pointer to the row */ 1472 nrow = a->rlen[row]; 1473 low = 0; 1474 high = nrow; 1475 1476 for (l=0; l<n; l++) { /* loop over added columns */ 1477 col = in[l]; 1478 if (col<0) continue; 1479 #if defined(PETSC_USE_DEBUG) 1480 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1481 #endif 1482 if (a->roworiented) { 1483 value = v[l+k*n]; 1484 } else { 1485 value = v[k+l*m]; 1486 } 1487 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1488 1489 /* search in this row for the specified colmun, i indicates the column to be set */ 1490 if (col <= lastcol) low = 0; 1491 else high = nrow; 1492 lastcol = col; 1493 while (high-low > 5) { 1494 t = (low+high)/2; 1495 if (*(cp+t*8) > col) high = t; 1496 else low = t; 1497 } 1498 for (i=low; i<high; i++) { 1499 if (*(cp+i*8) > col) break; 1500 if (*(cp+i*8) == col) { 1501 if (is == ADD_VALUES) *(vp+i*8) += value; 1502 else *(vp+i*8) = value; 1503 low = i + 1; 1504 goto noinsert; 1505 } 1506 } 1507 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1508 if (nonew == 1) goto noinsert; 1509 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1510 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1511 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1512 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1513 for (ii=nrow-1; ii>=i; ii--) { 1514 *(cp+(ii+1)*8) = *(cp+ii*8); 1515 *(vp+(ii+1)*8) = *(vp+ii*8); 1516 } 1517 a->rlen[row]++; 1518 *(cp+i*8) = col; 1519 *(vp+i*8) = value; 1520 a->nz++; 1521 A->nonzerostate++; 1522 low = i+1; high++; nrow++; 1523 noinsert:; 1524 } 1525 a->rlen[row] = nrow; 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1531 { 1532 PetscErrorCode ierr; 1533 1534 PetscFunctionBegin; 1535 /* If the two matrices have the same copy implementation, use fast copy. */ 1536 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1537 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1538 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1539 1540 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1541 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1542 } else { 1543 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1544 } 1545 PetscFunctionReturn(0); 1546 } 1547 1548 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1549 { 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1554 PetscFunctionReturn(0); 1555 } 1556 1557 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1558 { 1559 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1560 1561 PetscFunctionBegin; 1562 *array = a->val; 1563 PetscFunctionReturn(0); 1564 } 1565 1566 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1567 { 1568 PetscFunctionBegin; 1569 PetscFunctionReturn(0); 1570 } 1571 1572 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1573 { 1574 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1575 PetscInt i; 1576 MatScalar *aval=a->val; 1577 1578 PetscFunctionBegin; 1579 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1580 PetscFunctionReturn(0); 1581 } 1582 1583 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1584 { 1585 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1586 PetscInt i; 1587 MatScalar *aval=a->val; 1588 PetscErrorCode ierr; 1589 1590 PetscFunctionBegin; 1591 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1592 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1597 { 1598 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1599 MatScalar *aval=a->val; 1600 PetscScalar oalpha=alpha; 1601 PetscBLASInt one=1,size; 1602 PetscErrorCode ierr; 1603 1604 PetscFunctionBegin; 1605 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1606 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1607 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1608 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1609 PetscFunctionReturn(0); 1610 } 1611 1612 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1613 { 1614 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 if (!Y->preallocated || !y->nz) { 1619 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1620 } 1621 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1622 PetscFunctionReturn(0); 1623 } 1624 1625 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1626 { 1627 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1628 PetscScalar *x,sum,*t; 1629 const MatScalar *idiag=0,*mdiag; 1630 const PetscScalar *b,*xb; 1631 PetscInt n,m=A->rmap->n,i,j,shift; 1632 const PetscInt *diag; 1633 PetscErrorCode ierr; 1634 1635 PetscFunctionBegin; 1636 its = its*lits; 1637 1638 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1639 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1640 a->fshift = fshift; 1641 a->omega = omega; 1642 1643 diag = a->diag; 1644 t = a->ssor_work; 1645 idiag = a->idiag; 1646 mdiag = a->mdiag; 1647 1648 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1649 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1650 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1651 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1652 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1653 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1654 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1657 for (i=0; i<m; i++) { 1658 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1659 sum = b[i]; 1660 n = (diag[i]-shift)/8; 1661 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1662 t[i] = sum; 1663 x[i] = sum*idiag[i]; 1664 } 1665 xb = t; 1666 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1667 } else xb = b; 1668 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1669 for (i=m-1; i>=0; i--) { 1670 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1671 sum = xb[i]; 1672 n = a->rlen[i]-(diag[i]-shift)/8-1; 1673 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1674 if (xb == b) { 1675 x[i] = sum*idiag[i]; 1676 } else { 1677 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1678 } 1679 } 1680 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1681 } 1682 its--; 1683 } 1684 while (its--) { 1685 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1686 for (i=0; i<m; i++) { 1687 /* lower */ 1688 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1689 sum = b[i]; 1690 n = (diag[i]-shift)/8; 1691 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1692 t[i] = sum; /* save application of the lower-triangular part */ 1693 /* upper */ 1694 n = a->rlen[i]-(diag[i]-shift)/8-1; 1695 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1696 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1697 } 1698 xb = t; 1699 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1700 } else xb = b; 1701 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1702 for (i=m-1; i>=0; i--) { 1703 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1704 sum = xb[i]; 1705 if (xb == b) { 1706 /* whole matrix (no checkpointing available) */ 1707 n = a->rlen[i]; 1708 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1709 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1710 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1711 n = a->rlen[i]-(diag[i]-shift)/8-1; 1712 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1713 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1714 } 1715 } 1716 if (xb == b) { 1717 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1718 } else { 1719 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1720 } 1721 } 1722 } 1723 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1724 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1725 PetscFunctionReturn(0); 1726 } 1727 1728 /* -------------------------------------------------------------------*/ 1729 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1730 MatGetRow_SeqSELL, 1731 MatRestoreRow_SeqSELL, 1732 MatMult_SeqSELL, 1733 /* 4*/ MatMultAdd_SeqSELL, 1734 MatMultTranspose_SeqSELL, 1735 MatMultTransposeAdd_SeqSELL, 1736 0, 1737 0, 1738 0, 1739 /* 10*/ 0, 1740 0, 1741 0, 1742 MatSOR_SeqSELL, 1743 0, 1744 /* 15*/ MatGetInfo_SeqSELL, 1745 MatEqual_SeqSELL, 1746 MatGetDiagonal_SeqSELL, 1747 MatDiagonalScale_SeqSELL, 1748 0, 1749 /* 20*/ 0, 1750 MatAssemblyEnd_SeqSELL, 1751 MatSetOption_SeqSELL, 1752 MatZeroEntries_SeqSELL, 1753 /* 24*/ 0, 1754 0, 1755 0, 1756 0, 1757 0, 1758 /* 29*/ MatSetUp_SeqSELL, 1759 0, 1760 0, 1761 0, 1762 0, 1763 /* 34*/ MatDuplicate_SeqSELL, 1764 0, 1765 0, 1766 0, 1767 0, 1768 /* 39*/ 0, 1769 0, 1770 0, 1771 MatGetValues_SeqSELL, 1772 MatCopy_SeqSELL, 1773 /* 44*/ 0, 1774 MatScale_SeqSELL, 1775 MatShift_SeqSELL, 1776 0, 1777 0, 1778 /* 49*/ 0, 1779 0, 1780 0, 1781 0, 1782 0, 1783 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1784 0, 1785 0, 1786 0, 1787 0, 1788 /* 59*/ 0, 1789 MatDestroy_SeqSELL, 1790 MatView_SeqSELL, 1791 0, 1792 0, 1793 /* 64*/ 0, 1794 0, 1795 0, 1796 0, 1797 0, 1798 /* 69*/ 0, 1799 0, 1800 0, 1801 0, 1802 0, 1803 /* 74*/ 0, 1804 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1805 0, 1806 0, 1807 0, 1808 /* 79*/ 0, 1809 0, 1810 0, 1811 0, 1812 0, 1813 /* 84*/ 0, 1814 0, 1815 0, 1816 0, 1817 0, 1818 /* 89*/ 0, 1819 0, 1820 0, 1821 0, 1822 0, 1823 /* 94*/ 0, 1824 0, 1825 0, 1826 0, 1827 0, 1828 /* 99*/ 0, 1829 0, 1830 0, 1831 MatConjugate_SeqSELL, 1832 0, 1833 /*104*/ 0, 1834 0, 1835 0, 1836 0, 1837 0, 1838 /*109*/ 0, 1839 0, 1840 0, 1841 0, 1842 MatMissingDiagonal_SeqSELL, 1843 /*114*/ 0, 1844 0, 1845 0, 1846 0, 1847 0, 1848 /*119*/ 0, 1849 0, 1850 0, 1851 0, 1852 0, 1853 /*124*/ 0, 1854 0, 1855 0, 1856 0, 1857 0, 1858 /*129*/ 0, 1859 0, 1860 0, 1861 0, 1862 0, 1863 /*134*/ 0, 1864 0, 1865 0, 1866 0, 1867 0, 1868 /*139*/ 0, 1869 0, 1870 0, 1871 MatFDColoringSetUp_SeqXAIJ, 1872 0, 1873 /*144*/0 1874 }; 1875 1876 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1877 { 1878 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1883 1884 /* allocate space for values if not already there */ 1885 if (!a->saved_values) { 1886 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1887 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1888 } 1889 1890 /* copy values over */ 1891 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1896 { 1897 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1898 PetscErrorCode ierr; 1899 1900 PetscFunctionBegin; 1901 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1902 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1903 /* copy values over */ 1904 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 /*@C 1909 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1910 1911 Not Collective 1912 1913 Input Parameters: 1914 . mat - a MATSEQSELL matrix 1915 . array - pointer to the data 1916 1917 Level: intermediate 1918 1919 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1920 @*/ 1921 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1922 { 1923 PetscErrorCode ierr; 1924 1925 PetscFunctionBegin; 1926 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1931 { 1932 Mat_SeqSELL *b; 1933 PetscMPIInt size; 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1938 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1939 1940 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1941 1942 B->data = (void*)b; 1943 1944 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1945 1946 b->row = 0; 1947 b->col = 0; 1948 b->icol = 0; 1949 b->reallocs = 0; 1950 b->ignorezeroentries = PETSC_FALSE; 1951 b->roworiented = PETSC_TRUE; 1952 b->nonew = 0; 1953 b->diag = 0; 1954 b->solve_work = 0; 1955 B->spptr = 0; 1956 b->saved_values = 0; 1957 b->idiag = 0; 1958 b->mdiag = 0; 1959 b->ssor_work = 0; 1960 b->omega = 1.0; 1961 b->fshift = 0.0; 1962 b->idiagvalid = PETSC_FALSE; 1963 b->ibdiagvalid = PETSC_FALSE; 1964 b->keepnonzeropattern = PETSC_FALSE; 1965 1966 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1967 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1968 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1969 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1970 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1971 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1972 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 /* 1977 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1978 */ 1979 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1980 { 1981 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1982 PetscInt i,m=A->rmap->n; 1983 PetscInt totalslices=a->totalslices; 1984 PetscErrorCode ierr; 1985 1986 PetscFunctionBegin; 1987 c = (Mat_SeqSELL*)C->data; 1988 1989 C->factortype = A->factortype; 1990 c->row = 0; 1991 c->col = 0; 1992 c->icol = 0; 1993 c->reallocs = 0; 1994 1995 C->assembled = PETSC_TRUE; 1996 1997 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 1998 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 1999 2000 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2001 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2002 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2003 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2004 2005 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2006 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2007 2008 /* allocate the matrix space */ 2009 if (mallocmatspace) { 2010 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2011 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2012 2013 c->singlemalloc = PETSC_TRUE; 2014 2015 if (m > 0) { 2016 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 2017 if (cpvalues == MAT_COPY_VALUES) { 2018 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2019 } else { 2020 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2021 } 2022 } 2023 } 2024 2025 c->ignorezeroentries = a->ignorezeroentries; 2026 c->roworiented = a->roworiented; 2027 c->nonew = a->nonew; 2028 if (a->diag) { 2029 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2030 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2031 for (i=0; i<m; i++) { 2032 c->diag[i] = a->diag[i]; 2033 } 2034 } else c->diag = 0; 2035 2036 c->solve_work = 0; 2037 c->saved_values = 0; 2038 c->idiag = 0; 2039 c->ssor_work = 0; 2040 c->keepnonzeropattern = a->keepnonzeropattern; 2041 c->free_val = PETSC_TRUE; 2042 c->free_colidx = PETSC_TRUE; 2043 2044 c->maxallocmat = a->maxallocmat; 2045 c->maxallocrow = a->maxallocrow; 2046 c->rlenmax = a->rlenmax; 2047 c->nz = a->nz; 2048 C->preallocated = PETSC_TRUE; 2049 2050 c->nonzerorowcnt = a->nonzerorowcnt; 2051 C->nonzerostate = A->nonzerostate; 2052 2053 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2054 PetscFunctionReturn(0); 2055 } 2056 2057 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2058 { 2059 PetscErrorCode ierr; 2060 2061 PetscFunctionBegin; 2062 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2063 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2064 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2065 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2066 } 2067 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2068 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 /*@C 2073 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2074 2075 Collective on MPI_Comm 2076 2077 Input Parameters: 2078 + comm - MPI communicator, set to PETSC_COMM_SELF 2079 . m - number of rows 2080 . n - number of columns 2081 . rlenmax - maximum number of nonzeros in a row 2082 - rlen - array containing the number of nonzeros in the various rows 2083 (possibly different for each row) or NULL 2084 2085 Output Parameter: 2086 . A - the matrix 2087 2088 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2089 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2090 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2091 2092 Notes: 2093 If nnz is given then nz is ignored 2094 2095 Specify the preallocated storage with either rlenmax or rlen (not both). 2096 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2097 allocation. For large problems you MUST preallocate memory or you 2098 will get TERRIBLE performance, see the users' manual chapter on matrices. 2099 2100 Level: intermediate 2101 2102 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2103 2104 @*/ 2105 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2106 { 2107 PetscErrorCode ierr; 2108 2109 PetscFunctionBegin; 2110 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2111 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2112 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2113 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2114 PetscFunctionReturn(0); 2115 } 2116 2117 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2118 { 2119 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2120 PetscInt totalslices=a->totalslices; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 /* If the matrix dimensions are not equal,or no of nonzeros */ 2125 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2126 *flg = PETSC_FALSE; 2127 PetscFunctionReturn(0); 2128 } 2129 /* if the a->colidx are the same */ 2130 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2131 if (!*flg) PetscFunctionReturn(0); 2132 /* if a->val are the same */ 2133 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2138 { 2139 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2140 2141 PetscFunctionBegin; 2142 a->idiagvalid = PETSC_FALSE; 2143 a->ibdiagvalid = PETSC_FALSE; 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2148 { 2149 #if defined(PETSC_USE_COMPLEX) 2150 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2151 PetscInt i; 2152 PetscScalar *val = a->val; 2153 2154 PetscFunctionBegin; 2155 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2156 val[i] = PetscConj(val[i]); 2157 } 2158 #else 2159 PetscFunctionBegin; 2160 #endif 2161 PetscFunctionReturn(0); 2162 } 2163