1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 9 10 #include <immintrin.h> 11 12 #if !defined(_MM_SCALE_8) 13 #define _MM_SCALE_8 8 14 #endif 15 16 #if defined(__AVX512F__) 17 /* these do not work 18 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 19 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 20 */ 21 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 22 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 23 vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \ 24 vec_vals = _mm512_loadu_pd(aval); \ 25 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 26 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 27 #elif defined(__AVX2__) && defined(__FMA__) 28 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 29 vec_vals = _mm256_loadu_pd(aval); \ 30 vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \ 31 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 32 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 33 #endif 34 #endif /* PETSC_HAVE_IMMINTRIN_H */ 35 36 /*@C 37 MatSeqSELLSetPreallocation - For good matrix assembly performance 38 the user should preallocate the matrix storage by setting the parameter nz 39 (or the array nnz). By setting these parameters accurately, performance 40 during matrix assembly can be increased significantly. 41 42 Collective on MPI_Comm 43 44 Input Parameters: 45 + B - The matrix 46 . nz - number of nonzeros per row (same for all rows) 47 - nnz - array containing the number of nonzeros in the various rows 48 (possibly different for each row) or NULL 49 50 Notes: 51 If nnz is given then nz is ignored. 52 53 Specify the preallocated storage with either nz or nnz (not both). 54 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 55 allocation. For large problems you MUST preallocate memory or you 56 will get TERRIBLE performance, see the users' manual chapter on matrices. 57 58 You can call MatGetInfo() to get information on how effective the preallocation was; 59 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 60 You can also run with the option -info and look for messages with the string 61 malloc in them to see if additional memory allocation was needed. 62 63 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 64 entries or columns indices. 65 66 The maximum number of nonzeos in any row should be as accuate as possible. 67 If it is underesitmated, you will get bad performance due to reallocation 68 (MatSeqXSELLReallocateSELL). 69 70 Level: intermediate 71 72 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 73 74 @*/ 75 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 76 { 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 81 PetscValidType(B,1); 82 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 87 { 88 Mat_SeqSELL *b; 89 PetscInt i,j,totalslices; 90 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 95 if (maxallocrow == MAT_SKIP_ALLOCATION) { 96 skipallocation = PETSC_TRUE; 97 maxallocrow = 0; 98 } 99 100 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 101 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 102 103 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 104 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 105 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 106 if (rlen) { 107 for (i=0; i<B->rmap->n; i++) { 108 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 109 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 110 } 111 } 112 113 B->preallocated = PETSC_TRUE; 114 115 b = (Mat_SeqSELL*)B->data; 116 117 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 118 b->totalslices = totalslices; 119 if (!skipallocation) { 120 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 121 122 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 123 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 124 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 125 } 126 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 127 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 128 else if (maxallocrow < 0) maxallocrow = 1; 129 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 130 } else { 131 maxallocrow = 0; 132 b->sliidx[0] = 0; 133 for (i=1; i<totalslices; i++) { 134 b->sliidx[i] = 0; 135 for (j=0;j<8;j++) { 136 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 137 } 138 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 139 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 140 } 141 /* last slice */ 142 b->sliidx[totalslices] = 0; 143 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 144 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 145 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 146 } 147 148 /* allocate space for val, colidx, rlen */ 149 /* FIXME: should B's old memory be unlogged? */ 150 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 151 /* FIXME: assuming an element of the bit array takes 8 bits */ 152 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 153 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 154 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 155 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 156 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 157 158 b->singlemalloc = PETSC_TRUE; 159 b->free_val = PETSC_TRUE; 160 b->free_colidx = PETSC_TRUE; 161 } else { 162 b->free_val = PETSC_FALSE; 163 b->free_colidx = PETSC_FALSE; 164 } 165 166 b->nz = 0; 167 b->maxallocrow = maxallocrow; 168 b->rlenmax = maxallocrow; 169 b->maxallocmat = b->sliidx[totalslices]; 170 B->info.nz_unneeded = (double)b->maxallocmat; 171 if (realalloc) { 172 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 177 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 178 { 179 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 180 PetscInt shift; 181 182 PetscFunctionBegin; 183 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 184 if (nz) *nz = a->rlen[row]; 185 shift = a->sliidx[row>>3]+(row&0x07); 186 if (!a->getrowcols) { 187 PetscErrorCode ierr; 188 189 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 190 } 191 if (idx) { 192 PetscInt j; 193 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 194 *idx = a->getrowcols; 195 } 196 if (v) { 197 PetscInt j; 198 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 199 *v = a->getrowvals; 200 } 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 205 { 206 PetscFunctionBegin; 207 PetscFunctionReturn(0); 208 } 209 210 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 211 { 212 Mat B; 213 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 214 PetscInt i; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (reuse == MAT_REUSE_MATRIX) { 219 B = *newmat; 220 ierr = MatZeroEntries(B);CHKERRQ(ierr); 221 } else { 222 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 223 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 224 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 226 } 227 228 for (i=0; i<A->rmap->n; i++) { 229 PetscInt nz = 0,*cols = NULL; 230 PetscScalar *vals = NULL; 231 232 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 233 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 234 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 235 } 236 237 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 B->rmap->bs = A->rmap->bs; 240 241 if (reuse == MAT_INPLACE_MATRIX) { 242 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 243 } else { 244 *newmat = B; 245 } 246 PetscFunctionReturn(0); 247 } 248 249 #include <../src/mat/impls/aij/seq/aij.h> 250 251 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 252 { 253 Mat B; 254 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 255 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 256 const PetscInt *cols; 257 const PetscScalar *vals; 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 262 if (reuse == MAT_REUSE_MATRIX) { 263 B = *newmat; 264 } else { 265 /* Can we just use ilen? */ 266 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 267 for (i=0; i<m; i++) { 268 rowlengths[i] = ai[i+1] - ai[i]; 269 } 270 271 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 272 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 273 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 274 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 275 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 276 } 277 278 for (row=0; row<m; row++) { 279 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 280 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 281 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 282 } 283 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 284 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 285 B->rmap->bs = A->rmap->bs; 286 287 if (reuse == MAT_INPLACE_MATRIX) { 288 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 289 } else { 290 *newmat = B; 291 } 292 PetscFunctionReturn(0); 293 } 294 295 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 296 { 297 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 298 PetscScalar *y; 299 const PetscScalar *x; 300 const MatScalar *aval=a->val; 301 PetscInt totalslices=a->totalslices; 302 const PetscInt *acolidx=a->colidx; 303 PetscInt i,j; 304 PetscErrorCode ierr; 305 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 306 __m512d vec_x,vec_y,vec_vals; 307 __m256i vec_idx; 308 __mmask8 mask; 309 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 310 __m256i vec_idx2,vec_idx3,vec_idx4; 311 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 312 __m128i vec_idx; 313 __m256d vec_x,vec_y,vec_y2,vec_vals; 314 MatScalar yval; 315 PetscInt r,rows_left,row,nnz_in_row; 316 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 317 __m128d vec_x_tmp; 318 __m256d vec_x,vec_y,vec_y2,vec_vals; 319 MatScalar yval; 320 PetscInt r,rows_left,row,nnz_in_row; 321 #else 322 PetscScalar sum[8]; 323 #endif 324 325 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 326 #pragma disjoint(*x,*y,*aval) 327 #endif 328 329 PetscFunctionBegin; 330 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 331 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 332 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 333 for (i=0; i<totalslices; i++) { /* loop over slices */ 334 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 335 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 336 337 vec_y = _mm512_setzero_pd(); 338 vec_y2 = _mm512_setzero_pd(); 339 vec_y3 = _mm512_setzero_pd(); 340 vec_y4 = _mm512_setzero_pd(); 341 342 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 343 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 344 case 3: 345 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 346 acolidx += 8; aval += 8; 347 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 348 acolidx += 8; aval += 8; 349 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 350 acolidx += 8; aval += 8; 351 j += 3; 352 break; 353 case 2: 354 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 355 acolidx += 8; aval += 8; 356 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 357 acolidx += 8; aval += 8; 358 j += 2; 359 break; 360 case 1: 361 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 362 acolidx += 8; aval += 8; 363 j += 1; 364 break; 365 } 366 #pragma novector 367 for (; j<(a->sliidx[i+1]>>3); j+=4) { 368 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 369 acolidx += 8; aval += 8; 370 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 371 acolidx += 8; aval += 8; 372 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 373 acolidx += 8; aval += 8; 374 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 375 acolidx += 8; aval += 8; 376 } 377 378 vec_y = _mm512_add_pd(vec_y,vec_y2); 379 vec_y = _mm512_add_pd(vec_y,vec_y3); 380 vec_y = _mm512_add_pd(vec_y,vec_y4); 381 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 382 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 383 _mm512_mask_storeu_pd(&y[8*i],mask,vec_y); 384 } else { 385 _mm512_storeu_pd(&y[8*i],vec_y); 386 } 387 } 388 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 389 for (i=0; i<totalslices; i++) { /* loop over full slices */ 390 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 391 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 392 393 /* last slice may have padding rows. Don't use vectorization. */ 394 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 395 rows_left = A->rmap->n - 8*i; 396 for (r=0; r<rows_left; ++r) { 397 yval = (MatScalar)0; 398 row = 8*i + r; 399 nnz_in_row = a->rlen[row]; 400 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 401 y[row] = yval; 402 } 403 break; 404 } 405 406 vec_y = _mm256_setzero_pd(); 407 vec_y2 = _mm256_setzero_pd(); 408 409 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 410 #pragma novector 411 #pragma unroll(2) 412 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 413 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 414 aval += 4; acolidx += 4; 415 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 416 aval += 4; acolidx += 4; 417 } 418 419 _mm256_storeu_pd(y+i*8,vec_y); 420 _mm256_storeu_pd(y+i*8+4,vec_y2); 421 } 422 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 423 for (i=0; i<totalslices; i++) { /* loop over full slices */ 424 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 425 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 426 427 vec_y = _mm256_setzero_pd(); 428 vec_y2 = _mm256_setzero_pd(); 429 430 /* last slice may have padding rows. Don't use vectorization. */ 431 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 432 rows_left = A->rmap->n - 8*i; 433 for (r=0; r<rows_left; ++r) { 434 yval = (MatScalar)0; 435 row = 8*i + r; 436 nnz_in_row = a->rlen[row]; 437 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 438 y[row] = yval; 439 } 440 break; 441 } 442 443 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 444 #pragma novector 445 #pragma unroll(2) 446 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 447 vec_vals = _mm256_loadu_pd(aval); 448 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 449 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 450 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 451 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 452 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 453 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 454 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 455 aval += 4; 456 457 vec_vals = _mm256_loadu_pd(aval); 458 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 459 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 460 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 461 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 462 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 463 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 464 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 465 aval += 4; 466 } 467 468 _mm256_storeu_pd(y + i*8, vec_y); 469 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 470 } 471 #else 472 for (i=0; i<totalslices; i++) { /* loop over slices */ 473 for (j=0; j<8; j++) sum[j] = 0.0; 474 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 475 sum[0] += aval[j] * x[acolidx[j]]; 476 sum[1] += aval[j+1] * x[acolidx[j+1]]; 477 sum[2] += aval[j+2] * x[acolidx[j+2]]; 478 sum[3] += aval[j+3] * x[acolidx[j+3]]; 479 sum[4] += aval[j+4] * x[acolidx[j+4]]; 480 sum[5] += aval[j+5] * x[acolidx[j+5]]; 481 sum[6] += aval[j+6] * x[acolidx[j+6]]; 482 sum[7] += aval[j+7] * x[acolidx[j+7]]; 483 } 484 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 485 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 486 } else { 487 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 488 } 489 } 490 #endif 491 492 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 493 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 494 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 499 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 500 { 501 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 502 PetscScalar *y,*z; 503 const PetscScalar *x; 504 const MatScalar *aval=a->val; 505 PetscInt totalslices=a->totalslices; 506 const PetscInt *acolidx=a->colidx; 507 PetscInt i,j; 508 PetscErrorCode ierr; 509 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 510 __m512d vec_x,vec_y,vec_vals; 511 __m256i vec_idx; 512 __mmask8 mask; 513 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 514 __m256i vec_idx2,vec_idx3,vec_idx4; 515 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 516 __m128d vec_x_tmp; 517 __m256d vec_x,vec_y,vec_y2,vec_vals; 518 MatScalar yval; 519 PetscInt r,row,nnz_in_row; 520 #else 521 PetscScalar sum[8]; 522 #endif 523 524 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 525 #pragma disjoint(*x,*y,*aval) 526 #endif 527 528 PetscFunctionBegin; 529 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 530 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 531 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 532 for (i=0; i<totalslices; i++) { /* loop over slices */ 533 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 534 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 535 536 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 537 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 538 vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]); 539 } else { 540 vec_y = _mm512_loadu_pd(&y[8*i]); 541 } 542 vec_y2 = _mm512_setzero_pd(); 543 vec_y3 = _mm512_setzero_pd(); 544 vec_y4 = _mm512_setzero_pd(); 545 546 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 547 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 548 case 3: 549 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 550 acolidx += 8; aval += 8; 551 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 552 acolidx += 8; aval += 8; 553 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 554 acolidx += 8; aval += 8; 555 j += 3; 556 break; 557 case 2: 558 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 559 acolidx += 8; aval += 8; 560 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 561 acolidx += 8; aval += 8; 562 j += 2; 563 break; 564 case 1: 565 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 566 acolidx += 8; aval += 8; 567 j += 1; 568 break; 569 } 570 #pragma novector 571 for (; j<(a->sliidx[i+1]>>3); j+=4) { 572 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 573 acolidx += 8; aval += 8; 574 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 575 acolidx += 8; aval += 8; 576 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 577 acolidx += 8; aval += 8; 578 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 579 acolidx += 8; aval += 8; 580 } 581 582 vec_y = _mm512_add_pd(vec_y,vec_y2); 583 vec_y = _mm512_add_pd(vec_y,vec_y3); 584 vec_y = _mm512_add_pd(vec_y,vec_y4); 585 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 586 _mm512_mask_storeu_pd(&z[8*i],mask,vec_y); 587 } else { 588 _mm512_storeu_pd(&z[8*i],vec_y); 589 } 590 } 591 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 592 for (i=0; i<totalslices; i++) { /* loop over full slices */ 593 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 594 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 595 596 /* last slice may have padding rows. Don't use vectorization. */ 597 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 598 for (r=0; r<(A->rmap->n & 0x07); ++r) { 599 row = 8*i + r; 600 yval = (MatScalar)0.0; 601 nnz_in_row = a->rlen[row]; 602 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 603 z[row] = y[row] + yval; 604 } 605 break; 606 } 607 608 vec_y = _mm256_loadu_pd(y+8*i); 609 vec_y2 = _mm256_loadu_pd(y+8*i+4); 610 611 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 612 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 613 vec_vals = _mm256_loadu_pd(aval); 614 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 615 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 616 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 617 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 618 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 619 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 620 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 621 aval += 4; 622 623 vec_vals = _mm256_loadu_pd(aval); 624 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 625 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 626 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 627 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 628 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 629 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 630 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 631 aval += 4; 632 } 633 634 _mm256_storeu_pd(z+i*8,vec_y); 635 _mm256_storeu_pd(z+i*8+4,vec_y2); 636 } 637 #else 638 for (i=0; i<totalslices; i++) { /* loop over slices */ 639 for (j=0; j<8; j++) sum[j] = 0.0; 640 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 641 sum[0] += aval[j] * x[acolidx[j]]; 642 sum[1] += aval[j+1] * x[acolidx[j+1]]; 643 sum[2] += aval[j+2] * x[acolidx[j+2]]; 644 sum[3] += aval[j+3] * x[acolidx[j+3]]; 645 sum[4] += aval[j+4] * x[acolidx[j+4]]; 646 sum[5] += aval[j+5] * x[acolidx[j+5]]; 647 sum[6] += aval[j+6] * x[acolidx[j+6]]; 648 sum[7] += aval[j+7] * x[acolidx[j+7]]; 649 } 650 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 651 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 652 } else { 653 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 654 } 655 } 656 #endif 657 658 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 659 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 660 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 665 { 666 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 667 PetscScalar *y; 668 const PetscScalar *x; 669 const MatScalar *aval=a->val; 670 const PetscInt *acolidx=a->colidx; 671 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 672 PetscErrorCode ierr; 673 674 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 675 #pragma disjoint(*x,*y,*aval) 676 #endif 677 678 PetscFunctionBegin; 679 if (A->symmetric) { 680 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 684 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 685 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 686 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 687 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 688 for (r=0; r<(A->rmap->n & 0x07); ++r) { 689 row = 8*i + r; 690 nnz_in_row = a->rlen[row]; 691 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 692 } 693 break; 694 } 695 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 696 y[acolidx[j]] += aval[j] * x[8*i]; 697 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 698 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 699 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 700 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 701 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 702 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 703 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 704 } 705 } 706 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 707 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 708 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 713 { 714 PetscErrorCode ierr; 715 716 PetscFunctionBegin; 717 if (A->symmetric) { 718 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr); 719 } else { 720 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 721 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 722 } 723 PetscFunctionReturn(0); 724 } 725 726 /* 727 Checks for missing diagonals 728 */ 729 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 730 { 731 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 732 PetscInt *diag,i; 733 734 PetscFunctionBegin; 735 *missing = PETSC_FALSE; 736 if (A->rmap->n > 0 && !(a->colidx)) { 737 *missing = PETSC_TRUE; 738 if (d) *d = 0; 739 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 740 } else { 741 diag = a->diag; 742 for (i=0; i<A->rmap->n; i++) { 743 if (diag[i] == -1) { 744 *missing = PETSC_TRUE; 745 if (d) *d = i; 746 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 747 break; 748 } 749 } 750 } 751 PetscFunctionReturn(0); 752 } 753 754 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 755 { 756 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 757 PetscInt i,j,m=A->rmap->n,shift; 758 PetscErrorCode ierr; 759 760 PetscFunctionBegin; 761 if (!a->diag) { 762 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 763 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 764 a->free_diag = PETSC_TRUE; 765 } 766 for (i=0; i<m; i++) { /* loop over rows */ 767 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 768 a->diag[i] = -1; 769 for (j=0; j<a->rlen[i]; j++) { 770 if (a->colidx[shift+j*8] == i) { 771 a->diag[i] = shift+j*8; 772 break; 773 } 774 } 775 } 776 PetscFunctionReturn(0); 777 } 778 779 /* 780 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 781 */ 782 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 783 { 784 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 785 PetscInt i,*diag,m = A->rmap->n; 786 MatScalar *val = a->val; 787 PetscScalar *idiag,*mdiag; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 if (a->idiagvalid) PetscFunctionReturn(0); 792 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 793 diag = a->diag; 794 if (!a->idiag) { 795 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 796 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 797 val = a->val; 798 } 799 mdiag = a->mdiag; 800 idiag = a->idiag; 801 802 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 803 for (i=0; i<m; i++) { 804 mdiag[i] = val[diag[i]]; 805 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 806 if (PetscRealPart(fshift)) { 807 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 808 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 809 A->factorerror_zeropivot_value = 0.0; 810 A->factorerror_zeropivot_row = i; 811 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 812 } 813 idiag[i] = 1.0/val[diag[i]]; 814 } 815 ierr = PetscLogFlops(m);CHKERRQ(ierr); 816 } else { 817 for (i=0; i<m; i++) { 818 mdiag[i] = val[diag[i]]; 819 idiag[i] = omega/(fshift + val[diag[i]]); 820 } 821 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 822 } 823 a->idiagvalid = PETSC_TRUE; 824 PetscFunctionReturn(0); 825 } 826 827 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 828 { 829 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 834 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatDestroy_SeqSELL(Mat A) 839 { 840 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 841 PetscErrorCode ierr; 842 843 PetscFunctionBegin; 844 #if defined(PETSC_USE_LOG) 845 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 846 #endif 847 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 848 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 849 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 850 ierr = PetscFree(a->diag);CHKERRQ(ierr); 851 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 852 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 853 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 854 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 855 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 856 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 857 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 858 859 ierr = PetscFree(A->data);CHKERRQ(ierr); 860 861 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 864 #if defined(PETSC_HAVE_ELEMENTAL) 865 #endif 866 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 871 { 872 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 switch (op) { 877 case MAT_ROW_ORIENTED: 878 a->roworiented = flg; 879 break; 880 case MAT_KEEP_NONZERO_PATTERN: 881 a->keepnonzeropattern = flg; 882 break; 883 case MAT_NEW_NONZERO_LOCATIONS: 884 a->nonew = (flg ? 0 : 1); 885 break; 886 case MAT_NEW_NONZERO_LOCATION_ERR: 887 a->nonew = (flg ? -1 : 0); 888 break; 889 case MAT_NEW_NONZERO_ALLOCATION_ERR: 890 a->nonew = (flg ? -2 : 0); 891 break; 892 case MAT_UNUSED_NONZERO_LOCATION_ERR: 893 a->nounused = (flg ? -1 : 0); 894 break; 895 case MAT_NEW_DIAGONALS: 896 case MAT_IGNORE_OFF_PROC_ENTRIES: 897 case MAT_USE_HASH_TABLE: 898 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 899 break; 900 case MAT_SPD: 901 case MAT_SYMMETRIC: 902 case MAT_STRUCTURALLY_SYMMETRIC: 903 case MAT_HERMITIAN: 904 case MAT_SYMMETRY_ETERNAL: 905 /* These options are handled directly by MatSetOption() */ 906 break; 907 default: 908 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 909 } 910 PetscFunctionReturn(0); 911 } 912 913 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 914 { 915 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 916 PetscInt i,j,n,shift; 917 PetscScalar *x,zero=0.0; 918 PetscErrorCode ierr; 919 920 PetscFunctionBegin; 921 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 922 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 923 924 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 925 PetscInt *diag=a->diag; 926 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 927 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 928 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } 931 932 ierr = VecSet(v,zero);CHKERRQ(ierr); 933 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 934 for (i=0; i<n; i++) { /* loop over rows */ 935 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 936 x[i] = 0; 937 for (j=0; j<a->rlen[i]; j++) { 938 if (a->colidx[shift+j*8] == i) { 939 x[i] = a->val[shift+j*8]; 940 break; 941 } 942 } 943 } 944 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 945 PetscFunctionReturn(0); 946 } 947 948 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 949 { 950 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 951 const PetscScalar *l,*r; 952 PetscInt i,j,m,n,row; 953 PetscErrorCode ierr; 954 955 PetscFunctionBegin; 956 if (ll) { 957 /* The local size is used so that VecMPI can be passed to this routine 958 by MatDiagonalScale_MPISELL */ 959 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 960 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 961 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 962 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 963 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 964 a->val[j] *= l[8*i+row]; 965 } 966 } 967 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 968 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 969 } 970 if (rr) { 971 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 972 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 973 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 974 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 975 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 976 a->val[j] *= r[a->colidx[j]]; 977 } 978 } 979 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 980 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 981 } 982 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 987 988 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 989 { 990 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 991 PetscInt *cp,i,k,low,high,t,row,col,l; 992 PetscInt shift; 993 MatScalar *vp; 994 995 PetscFunctionBegin; 996 for (k=0; k<m; k++) { /* loop over requested rows */ 997 row = im[k]; 998 if (row<0) continue; 999 #if defined(PETSC_USE_DEBUG) 1000 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1001 #endif 1002 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1003 cp = a->colidx+shift; /* pointer to the row */ 1004 vp = a->val+shift; /* pointer to the row */ 1005 for (l=0; l<n; l++) { /* loop over requested columns */ 1006 col = in[l]; 1007 if (col<0) continue; 1008 #if defined(PETSC_USE_DEBUG) 1009 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 1010 #endif 1011 high = a->rlen[row]; low = 0; /* assume unsorted */ 1012 while (high-low > 5) { 1013 t = (low+high)/2; 1014 if (*(cp+t*8) > col) high = t; 1015 else low = t; 1016 } 1017 for (i=low; i<high; i++) { 1018 if (*(cp+8*i) > col) break; 1019 if (*(cp+8*i) == col) { 1020 *v++ = *(vp+8*i); 1021 goto finished; 1022 } 1023 } 1024 *v++ = 0.0; 1025 finished:; 1026 } 1027 } 1028 PetscFunctionReturn(0); 1029 } 1030 1031 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1032 { 1033 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1034 PetscInt i,j,m=A->rmap->n,shift; 1035 const char *name; 1036 PetscViewerFormat format; 1037 PetscErrorCode ierr; 1038 1039 PetscFunctionBegin; 1040 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1041 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1042 PetscInt nofinalvalue = 0; 1043 /* 1044 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1045 nofinalvalue = 1; 1046 } 1047 */ 1048 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1049 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1050 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1051 #if defined(PETSC_USE_COMPLEX) 1052 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1053 #else 1054 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1055 #endif 1056 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1057 1058 for (i=0; i<m; i++) { 1059 shift = a->sliidx[i>>3]+(i&0x07); 1060 for (j=0; j<a->rlen[i]; j++) { 1061 #if defined(PETSC_USE_COMPLEX) 1062 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1063 #else 1064 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1065 #endif 1066 } 1067 } 1068 /* 1069 if (nofinalvalue) { 1070 #if defined(PETSC_USE_COMPLEX) 1071 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1072 #else 1073 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1074 #endif 1075 } 1076 */ 1077 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1079 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1080 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1081 PetscFunctionReturn(0); 1082 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1083 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1084 for (i=0; i<m; i++) { 1085 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1086 shift = a->sliidx[i>>3]+(i&0x07); 1087 for (j=0; j<a->rlen[i]; j++) { 1088 #if defined(PETSC_USE_COMPLEX) 1089 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1090 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1091 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1092 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1093 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1094 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1095 } 1096 #else 1097 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1098 #endif 1099 } 1100 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1101 } 1102 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1103 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1104 PetscInt cnt=0,jcnt; 1105 PetscScalar value; 1106 #if defined(PETSC_USE_COMPLEX) 1107 PetscBool realonly=PETSC_TRUE; 1108 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1109 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1110 realonly = PETSC_FALSE; 1111 break; 1112 } 1113 } 1114 #endif 1115 1116 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1117 for (i=0; i<m; i++) { 1118 jcnt = 0; 1119 shift = a->sliidx[i>>3]+(i&0x07); 1120 for (j=0; j<A->cmap->n; j++) { 1121 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1122 value = a->val[cnt++]; 1123 jcnt++; 1124 } else { 1125 value = 0.0; 1126 } 1127 #if defined(PETSC_USE_COMPLEX) 1128 if (realonly) { 1129 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1130 } else { 1131 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1132 } 1133 #else 1134 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1135 #endif 1136 } 1137 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1138 } 1139 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1140 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1141 PetscInt fshift=1; 1142 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1143 #if defined(PETSC_USE_COMPLEX) 1144 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1145 #else 1146 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1147 #endif 1148 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1149 for (i=0; i<m; i++) { 1150 shift = a->sliidx[i>>3]+(i&0x07); 1151 for (j=0; j<a->rlen[i]; j++) { 1152 #if defined(PETSC_USE_COMPLEX) 1153 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1154 #else 1155 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1156 #endif 1157 } 1158 } 1159 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1160 } else if (format == PETSC_VIEWER_NATIVE) { 1161 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1162 PetscInt row; 1163 ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1164 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1165 #if defined(PETSC_USE_COMPLEX) 1166 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1167 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1168 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1169 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1170 } else { 1171 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1172 } 1173 #else 1174 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1175 #endif 1176 } 1177 } 1178 } else { 1179 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1180 if (A->factortype) { 1181 for (i=0; i<m; i++) { 1182 shift = a->sliidx[i>>3]+(i&0x07); 1183 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1184 /* L part */ 1185 for (j=shift; j<a->diag[i]; j+=8) { 1186 #if defined(PETSC_USE_COMPLEX) 1187 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1188 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1189 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1190 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1191 } else { 1192 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1193 } 1194 #else 1195 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1196 #endif 1197 } 1198 /* diagonal */ 1199 j = a->diag[i]; 1200 #if defined(PETSC_USE_COMPLEX) 1201 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1202 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1203 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1204 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1205 } else { 1206 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1207 } 1208 #else 1209 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1210 #endif 1211 1212 /* U part */ 1213 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1214 #if defined(PETSC_USE_COMPLEX) 1215 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1216 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1217 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1218 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1219 } else { 1220 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1221 } 1222 #else 1223 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1224 #endif 1225 } 1226 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1227 } 1228 } else { 1229 for (i=0; i<m; i++) { 1230 shift = a->sliidx[i>>3]+(i&0x07); 1231 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1232 for (j=0; j<a->rlen[i]; j++) { 1233 #if defined(PETSC_USE_COMPLEX) 1234 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1235 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1236 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1237 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1238 } else { 1239 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1240 } 1241 #else 1242 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1243 #endif 1244 } 1245 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1246 } 1247 } 1248 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1249 } 1250 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #include <petscdraw.h> 1255 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1256 { 1257 Mat A=(Mat)Aa; 1258 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1259 PetscInt i,j,m=A->rmap->n,shift; 1260 int color; 1261 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1262 PetscViewer viewer; 1263 PetscViewerFormat format; 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1268 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1269 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1270 1271 /* loop over matrix elements drawing boxes */ 1272 1273 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1274 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1275 /* Blue for negative, Cyan for zero and Red for positive */ 1276 color = PETSC_DRAW_BLUE; 1277 for (i=0; i<m; i++) { 1278 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1279 y_l = m - i - 1.0; y_r = y_l + 1.0; 1280 for (j=0; j<a->rlen[i]; j++) { 1281 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1282 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1283 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1284 } 1285 } 1286 color = PETSC_DRAW_CYAN; 1287 for (i=0; i<m; i++) { 1288 shift = a->sliidx[i>>3]+(i&0x07); 1289 y_l = m - i - 1.0; y_r = y_l + 1.0; 1290 for (j=0; j<a->rlen[i]; j++) { 1291 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1292 if (a->val[shift+8*j] != 0.) continue; 1293 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1294 } 1295 } 1296 color = PETSC_DRAW_RED; 1297 for (i=0; i<m; i++) { 1298 shift = a->sliidx[i>>3]+(i&0x07); 1299 y_l = m - i - 1.0; y_r = y_l + 1.0; 1300 for (j=0; j<a->rlen[i]; j++) { 1301 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1302 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1303 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1304 } 1305 } 1306 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1307 } else { 1308 /* use contour shading to indicate magnitude of values */ 1309 /* first determine max of all nonzero values */ 1310 PetscReal minv=0.0,maxv=0.0; 1311 PetscInt count=0; 1312 PetscDraw popup; 1313 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1314 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1315 } 1316 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1317 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1318 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1319 1320 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1321 for (i=0; i<m; i++) { 1322 shift = a->sliidx[i>>3]+(i&0x07); 1323 y_l = m - i - 1.0; 1324 y_r = y_l + 1.0; 1325 for (j=0; j<a->rlen[i]; j++) { 1326 x_l = a->colidx[shift+j*8]; 1327 x_r = x_l + 1.0; 1328 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1329 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1330 count++; 1331 } 1332 } 1333 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1334 } 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #include <petscdraw.h> 1339 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1340 { 1341 PetscDraw draw; 1342 PetscReal xr,yr,xl,yl,h,w; 1343 PetscBool isnull; 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1348 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1349 if (isnull) PetscFunctionReturn(0); 1350 1351 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1352 xr += w; yr += h; xl = -w; yl = -h; 1353 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1354 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1355 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1356 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1357 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1362 { 1363 PetscBool iascii,isbinary,isdraw; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1368 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1369 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1370 if (iascii) { 1371 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1372 } else if (isbinary) { 1373 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1374 } else if (isdraw) { 1375 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1376 } 1377 PetscFunctionReturn(0); 1378 } 1379 1380 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1381 { 1382 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1383 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1384 MatScalar *vp; 1385 PetscErrorCode ierr; 1386 1387 PetscFunctionBegin; 1388 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1389 /* To do: compress out the unused elements */ 1390 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1391 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1392 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1393 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1394 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1395 for (i=0; i<a->totalslices; ++i) { 1396 shift = a->sliidx[i]; /* starting index of the slice */ 1397 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1398 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1399 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1400 row = 8*i + row_in_slice; 1401 nrow = a->rlen[row]; /* number of nonzeros in row */ 1402 /* 1403 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1404 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1405 */ 1406 lastcol = 0; 1407 if (nrow>0) { /* nonempty row */ 1408 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1409 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1410 for (j=1;j<8;j++) { 1411 if (a->rlen[8*i+j]) { 1412 lastcol = cp[j]; 1413 break; 1414 } 1415 } 1416 } else { 1417 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1418 } 1419 1420 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1421 cp[8*k+row_in_slice] = lastcol; 1422 vp[8*k+row_in_slice] = (MatScalar)0; 1423 } 1424 } 1425 } 1426 1427 A->info.mallocs += a->reallocs; 1428 a->reallocs = 0; 1429 1430 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1435 { 1436 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1437 1438 PetscFunctionBegin; 1439 info->block_size = 1.0; 1440 info->nz_allocated = (double)a->maxallocmat; 1441 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1442 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1443 info->assemblies = (double)A->num_ass; 1444 info->mallocs = (double)A->info.mallocs; 1445 info->memory = ((PetscObject)A)->mem; 1446 if (A->factortype) { 1447 info->fill_ratio_given = A->info.fill_ratio_given; 1448 info->fill_ratio_needed = A->info.fill_ratio_needed; 1449 info->factor_mallocs = A->info.factor_mallocs; 1450 } else { 1451 info->fill_ratio_given = 0; 1452 info->fill_ratio_needed = 0; 1453 info->factor_mallocs = 0; 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1459 { 1460 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1461 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1462 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1463 MatScalar *vp,value; 1464 PetscErrorCode ierr; 1465 1466 PetscFunctionBegin; 1467 for (k=0; k<m; k++) { /* loop over added rows */ 1468 row = im[k]; 1469 if (row < 0) continue; 1470 #if defined(PETSC_USE_DEBUG) 1471 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1472 #endif 1473 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1474 cp = a->colidx+shift; /* pointer to the row */ 1475 vp = a->val+shift; /* pointer to the row */ 1476 nrow = a->rlen[row]; 1477 low = 0; 1478 high = nrow; 1479 1480 for (l=0; l<n; l++) { /* loop over added columns */ 1481 col = in[l]; 1482 if (col<0) continue; 1483 #if defined(PETSC_USE_DEBUG) 1484 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1485 #endif 1486 if (a->roworiented) { 1487 value = v[l+k*n]; 1488 } else { 1489 value = v[k+l*m]; 1490 } 1491 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1492 1493 /* search in this row for the specified colmun, i indicates the column to be set */ 1494 if (col <= lastcol) low = 0; 1495 else high = nrow; 1496 lastcol = col; 1497 while (high-low > 5) { 1498 t = (low+high)/2; 1499 if (*(cp+t*8) > col) high = t; 1500 else low = t; 1501 } 1502 for (i=low; i<high; i++) { 1503 if (*(cp+i*8) > col) break; 1504 if (*(cp+i*8) == col) { 1505 if (is == ADD_VALUES) *(vp+i*8) += value; 1506 else *(vp+i*8) = value; 1507 low = i + 1; 1508 goto noinsert; 1509 } 1510 } 1511 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1512 if (nonew == 1) goto noinsert; 1513 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1514 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1515 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1516 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1517 for (ii=nrow-1; ii>=i; ii--) { 1518 *(cp+(ii+1)*8) = *(cp+ii*8); 1519 *(vp+(ii+1)*8) = *(vp+ii*8); 1520 } 1521 a->rlen[row]++; 1522 *(cp+i*8) = col; 1523 *(vp+i*8) = value; 1524 a->nz++; 1525 A->nonzerostate++; 1526 low = i+1; high++; nrow++; 1527 noinsert:; 1528 } 1529 a->rlen[row] = nrow; 1530 } 1531 PetscFunctionReturn(0); 1532 } 1533 1534 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1535 { 1536 PetscErrorCode ierr; 1537 1538 PetscFunctionBegin; 1539 /* If the two matrices have the same copy implementation, use fast copy. */ 1540 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1541 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1542 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1543 1544 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1545 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1546 } else { 1547 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1548 } 1549 PetscFunctionReturn(0); 1550 } 1551 1552 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1553 { 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1562 { 1563 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1564 1565 PetscFunctionBegin; 1566 *array = a->val; 1567 PetscFunctionReturn(0); 1568 } 1569 1570 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1571 { 1572 PetscFunctionBegin; 1573 PetscFunctionReturn(0); 1574 } 1575 1576 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1577 { 1578 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1579 PetscInt i; 1580 MatScalar *aval=a->val; 1581 1582 PetscFunctionBegin; 1583 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1584 PetscFunctionReturn(0); 1585 } 1586 1587 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1588 { 1589 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1590 PetscInt i; 1591 MatScalar *aval=a->val; 1592 PetscErrorCode ierr; 1593 1594 PetscFunctionBegin; 1595 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1596 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1601 { 1602 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1603 MatScalar *aval=a->val; 1604 PetscScalar oalpha=alpha; 1605 PetscBLASInt one=1,size; 1606 PetscErrorCode ierr; 1607 1608 PetscFunctionBegin; 1609 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1610 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1611 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1612 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1617 { 1618 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1619 PetscErrorCode ierr; 1620 1621 PetscFunctionBegin; 1622 if (!Y->preallocated || !y->nz) { 1623 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1624 } 1625 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1630 { 1631 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1632 PetscScalar *x,sum,*t; 1633 const MatScalar *idiag=0,*mdiag; 1634 const PetscScalar *b,*xb; 1635 PetscInt n,m=A->rmap->n,i,j,shift; 1636 const PetscInt *diag; 1637 PetscErrorCode ierr; 1638 1639 PetscFunctionBegin; 1640 its = its*lits; 1641 1642 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1643 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1644 a->fshift = fshift; 1645 a->omega = omega; 1646 1647 diag = a->diag; 1648 t = a->ssor_work; 1649 idiag = a->idiag; 1650 mdiag = a->mdiag; 1651 1652 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1653 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1654 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1655 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1656 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1657 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1658 1659 if (flag & SOR_ZERO_INITIAL_GUESS) { 1660 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1661 for (i=0; i<m; i++) { 1662 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1663 sum = b[i]; 1664 n = (diag[i]-shift)/8; 1665 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1666 t[i] = sum; 1667 x[i] = sum*idiag[i]; 1668 } 1669 xb = t; 1670 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1671 } else xb = b; 1672 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1673 for (i=m-1; i>=0; i--) { 1674 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1675 sum = xb[i]; 1676 n = a->rlen[i]-(diag[i]-shift)/8-1; 1677 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1678 if (xb == b) { 1679 x[i] = sum*idiag[i]; 1680 } else { 1681 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1682 } 1683 } 1684 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1685 } 1686 its--; 1687 } 1688 while (its--) { 1689 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1690 for (i=0; i<m; i++) { 1691 /* lower */ 1692 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1693 sum = b[i]; 1694 n = (diag[i]-shift)/8; 1695 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1696 t[i] = sum; /* save application of the lower-triangular part */ 1697 /* upper */ 1698 n = a->rlen[i]-(diag[i]-shift)/8-1; 1699 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1700 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1701 } 1702 xb = t; 1703 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1704 } else xb = b; 1705 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1706 for (i=m-1; i>=0; i--) { 1707 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1708 sum = xb[i]; 1709 if (xb == b) { 1710 /* whole matrix (no checkpointing available) */ 1711 n = a->rlen[i]; 1712 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1713 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1714 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1715 n = a->rlen[i]-(diag[i]-shift)/8-1; 1716 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1717 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1718 } 1719 } 1720 if (xb == b) { 1721 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1722 } else { 1723 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1724 } 1725 } 1726 } 1727 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1728 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 /* -------------------------------------------------------------------*/ 1733 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1734 MatGetRow_SeqSELL, 1735 MatRestoreRow_SeqSELL, 1736 MatMult_SeqSELL, 1737 /* 4*/ MatMultAdd_SeqSELL, 1738 MatMultTranspose_SeqSELL, 1739 MatMultTransposeAdd_SeqSELL, 1740 0, 1741 0, 1742 0, 1743 /* 10*/ 0, 1744 0, 1745 0, 1746 MatSOR_SeqSELL, 1747 0, 1748 /* 15*/ MatGetInfo_SeqSELL, 1749 MatEqual_SeqSELL, 1750 MatGetDiagonal_SeqSELL, 1751 MatDiagonalScale_SeqSELL, 1752 0, 1753 /* 20*/ 0, 1754 MatAssemblyEnd_SeqSELL, 1755 MatSetOption_SeqSELL, 1756 MatZeroEntries_SeqSELL, 1757 /* 24*/ 0, 1758 0, 1759 0, 1760 0, 1761 0, 1762 /* 29*/ MatSetUp_SeqSELL, 1763 0, 1764 0, 1765 0, 1766 0, 1767 /* 34*/ MatDuplicate_SeqSELL, 1768 0, 1769 0, 1770 0, 1771 0, 1772 /* 39*/ 0, 1773 0, 1774 0, 1775 MatGetValues_SeqSELL, 1776 MatCopy_SeqSELL, 1777 /* 44*/ 0, 1778 MatScale_SeqSELL, 1779 MatShift_SeqSELL, 1780 0, 1781 0, 1782 /* 49*/ 0, 1783 0, 1784 0, 1785 0, 1786 0, 1787 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1788 0, 1789 0, 1790 0, 1791 0, 1792 /* 59*/ 0, 1793 MatDestroy_SeqSELL, 1794 MatView_SeqSELL, 1795 0, 1796 0, 1797 /* 64*/ 0, 1798 0, 1799 0, 1800 0, 1801 0, 1802 /* 69*/ 0, 1803 0, 1804 0, 1805 0, 1806 0, 1807 /* 74*/ 0, 1808 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1809 0, 1810 0, 1811 0, 1812 /* 79*/ 0, 1813 0, 1814 0, 1815 0, 1816 0, 1817 /* 84*/ 0, 1818 0, 1819 0, 1820 0, 1821 0, 1822 /* 89*/ 0, 1823 0, 1824 0, 1825 0, 1826 0, 1827 /* 94*/ 0, 1828 0, 1829 0, 1830 0, 1831 0, 1832 /* 99*/ 0, 1833 0, 1834 0, 1835 MatConjugate_SeqSELL, 1836 0, 1837 /*104*/ 0, 1838 0, 1839 0, 1840 0, 1841 0, 1842 /*109*/ 0, 1843 0, 1844 0, 1845 0, 1846 MatMissingDiagonal_SeqSELL, 1847 /*114*/ 0, 1848 0, 1849 0, 1850 0, 1851 0, 1852 /*119*/ 0, 1853 0, 1854 0, 1855 0, 1856 0, 1857 /*124*/ 0, 1858 0, 1859 0, 1860 0, 1861 0, 1862 /*129*/ 0, 1863 0, 1864 0, 1865 0, 1866 0, 1867 /*134*/ 0, 1868 0, 1869 0, 1870 0, 1871 0, 1872 /*139*/ 0, 1873 0, 1874 0, 1875 MatFDColoringSetUp_SeqXAIJ, 1876 0, 1877 /*144*/0 1878 }; 1879 1880 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1881 { 1882 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1883 PetscErrorCode ierr; 1884 1885 PetscFunctionBegin; 1886 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1887 1888 /* allocate space for values if not already there */ 1889 if (!a->saved_values) { 1890 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1891 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1892 } 1893 1894 /* copy values over */ 1895 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1900 { 1901 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1906 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1907 /* copy values over */ 1908 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 /*@C 1913 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1914 1915 Not Collective 1916 1917 Input Parameters: 1918 . mat - a MATSEQSELL matrix 1919 . array - pointer to the data 1920 1921 Level: intermediate 1922 1923 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1924 @*/ 1925 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1926 { 1927 PetscErrorCode ierr; 1928 1929 PetscFunctionBegin; 1930 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1935 { 1936 Mat_SeqSELL *b; 1937 PetscMPIInt size; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1942 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1943 1944 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1945 1946 B->data = (void*)b; 1947 1948 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1949 1950 b->row = 0; 1951 b->col = 0; 1952 b->icol = 0; 1953 b->reallocs = 0; 1954 b->ignorezeroentries = PETSC_FALSE; 1955 b->roworiented = PETSC_TRUE; 1956 b->nonew = 0; 1957 b->diag = 0; 1958 b->solve_work = 0; 1959 B->spptr = 0; 1960 b->saved_values = 0; 1961 b->idiag = 0; 1962 b->mdiag = 0; 1963 b->ssor_work = 0; 1964 b->omega = 1.0; 1965 b->fshift = 0.0; 1966 b->idiagvalid = PETSC_FALSE; 1967 b->keepnonzeropattern = PETSC_FALSE; 1968 1969 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1970 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1971 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1972 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1973 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1974 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1976 PetscFunctionReturn(0); 1977 } 1978 1979 /* 1980 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1981 */ 1982 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1983 { 1984 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1985 PetscInt i,m=A->rmap->n; 1986 PetscInt totalslices=a->totalslices; 1987 PetscErrorCode ierr; 1988 1989 PetscFunctionBegin; 1990 c = (Mat_SeqSELL*)C->data; 1991 1992 C->factortype = A->factortype; 1993 c->row = 0; 1994 c->col = 0; 1995 c->icol = 0; 1996 c->reallocs = 0; 1997 1998 C->assembled = PETSC_TRUE; 1999 2000 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 2001 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 2002 2003 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2004 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2005 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2006 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2007 2008 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2009 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2010 2011 /* allocate the matrix space */ 2012 if (mallocmatspace) { 2013 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2014 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2015 2016 c->singlemalloc = PETSC_TRUE; 2017 2018 if (m > 0) { 2019 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 2020 if (cpvalues == MAT_COPY_VALUES) { 2021 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2022 } else { 2023 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2024 } 2025 } 2026 } 2027 2028 c->ignorezeroentries = a->ignorezeroentries; 2029 c->roworiented = a->roworiented; 2030 c->nonew = a->nonew; 2031 if (a->diag) { 2032 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2033 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2034 for (i=0; i<m; i++) { 2035 c->diag[i] = a->diag[i]; 2036 } 2037 } else c->diag = 0; 2038 2039 c->solve_work = 0; 2040 c->saved_values = 0; 2041 c->idiag = 0; 2042 c->ssor_work = 0; 2043 c->keepnonzeropattern = a->keepnonzeropattern; 2044 c->free_val = PETSC_TRUE; 2045 c->free_colidx = PETSC_TRUE; 2046 2047 c->maxallocmat = a->maxallocmat; 2048 c->maxallocrow = a->maxallocrow; 2049 c->rlenmax = a->rlenmax; 2050 c->nz = a->nz; 2051 C->preallocated = PETSC_TRUE; 2052 2053 c->nonzerorowcnt = a->nonzerorowcnt; 2054 C->nonzerostate = A->nonzerostate; 2055 2056 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2057 PetscFunctionReturn(0); 2058 } 2059 2060 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2061 { 2062 PetscErrorCode ierr; 2063 2064 PetscFunctionBegin; 2065 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2066 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2067 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2068 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2069 } 2070 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2071 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 /*@C 2076 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2077 2078 Collective on MPI_Comm 2079 2080 Input Parameters: 2081 + comm - MPI communicator, set to PETSC_COMM_SELF 2082 . m - number of rows 2083 . n - number of columns 2084 . rlenmax - maximum number of nonzeros in a row 2085 - rlen - array containing the number of nonzeros in the various rows 2086 (possibly different for each row) or NULL 2087 2088 Output Parameter: 2089 . A - the matrix 2090 2091 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2092 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2093 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2094 2095 Notes: 2096 If nnz is given then nz is ignored 2097 2098 Specify the preallocated storage with either rlenmax or rlen (not both). 2099 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2100 allocation. For large problems you MUST preallocate memory or you 2101 will get TERRIBLE performance, see the users' manual chapter on matrices. 2102 2103 Level: intermediate 2104 2105 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2106 2107 @*/ 2108 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2109 { 2110 PetscErrorCode ierr; 2111 2112 PetscFunctionBegin; 2113 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2114 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2115 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2116 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2121 { 2122 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2123 PetscInt totalslices=a->totalslices; 2124 PetscErrorCode ierr; 2125 2126 PetscFunctionBegin; 2127 /* If the matrix dimensions are not equal,or no of nonzeros */ 2128 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2129 *flg = PETSC_FALSE; 2130 PetscFunctionReturn(0); 2131 } 2132 /* if the a->colidx are the same */ 2133 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2134 if (!*flg) PetscFunctionReturn(0); 2135 /* if a->val are the same */ 2136 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2137 PetscFunctionReturn(0); 2138 } 2139 2140 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2141 { 2142 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2143 2144 PetscFunctionBegin; 2145 a->idiagvalid = PETSC_FALSE; 2146 PetscFunctionReturn(0); 2147 } 2148 2149 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2150 { 2151 #if defined(PETSC_USE_COMPLEX) 2152 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2153 PetscInt i; 2154 PetscScalar *val = a->val; 2155 2156 PetscFunctionBegin; 2157 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2158 val[i] = PetscConj(val[i]); 2159 } 2160 #else 2161 PetscFunctionBegin; 2162 #endif 2163 PetscFunctionReturn(0); 2164 } 2165