1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 9 10 #include <immintrin.h> 11 12 #if !defined(_MM_SCALE_8) 13 #define _MM_SCALE_8 8 14 #endif 15 16 #if defined(__AVX512F__) 17 /* these do not work 18 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 19 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 20 */ 21 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 22 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 23 vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \ 24 vec_vals = _mm512_loadu_pd(aval); \ 25 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 26 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 27 #elif defined(__AVX2__) && defined(__FMA__) 28 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 29 vec_vals = _mm256_loadu_pd(aval); \ 30 vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \ 31 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 32 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 33 #endif 34 #endif /* PETSC_HAVE_IMMINTRIN_H */ 35 36 /*@C 37 MatSeqSELLSetPreallocation - For good matrix assembly performance 38 the user should preallocate the matrix storage by setting the parameter nz 39 (or the array nnz). By setting these parameters accurately, performance 40 during matrix assembly can be increased significantly. 41 42 Collective on MPI_Comm 43 44 Input Parameters: 45 + B - The matrix 46 . nz - number of nonzeros per row (same for all rows) 47 - nnz - array containing the number of nonzeros in the various rows 48 (possibly different for each row) or NULL 49 50 Notes: 51 If nnz is given then nz is ignored. 52 53 Specify the preallocated storage with either nz or nnz (not both). 54 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 55 allocation. For large problems you MUST preallocate memory or you 56 will get TERRIBLE performance, see the users' manual chapter on matrices. 57 58 You can call MatGetInfo() to get information on how effective the preallocation was; 59 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 60 You can also run with the option -info and look for messages with the string 61 malloc in them to see if additional memory allocation was needed. 62 63 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 64 entries or columns indices. 65 66 The maximum number of nonzeos in any row should be as accuate as possible. 67 If it is underesitmated, you will get bad performance due to reallocation 68 (MatSeqXSELLReallocateSELL). 69 70 Level: intermediate 71 72 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 73 74 @*/ 75 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 76 { 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 81 PetscValidType(B,1); 82 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 87 { 88 Mat_SeqSELL *b; 89 PetscInt i,j,totalslices; 90 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 95 if (maxallocrow == MAT_SKIP_ALLOCATION) { 96 skipallocation = PETSC_TRUE; 97 maxallocrow = 0; 98 } 99 100 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 101 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 102 103 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 104 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 105 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 106 if (rlen) { 107 for (i=0; i<B->rmap->n; i++) { 108 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 109 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 110 } 111 } 112 113 B->preallocated = PETSC_TRUE; 114 115 b = (Mat_SeqSELL*)B->data; 116 117 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 118 b->totalslices = totalslices; 119 if (!skipallocation) { 120 if (B->rmap->n & 0x07) {ierr = PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);CHKERRQ(ierr);} 121 122 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 123 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 124 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 125 } 126 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 127 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 128 else if (maxallocrow < 0) maxallocrow = 1; 129 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 130 } else { 131 maxallocrow = 0; 132 b->sliidx[0] = 0; 133 for (i=1; i<totalslices; i++) { 134 b->sliidx[i] = 0; 135 for (j=0;j<8;j++) { 136 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 137 } 138 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 139 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 140 } 141 /* last slice */ 142 b->sliidx[totalslices] = 0; 143 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 144 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 145 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 146 } 147 148 /* allocate space for val, colidx, rlen */ 149 /* FIXME: should B's old memory be unlogged? */ 150 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 151 /* FIXME: assuming an element of the bit array takes 8 bits */ 152 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 153 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 154 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 155 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 156 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 157 158 b->singlemalloc = PETSC_TRUE; 159 b->free_val = PETSC_TRUE; 160 b->free_colidx = PETSC_TRUE; 161 } else { 162 b->free_val = PETSC_FALSE; 163 b->free_colidx = PETSC_FALSE; 164 } 165 166 b->nz = 0; 167 b->maxallocrow = maxallocrow; 168 b->rlenmax = maxallocrow; 169 b->maxallocmat = b->sliidx[totalslices]; 170 B->info.nz_unneeded = (double)b->maxallocmat; 171 if (realalloc) { 172 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 177 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 178 { 179 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 180 PetscInt shift; 181 182 PetscFunctionBegin; 183 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 184 if (nz) *nz = a->rlen[row]; 185 shift = a->sliidx[row>>3]+(row&0x07); 186 if (!a->getrowcols) { 187 PetscErrorCode ierr; 188 189 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 190 } 191 if (idx) { 192 PetscInt j; 193 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 194 *idx = a->getrowcols; 195 } 196 if (v) { 197 PetscInt j; 198 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 199 *v = a->getrowvals; 200 } 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 205 { 206 PetscFunctionBegin; 207 PetscFunctionReturn(0); 208 } 209 210 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 211 { 212 Mat B; 213 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 214 PetscInt i; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (reuse == MAT_REUSE_MATRIX) { 219 B = *newmat; 220 ierr = MatZeroEntries(B);CHKERRQ(ierr); 221 } else { 222 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 223 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 224 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 226 } 227 228 for (i=0; i<A->rmap->n; i++) { 229 PetscInt nz = 0,*cols = NULL; 230 PetscScalar *vals = NULL; 231 232 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 233 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 234 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 235 } 236 237 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 B->rmap->bs = A->rmap->bs; 240 241 if (reuse == MAT_INPLACE_MATRIX) { 242 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 243 } else { 244 *newmat = B; 245 } 246 PetscFunctionReturn(0); 247 } 248 249 #include <../src/mat/impls/aij/seq/aij.h> 250 251 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 252 { 253 Mat B; 254 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 255 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 256 const PetscInt *cols; 257 const PetscScalar *vals; 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 if (A->rmap->bs > 1) { 262 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 263 PetscFunctionReturn(0); 264 } 265 266 if (reuse == MAT_REUSE_MATRIX) { 267 B = *newmat; 268 } else { 269 /* Can we just use ilen? */ 270 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 271 for (i=0; i<m; i++) { 272 rowlengths[i] = ai[i+1] - ai[i]; 273 } 274 275 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 276 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 277 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 278 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 279 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 280 } 281 282 for (row=0; row<m; row++) { 283 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 284 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 285 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 286 } 287 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 289 B->rmap->bs = A->rmap->bs; 290 291 if (reuse == MAT_INPLACE_MATRIX) { 292 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 293 } else { 294 *newmat = B; 295 } 296 PetscFunctionReturn(0); 297 } 298 299 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 300 { 301 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 302 PetscScalar *y; 303 const PetscScalar *x; 304 const MatScalar *aval=a->val; 305 PetscInt totalslices=a->totalslices; 306 const PetscInt *acolidx=a->colidx; 307 PetscInt i,j; 308 PetscErrorCode ierr; 309 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 310 __m512d vec_x,vec_y,vec_vals; 311 __m256i vec_idx; 312 __mmask8 mask; 313 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 314 __m256i vec_idx2,vec_idx3,vec_idx4; 315 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 316 __m128i vec_idx; 317 __m256d vec_x,vec_y,vec_y2,vec_vals; 318 MatScalar yval; 319 PetscInt r,rows_left,row,nnz_in_row; 320 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 321 __m128d vec_x_tmp; 322 __m256d vec_x,vec_y,vec_y2,vec_vals; 323 MatScalar yval; 324 PetscInt r,rows_left,row,nnz_in_row; 325 #else 326 PetscScalar sum[8]; 327 #endif 328 329 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 330 #pragma disjoint(*x,*y,*aval) 331 #endif 332 333 PetscFunctionBegin; 334 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 335 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 336 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 337 for (i=0; i<totalslices; i++) { /* loop over slices */ 338 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 339 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 340 341 vec_y = _mm512_setzero_pd(); 342 vec_y2 = _mm512_setzero_pd(); 343 vec_y3 = _mm512_setzero_pd(); 344 vec_y4 = _mm512_setzero_pd(); 345 346 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 347 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 348 case 3: 349 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 350 acolidx += 8; aval += 8; 351 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 352 acolidx += 8; aval += 8; 353 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 354 acolidx += 8; aval += 8; 355 j += 3; 356 break; 357 case 2: 358 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 359 acolidx += 8; aval += 8; 360 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 361 acolidx += 8; aval += 8; 362 j += 2; 363 break; 364 case 1: 365 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 366 acolidx += 8; aval += 8; 367 j += 1; 368 break; 369 } 370 #pragma novector 371 for (; j<(a->sliidx[i+1]>>3); j+=4) { 372 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 373 acolidx += 8; aval += 8; 374 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 375 acolidx += 8; aval += 8; 376 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 377 acolidx += 8; aval += 8; 378 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 379 acolidx += 8; aval += 8; 380 } 381 382 vec_y = _mm512_add_pd(vec_y,vec_y2); 383 vec_y = _mm512_add_pd(vec_y,vec_y3); 384 vec_y = _mm512_add_pd(vec_y,vec_y4); 385 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 386 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 387 _mm512_mask_storeu_pd(&y[8*i],mask,vec_y); 388 } else { 389 _mm512_storeu_pd(&y[8*i],vec_y); 390 } 391 } 392 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 393 for (i=0; i<totalslices; i++) { /* loop over full slices */ 394 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 395 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 396 397 /* last slice may have padding rows. Don't use vectorization. */ 398 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 399 rows_left = A->rmap->n - 8*i; 400 for (r=0; r<rows_left; ++r) { 401 yval = (MatScalar)0; 402 row = 8*i + r; 403 nnz_in_row = a->rlen[row]; 404 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 405 y[row] = yval; 406 } 407 break; 408 } 409 410 vec_y = _mm256_setzero_pd(); 411 vec_y2 = _mm256_setzero_pd(); 412 413 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 414 #pragma novector 415 #pragma unroll(2) 416 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 417 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 418 aval += 4; acolidx += 4; 419 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 420 aval += 4; acolidx += 4; 421 } 422 423 _mm256_storeu_pd(y+i*8,vec_y); 424 _mm256_storeu_pd(y+i*8+4,vec_y2); 425 } 426 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 427 for (i=0; i<totalslices; i++) { /* loop over full slices */ 428 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 429 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 430 431 vec_y = _mm256_setzero_pd(); 432 vec_y2 = _mm256_setzero_pd(); 433 434 /* last slice may have padding rows. Don't use vectorization. */ 435 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 436 rows_left = A->rmap->n - 8*i; 437 for (r=0; r<rows_left; ++r) { 438 yval = (MatScalar)0; 439 row = 8*i + r; 440 nnz_in_row = a->rlen[row]; 441 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 442 y[row] = yval; 443 } 444 break; 445 } 446 447 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 448 #pragma novector 449 #pragma unroll(2) 450 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 451 vec_vals = _mm256_loadu_pd(aval); 452 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 453 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 454 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 455 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 456 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 457 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 458 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 459 aval += 4; 460 461 vec_vals = _mm256_loadu_pd(aval); 462 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 463 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 464 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 465 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 466 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 467 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 468 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 469 aval += 4; 470 } 471 472 _mm256_storeu_pd(y + i*8, vec_y); 473 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 474 } 475 #else 476 for (i=0; i<totalslices; i++) { /* loop over slices */ 477 for (j=0; j<8; j++) sum[j] = 0.0; 478 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 479 sum[0] += aval[j] * x[acolidx[j]]; 480 sum[1] += aval[j+1] * x[acolidx[j+1]]; 481 sum[2] += aval[j+2] * x[acolidx[j+2]]; 482 sum[3] += aval[j+3] * x[acolidx[j+3]]; 483 sum[4] += aval[j+4] * x[acolidx[j+4]]; 484 sum[5] += aval[j+5] * x[acolidx[j+5]]; 485 sum[6] += aval[j+6] * x[acolidx[j+6]]; 486 sum[7] += aval[j+7] * x[acolidx[j+7]]; 487 } 488 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 489 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 490 } else { 491 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 492 } 493 } 494 #endif 495 496 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 497 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 498 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 503 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 504 { 505 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 506 PetscScalar *y,*z; 507 const PetscScalar *x; 508 const MatScalar *aval=a->val; 509 PetscInt totalslices=a->totalslices; 510 const PetscInt *acolidx=a->colidx; 511 PetscInt i,j; 512 PetscErrorCode ierr; 513 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 514 __m512d vec_x,vec_y,vec_vals; 515 __m256i vec_idx; 516 __mmask8 mask; 517 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 518 __m256i vec_idx2,vec_idx3,vec_idx4; 519 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 520 __m128d vec_x_tmp; 521 __m256d vec_x,vec_y,vec_y2,vec_vals; 522 MatScalar yval; 523 PetscInt r,row,nnz_in_row; 524 #else 525 PetscScalar sum[8]; 526 #endif 527 528 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 529 #pragma disjoint(*x,*y,*aval) 530 #endif 531 532 PetscFunctionBegin; 533 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 534 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 535 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 536 for (i=0; i<totalslices; i++) { /* loop over slices */ 537 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 538 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 539 540 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 541 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 542 vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]); 543 } else { 544 vec_y = _mm512_loadu_pd(&y[8*i]); 545 } 546 vec_y2 = _mm512_setzero_pd(); 547 vec_y3 = _mm512_setzero_pd(); 548 vec_y4 = _mm512_setzero_pd(); 549 550 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 551 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 552 case 3: 553 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 554 acolidx += 8; aval += 8; 555 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 556 acolidx += 8; aval += 8; 557 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 558 acolidx += 8; aval += 8; 559 j += 3; 560 break; 561 case 2: 562 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 563 acolidx += 8; aval += 8; 564 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 565 acolidx += 8; aval += 8; 566 j += 2; 567 break; 568 case 1: 569 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 570 acolidx += 8; aval += 8; 571 j += 1; 572 break; 573 } 574 #pragma novector 575 for (; j<(a->sliidx[i+1]>>3); j+=4) { 576 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 577 acolidx += 8; aval += 8; 578 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 579 acolidx += 8; aval += 8; 580 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 581 acolidx += 8; aval += 8; 582 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 583 acolidx += 8; aval += 8; 584 } 585 586 vec_y = _mm512_add_pd(vec_y,vec_y2); 587 vec_y = _mm512_add_pd(vec_y,vec_y3); 588 vec_y = _mm512_add_pd(vec_y,vec_y4); 589 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 590 _mm512_mask_storeu_pd(&z[8*i],mask,vec_y); 591 } else { 592 _mm512_storeu_pd(&z[8*i],vec_y); 593 } 594 } 595 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 596 for (i=0; i<totalslices; i++) { /* loop over full slices */ 597 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 598 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 599 600 /* last slice may have padding rows. Don't use vectorization. */ 601 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 602 for (r=0; r<(A->rmap->n & 0x07); ++r) { 603 row = 8*i + r; 604 yval = (MatScalar)0.0; 605 nnz_in_row = a->rlen[row]; 606 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 607 z[row] = y[row] + yval; 608 } 609 break; 610 } 611 612 vec_y = _mm256_loadu_pd(y+8*i); 613 vec_y2 = _mm256_loadu_pd(y+8*i+4); 614 615 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 616 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 617 vec_vals = _mm256_loadu_pd(aval); 618 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 619 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 620 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 621 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 622 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 623 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 624 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 625 aval += 4; 626 627 vec_vals = _mm256_loadu_pd(aval); 628 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 629 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 630 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 631 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 632 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 633 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 634 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 635 aval += 4; 636 } 637 638 _mm256_storeu_pd(z+i*8,vec_y); 639 _mm256_storeu_pd(z+i*8+4,vec_y2); 640 } 641 #else 642 for (i=0; i<totalslices; i++) { /* loop over slices */ 643 for (j=0; j<8; j++) sum[j] = 0.0; 644 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 645 sum[0] += aval[j] * x[acolidx[j]]; 646 sum[1] += aval[j+1] * x[acolidx[j+1]]; 647 sum[2] += aval[j+2] * x[acolidx[j+2]]; 648 sum[3] += aval[j+3] * x[acolidx[j+3]]; 649 sum[4] += aval[j+4] * x[acolidx[j+4]]; 650 sum[5] += aval[j+5] * x[acolidx[j+5]]; 651 sum[6] += aval[j+6] * x[acolidx[j+6]]; 652 sum[7] += aval[j+7] * x[acolidx[j+7]]; 653 } 654 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 655 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 656 } else { 657 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 658 } 659 } 660 #endif 661 662 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 663 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 664 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 669 { 670 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 671 PetscScalar *y; 672 const PetscScalar *x; 673 const MatScalar *aval=a->val; 674 const PetscInt *acolidx=a->colidx; 675 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 676 PetscErrorCode ierr; 677 678 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 679 #pragma disjoint(*x,*y,*aval) 680 #endif 681 682 PetscFunctionBegin; 683 if (A->symmetric) { 684 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 688 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 689 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 690 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 691 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 692 for (r=0; r<(A->rmap->n & 0x07); ++r) { 693 row = 8*i + r; 694 nnz_in_row = a->rlen[row]; 695 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 696 } 697 break; 698 } 699 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 700 y[acolidx[j]] += aval[j] * x[8*i]; 701 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 702 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 703 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 704 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 705 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 706 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 707 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 708 } 709 } 710 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 711 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 712 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 717 { 718 PetscErrorCode ierr; 719 720 PetscFunctionBegin; 721 if (A->symmetric) { 722 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr); 723 } else { 724 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 725 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 /* 731 Checks for missing diagonals 732 */ 733 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 734 { 735 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 736 PetscInt *diag,i; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 *missing = PETSC_FALSE; 741 if (A->rmap->n > 0 && !(a->colidx)) { 742 *missing = PETSC_TRUE; 743 if (d) *d = 0; 744 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 745 } else { 746 diag = a->diag; 747 for (i=0; i<A->rmap->n; i++) { 748 if (diag[i] == -1) { 749 *missing = PETSC_TRUE; 750 if (d) *d = i; 751 ierr = PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);CHKERRQ(ierr); 752 break; 753 } 754 } 755 } 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 760 { 761 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 762 PetscInt i,j,m=A->rmap->n,shift; 763 PetscErrorCode ierr; 764 765 PetscFunctionBegin; 766 if (!a->diag) { 767 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 768 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 769 a->free_diag = PETSC_TRUE; 770 } 771 for (i=0; i<m; i++) { /* loop over rows */ 772 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 773 a->diag[i] = -1; 774 for (j=0; j<a->rlen[i]; j++) { 775 if (a->colidx[shift+j*8] == i) { 776 a->diag[i] = shift+j*8; 777 break; 778 } 779 } 780 } 781 PetscFunctionReturn(0); 782 } 783 784 /* 785 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 786 */ 787 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 788 { 789 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 790 PetscInt i,*diag,m = A->rmap->n; 791 MatScalar *val = a->val; 792 PetscScalar *idiag,*mdiag; 793 PetscErrorCode ierr; 794 795 PetscFunctionBegin; 796 if (a->idiagvalid) PetscFunctionReturn(0); 797 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 798 diag = a->diag; 799 if (!a->idiag) { 800 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 801 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 802 val = a->val; 803 } 804 mdiag = a->mdiag; 805 idiag = a->idiag; 806 807 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 808 for (i=0; i<m; i++) { 809 mdiag[i] = val[diag[i]]; 810 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 811 if (PetscRealPart(fshift)) { 812 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 813 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 814 A->factorerror_zeropivot_value = 0.0; 815 A->factorerror_zeropivot_row = i; 816 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 817 } 818 idiag[i] = 1.0/val[diag[i]]; 819 } 820 ierr = PetscLogFlops(m);CHKERRQ(ierr); 821 } else { 822 for (i=0; i<m; i++) { 823 mdiag[i] = val[diag[i]]; 824 idiag[i] = omega/(fshift + val[diag[i]]); 825 } 826 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 827 } 828 a->idiagvalid = PETSC_TRUE; 829 PetscFunctionReturn(0); 830 } 831 832 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 833 { 834 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 839 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 843 PetscErrorCode MatDestroy_SeqSELL(Mat A) 844 { 845 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 846 PetscErrorCode ierr; 847 848 PetscFunctionBegin; 849 #if defined(PETSC_USE_LOG) 850 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 851 #endif 852 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 853 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 854 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 855 ierr = PetscFree(a->diag);CHKERRQ(ierr); 856 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 857 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 858 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 859 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 860 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 861 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 862 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 863 864 ierr = PetscFree(A->data);CHKERRQ(ierr); 865 866 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 867 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 868 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 869 #if defined(PETSC_HAVE_ELEMENTAL) 870 #endif 871 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 872 PetscFunctionReturn(0); 873 } 874 875 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 876 { 877 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 878 PetscErrorCode ierr; 879 880 PetscFunctionBegin; 881 switch (op) { 882 case MAT_ROW_ORIENTED: 883 a->roworiented = flg; 884 break; 885 case MAT_KEEP_NONZERO_PATTERN: 886 a->keepnonzeropattern = flg; 887 break; 888 case MAT_NEW_NONZERO_LOCATIONS: 889 a->nonew = (flg ? 0 : 1); 890 break; 891 case MAT_NEW_NONZERO_LOCATION_ERR: 892 a->nonew = (flg ? -1 : 0); 893 break; 894 case MAT_NEW_NONZERO_ALLOCATION_ERR: 895 a->nonew = (flg ? -2 : 0); 896 break; 897 case MAT_UNUSED_NONZERO_LOCATION_ERR: 898 a->nounused = (flg ? -1 : 0); 899 break; 900 case MAT_NEW_DIAGONALS: 901 case MAT_IGNORE_OFF_PROC_ENTRIES: 902 case MAT_USE_HASH_TABLE: 903 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 904 break; 905 case MAT_SPD: 906 case MAT_SYMMETRIC: 907 case MAT_STRUCTURALLY_SYMMETRIC: 908 case MAT_HERMITIAN: 909 case MAT_SYMMETRY_ETERNAL: 910 /* These options are handled directly by MatSetOption() */ 911 break; 912 default: 913 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 914 } 915 PetscFunctionReturn(0); 916 } 917 918 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 919 { 920 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 921 PetscInt i,j,n,shift; 922 PetscScalar *x,zero=0.0; 923 PetscErrorCode ierr; 924 925 PetscFunctionBegin; 926 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 927 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 928 929 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 930 PetscInt *diag=a->diag; 931 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 932 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 933 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 934 PetscFunctionReturn(0); 935 } 936 937 ierr = VecSet(v,zero);CHKERRQ(ierr); 938 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 939 for (i=0; i<n; i++) { /* loop over rows */ 940 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 941 x[i] = 0; 942 for (j=0; j<a->rlen[i]; j++) { 943 if (a->colidx[shift+j*8] == i) { 944 x[i] = a->val[shift+j*8]; 945 break; 946 } 947 } 948 } 949 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 954 { 955 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 956 const PetscScalar *l,*r; 957 PetscInt i,j,m,n,row; 958 PetscErrorCode ierr; 959 960 PetscFunctionBegin; 961 if (ll) { 962 /* The local size is used so that VecMPI can be passed to this routine 963 by MatDiagonalScale_MPISELL */ 964 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 965 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 966 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 967 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 968 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 969 a->val[j] *= l[8*i+row]; 970 } 971 } 972 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 973 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 974 } 975 if (rr) { 976 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 977 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 978 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 979 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 980 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 981 a->val[j] *= r[a->colidx[j]]; 982 } 983 } 984 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 985 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 986 } 987 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 992 993 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 994 { 995 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 996 PetscInt *cp,i,k,low,high,t,row,col,l; 997 PetscInt shift; 998 MatScalar *vp; 999 1000 PetscFunctionBegin; 1001 for (k=0; k<m; k++) { /* loop over requested rows */ 1002 row = im[k]; 1003 if (row<0) continue; 1004 #if defined(PETSC_USE_DEBUG) 1005 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1006 #endif 1007 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1008 cp = a->colidx+shift; /* pointer to the row */ 1009 vp = a->val+shift; /* pointer to the row */ 1010 for (l=0; l<n; l++) { /* loop over requested columns */ 1011 col = in[l]; 1012 if (col<0) continue; 1013 #if defined(PETSC_USE_DEBUG) 1014 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 1015 #endif 1016 high = a->rlen[row]; low = 0; /* assume unsorted */ 1017 while (high-low > 5) { 1018 t = (low+high)/2; 1019 if (*(cp+t*8) > col) high = t; 1020 else low = t; 1021 } 1022 for (i=low; i<high; i++) { 1023 if (*(cp+8*i) > col) break; 1024 if (*(cp+8*i) == col) { 1025 *v++ = *(vp+8*i); 1026 goto finished; 1027 } 1028 } 1029 *v++ = 0.0; 1030 finished:; 1031 } 1032 } 1033 PetscFunctionReturn(0); 1034 } 1035 1036 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1037 { 1038 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1039 PetscInt i,j,m=A->rmap->n,shift; 1040 const char *name; 1041 PetscViewerFormat format; 1042 PetscErrorCode ierr; 1043 1044 PetscFunctionBegin; 1045 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1046 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1047 PetscInt nofinalvalue = 0; 1048 /* 1049 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1050 nofinalvalue = 1; 1051 } 1052 */ 1053 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1054 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1055 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1056 #if defined(PETSC_USE_COMPLEX) 1057 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1058 #else 1059 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1060 #endif 1061 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1062 1063 for (i=0; i<m; i++) { 1064 shift = a->sliidx[i>>3]+(i&0x07); 1065 for (j=0; j<a->rlen[i]; j++) { 1066 #if defined(PETSC_USE_COMPLEX) 1067 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1068 #else 1069 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1070 #endif 1071 } 1072 } 1073 /* 1074 if (nofinalvalue) { 1075 #if defined(PETSC_USE_COMPLEX) 1076 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1077 #else 1078 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1079 #endif 1080 } 1081 */ 1082 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1083 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1084 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1086 PetscFunctionReturn(0); 1087 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1088 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1089 for (i=0; i<m; i++) { 1090 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1091 shift = a->sliidx[i>>3]+(i&0x07); 1092 for (j=0; j<a->rlen[i]; j++) { 1093 #if defined(PETSC_USE_COMPLEX) 1094 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1095 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1096 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1097 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1098 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1099 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1100 } 1101 #else 1102 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1103 #endif 1104 } 1105 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1106 } 1107 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1108 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1109 PetscInt cnt=0,jcnt; 1110 PetscScalar value; 1111 #if defined(PETSC_USE_COMPLEX) 1112 PetscBool realonly=PETSC_TRUE; 1113 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1114 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1115 realonly = PETSC_FALSE; 1116 break; 1117 } 1118 } 1119 #endif 1120 1121 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1122 for (i=0; i<m; i++) { 1123 jcnt = 0; 1124 shift = a->sliidx[i>>3]+(i&0x07); 1125 for (j=0; j<A->cmap->n; j++) { 1126 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1127 value = a->val[cnt++]; 1128 jcnt++; 1129 } else { 1130 value = 0.0; 1131 } 1132 #if defined(PETSC_USE_COMPLEX) 1133 if (realonly) { 1134 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1135 } else { 1136 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1137 } 1138 #else 1139 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1140 #endif 1141 } 1142 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1143 } 1144 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1145 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1146 PetscInt fshift=1; 1147 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1148 #if defined(PETSC_USE_COMPLEX) 1149 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1150 #else 1151 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1152 #endif 1153 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1154 for (i=0; i<m; i++) { 1155 shift = a->sliidx[i>>3]+(i&0x07); 1156 for (j=0; j<a->rlen[i]; j++) { 1157 #if defined(PETSC_USE_COMPLEX) 1158 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1159 #else 1160 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1161 #endif 1162 } 1163 } 1164 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1165 } else if (format == PETSC_VIEWER_NATIVE) { 1166 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1167 PetscInt row; 1168 ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1169 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1170 #if defined(PETSC_USE_COMPLEX) 1171 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1172 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1173 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1174 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1175 } else { 1176 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1177 } 1178 #else 1179 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1180 #endif 1181 } 1182 } 1183 } else { 1184 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1185 if (A->factortype) { 1186 for (i=0; i<m; i++) { 1187 shift = a->sliidx[i>>3]+(i&0x07); 1188 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1189 /* L part */ 1190 for (j=shift; j<a->diag[i]; j+=8) { 1191 #if defined(PETSC_USE_COMPLEX) 1192 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1193 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1194 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1195 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1196 } else { 1197 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1198 } 1199 #else 1200 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1201 #endif 1202 } 1203 /* diagonal */ 1204 j = a->diag[i]; 1205 #if defined(PETSC_USE_COMPLEX) 1206 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1207 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1208 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1209 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1210 } else { 1211 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1212 } 1213 #else 1214 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1215 #endif 1216 1217 /* U part */ 1218 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1219 #if defined(PETSC_USE_COMPLEX) 1220 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1221 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1222 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1223 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1224 } else { 1225 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1226 } 1227 #else 1228 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1229 #endif 1230 } 1231 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1232 } 1233 } else { 1234 for (i=0; i<m; i++) { 1235 shift = a->sliidx[i>>3]+(i&0x07); 1236 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1237 for (j=0; j<a->rlen[i]; j++) { 1238 #if defined(PETSC_USE_COMPLEX) 1239 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1240 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1241 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1242 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1243 } else { 1244 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1245 } 1246 #else 1247 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1248 #endif 1249 } 1250 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1251 } 1252 } 1253 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1254 } 1255 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #include <petscdraw.h> 1260 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1261 { 1262 Mat A=(Mat)Aa; 1263 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1264 PetscInt i,j,m=A->rmap->n,shift; 1265 int color; 1266 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1267 PetscViewer viewer; 1268 PetscViewerFormat format; 1269 PetscErrorCode ierr; 1270 1271 PetscFunctionBegin; 1272 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1273 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1274 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1275 1276 /* loop over matrix elements drawing boxes */ 1277 1278 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1279 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1280 /* Blue for negative, Cyan for zero and Red for positive */ 1281 color = PETSC_DRAW_BLUE; 1282 for (i=0; i<m; i++) { 1283 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1284 y_l = m - i - 1.0; y_r = y_l + 1.0; 1285 for (j=0; j<a->rlen[i]; j++) { 1286 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1287 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1288 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1289 } 1290 } 1291 color = PETSC_DRAW_CYAN; 1292 for (i=0; i<m; i++) { 1293 shift = a->sliidx[i>>3]+(i&0x07); 1294 y_l = m - i - 1.0; y_r = y_l + 1.0; 1295 for (j=0; j<a->rlen[i]; j++) { 1296 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1297 if (a->val[shift+8*j] != 0.) continue; 1298 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1299 } 1300 } 1301 color = PETSC_DRAW_RED; 1302 for (i=0; i<m; i++) { 1303 shift = a->sliidx[i>>3]+(i&0x07); 1304 y_l = m - i - 1.0; y_r = y_l + 1.0; 1305 for (j=0; j<a->rlen[i]; j++) { 1306 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1307 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1308 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1309 } 1310 } 1311 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1312 } else { 1313 /* use contour shading to indicate magnitude of values */ 1314 /* first determine max of all nonzero values */ 1315 PetscReal minv=0.0,maxv=0.0; 1316 PetscInt count=0; 1317 PetscDraw popup; 1318 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1319 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1320 } 1321 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1322 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1323 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1324 1325 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1326 for (i=0; i<m; i++) { 1327 shift = a->sliidx[i>>3]+(i&0x07); 1328 y_l = m - i - 1.0; 1329 y_r = y_l + 1.0; 1330 for (j=0; j<a->rlen[i]; j++) { 1331 x_l = a->colidx[shift+j*8]; 1332 x_r = x_l + 1.0; 1333 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1334 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1335 count++; 1336 } 1337 } 1338 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1339 } 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #include <petscdraw.h> 1344 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1345 { 1346 PetscDraw draw; 1347 PetscReal xr,yr,xl,yl,h,w; 1348 PetscBool isnull; 1349 PetscErrorCode ierr; 1350 1351 PetscFunctionBegin; 1352 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1353 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1354 if (isnull) PetscFunctionReturn(0); 1355 1356 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1357 xr += w; yr += h; xl = -w; yl = -h; 1358 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1359 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1360 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1361 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1362 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1367 { 1368 PetscBool iascii,isbinary,isdraw; 1369 PetscErrorCode ierr; 1370 1371 PetscFunctionBegin; 1372 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1373 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1374 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1375 if (iascii) { 1376 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1377 } else if (isbinary) { 1378 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1379 } else if (isdraw) { 1380 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1386 { 1387 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1388 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1389 MatScalar *vp; 1390 PetscErrorCode ierr; 1391 1392 PetscFunctionBegin; 1393 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1394 /* To do: compress out the unused elements */ 1395 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1396 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1397 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1398 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1399 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1400 for (i=0; i<a->totalslices; ++i) { 1401 shift = a->sliidx[i]; /* starting index of the slice */ 1402 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1403 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1404 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1405 row = 8*i + row_in_slice; 1406 nrow = a->rlen[row]; /* number of nonzeros in row */ 1407 /* 1408 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1409 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1410 */ 1411 lastcol = 0; 1412 if (nrow>0) { /* nonempty row */ 1413 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1414 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1415 for (j=1;j<8;j++) { 1416 if (a->rlen[8*i+j]) { 1417 lastcol = cp[j]; 1418 break; 1419 } 1420 } 1421 } else { 1422 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1423 } 1424 1425 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1426 cp[8*k+row_in_slice] = lastcol; 1427 vp[8*k+row_in_slice] = (MatScalar)0; 1428 } 1429 } 1430 } 1431 1432 A->info.mallocs += a->reallocs; 1433 a->reallocs = 0; 1434 1435 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1440 { 1441 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1442 1443 PetscFunctionBegin; 1444 info->block_size = 1.0; 1445 info->nz_allocated = (double)a->maxallocmat; 1446 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1447 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1448 info->assemblies = (double)A->num_ass; 1449 info->mallocs = (double)A->info.mallocs; 1450 info->memory = ((PetscObject)A)->mem; 1451 if (A->factortype) { 1452 info->fill_ratio_given = A->info.fill_ratio_given; 1453 info->fill_ratio_needed = A->info.fill_ratio_needed; 1454 info->factor_mallocs = A->info.factor_mallocs; 1455 } else { 1456 info->fill_ratio_given = 0; 1457 info->fill_ratio_needed = 0; 1458 info->factor_mallocs = 0; 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1464 { 1465 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1466 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1467 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1468 MatScalar *vp,value; 1469 PetscErrorCode ierr; 1470 1471 PetscFunctionBegin; 1472 for (k=0; k<m; k++) { /* loop over added rows */ 1473 row = im[k]; 1474 if (row < 0) continue; 1475 #if defined(PETSC_USE_DEBUG) 1476 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1477 #endif 1478 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1479 cp = a->colidx+shift; /* pointer to the row */ 1480 vp = a->val+shift; /* pointer to the row */ 1481 nrow = a->rlen[row]; 1482 low = 0; 1483 high = nrow; 1484 1485 for (l=0; l<n; l++) { /* loop over added columns */ 1486 col = in[l]; 1487 if (col<0) continue; 1488 #if defined(PETSC_USE_DEBUG) 1489 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1490 #endif 1491 if (a->roworiented) { 1492 value = v[l+k*n]; 1493 } else { 1494 value = v[k+l*m]; 1495 } 1496 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1497 1498 /* search in this row for the specified colmun, i indicates the column to be set */ 1499 if (col <= lastcol) low = 0; 1500 else high = nrow; 1501 lastcol = col; 1502 while (high-low > 5) { 1503 t = (low+high)/2; 1504 if (*(cp+t*8) > col) high = t; 1505 else low = t; 1506 } 1507 for (i=low; i<high; i++) { 1508 if (*(cp+i*8) > col) break; 1509 if (*(cp+i*8) == col) { 1510 if (is == ADD_VALUES) *(vp+i*8) += value; 1511 else *(vp+i*8) = value; 1512 low = i + 1; 1513 goto noinsert; 1514 } 1515 } 1516 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1517 if (nonew == 1) goto noinsert; 1518 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1519 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1520 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1521 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1522 for (ii=nrow-1; ii>=i; ii--) { 1523 *(cp+(ii+1)*8) = *(cp+ii*8); 1524 *(vp+(ii+1)*8) = *(vp+ii*8); 1525 } 1526 a->rlen[row]++; 1527 *(cp+i*8) = col; 1528 *(vp+i*8) = value; 1529 a->nz++; 1530 A->nonzerostate++; 1531 low = i+1; high++; nrow++; 1532 noinsert:; 1533 } 1534 a->rlen[row] = nrow; 1535 } 1536 PetscFunctionReturn(0); 1537 } 1538 1539 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1540 { 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 /* If the two matrices have the same copy implementation, use fast copy. */ 1545 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1546 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1547 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1548 1549 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1550 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1551 } else { 1552 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1558 { 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1567 { 1568 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1569 1570 PetscFunctionBegin; 1571 *array = a->val; 1572 PetscFunctionReturn(0); 1573 } 1574 1575 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1576 { 1577 PetscFunctionBegin; 1578 PetscFunctionReturn(0); 1579 } 1580 1581 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1582 { 1583 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1584 PetscInt i; 1585 MatScalar *aval=a->val; 1586 1587 PetscFunctionBegin; 1588 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1589 PetscFunctionReturn(0); 1590 } 1591 1592 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1593 { 1594 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1595 PetscInt i; 1596 MatScalar *aval=a->val; 1597 PetscErrorCode ierr; 1598 1599 PetscFunctionBegin; 1600 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1601 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1602 PetscFunctionReturn(0); 1603 } 1604 1605 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1606 { 1607 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1608 MatScalar *aval=a->val; 1609 PetscScalar oalpha=alpha; 1610 PetscBLASInt one=1,size; 1611 PetscErrorCode ierr; 1612 1613 PetscFunctionBegin; 1614 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1615 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1616 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1617 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1618 PetscFunctionReturn(0); 1619 } 1620 1621 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1622 { 1623 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1624 PetscErrorCode ierr; 1625 1626 PetscFunctionBegin; 1627 if (!Y->preallocated || !y->nz) { 1628 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1629 } 1630 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1631 PetscFunctionReturn(0); 1632 } 1633 1634 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1635 { 1636 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1637 PetscScalar *x,sum,*t; 1638 const MatScalar *idiag=0,*mdiag; 1639 const PetscScalar *b,*xb; 1640 PetscInt n,m=A->rmap->n,i,j,shift; 1641 const PetscInt *diag; 1642 PetscErrorCode ierr; 1643 1644 PetscFunctionBegin; 1645 its = its*lits; 1646 1647 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1648 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1649 a->fshift = fshift; 1650 a->omega = omega; 1651 1652 diag = a->diag; 1653 t = a->ssor_work; 1654 idiag = a->idiag; 1655 mdiag = a->mdiag; 1656 1657 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1658 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1659 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1660 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1661 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1662 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1663 1664 if (flag & SOR_ZERO_INITIAL_GUESS) { 1665 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1666 for (i=0; i<m; i++) { 1667 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1668 sum = b[i]; 1669 n = (diag[i]-shift)/8; 1670 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1671 t[i] = sum; 1672 x[i] = sum*idiag[i]; 1673 } 1674 xb = t; 1675 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1676 } else xb = b; 1677 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1678 for (i=m-1; i>=0; i--) { 1679 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1680 sum = xb[i]; 1681 n = a->rlen[i]-(diag[i]-shift)/8-1; 1682 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1683 if (xb == b) { 1684 x[i] = sum*idiag[i]; 1685 } else { 1686 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1687 } 1688 } 1689 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1690 } 1691 its--; 1692 } 1693 while (its--) { 1694 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1695 for (i=0; i<m; i++) { 1696 /* lower */ 1697 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1698 sum = b[i]; 1699 n = (diag[i]-shift)/8; 1700 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1701 t[i] = sum; /* save application of the lower-triangular part */ 1702 /* upper */ 1703 n = a->rlen[i]-(diag[i]-shift)/8-1; 1704 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1705 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1706 } 1707 xb = t; 1708 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1709 } else xb = b; 1710 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1711 for (i=m-1; i>=0; i--) { 1712 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1713 sum = xb[i]; 1714 if (xb == b) { 1715 /* whole matrix (no checkpointing available) */ 1716 n = a->rlen[i]; 1717 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1718 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1719 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1720 n = a->rlen[i]-(diag[i]-shift)/8-1; 1721 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1722 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1723 } 1724 } 1725 if (xb == b) { 1726 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1727 } else { 1728 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1729 } 1730 } 1731 } 1732 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1733 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1734 PetscFunctionReturn(0); 1735 } 1736 1737 /* -------------------------------------------------------------------*/ 1738 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1739 MatGetRow_SeqSELL, 1740 MatRestoreRow_SeqSELL, 1741 MatMult_SeqSELL, 1742 /* 4*/ MatMultAdd_SeqSELL, 1743 MatMultTranspose_SeqSELL, 1744 MatMultTransposeAdd_SeqSELL, 1745 0, 1746 0, 1747 0, 1748 /* 10*/ 0, 1749 0, 1750 0, 1751 MatSOR_SeqSELL, 1752 0, 1753 /* 15*/ MatGetInfo_SeqSELL, 1754 MatEqual_SeqSELL, 1755 MatGetDiagonal_SeqSELL, 1756 MatDiagonalScale_SeqSELL, 1757 0, 1758 /* 20*/ 0, 1759 MatAssemblyEnd_SeqSELL, 1760 MatSetOption_SeqSELL, 1761 MatZeroEntries_SeqSELL, 1762 /* 24*/ 0, 1763 0, 1764 0, 1765 0, 1766 0, 1767 /* 29*/ MatSetUp_SeqSELL, 1768 0, 1769 0, 1770 0, 1771 0, 1772 /* 34*/ MatDuplicate_SeqSELL, 1773 0, 1774 0, 1775 0, 1776 0, 1777 /* 39*/ 0, 1778 0, 1779 0, 1780 MatGetValues_SeqSELL, 1781 MatCopy_SeqSELL, 1782 /* 44*/ 0, 1783 MatScale_SeqSELL, 1784 MatShift_SeqSELL, 1785 0, 1786 0, 1787 /* 49*/ 0, 1788 0, 1789 0, 1790 0, 1791 0, 1792 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1793 0, 1794 0, 1795 0, 1796 0, 1797 /* 59*/ 0, 1798 MatDestroy_SeqSELL, 1799 MatView_SeqSELL, 1800 0, 1801 0, 1802 /* 64*/ 0, 1803 0, 1804 0, 1805 0, 1806 0, 1807 /* 69*/ 0, 1808 0, 1809 0, 1810 0, 1811 0, 1812 /* 74*/ 0, 1813 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1814 0, 1815 0, 1816 0, 1817 /* 79*/ 0, 1818 0, 1819 0, 1820 0, 1821 0, 1822 /* 84*/ 0, 1823 0, 1824 0, 1825 0, 1826 0, 1827 /* 89*/ 0, 1828 0, 1829 0, 1830 0, 1831 0, 1832 /* 94*/ 0, 1833 0, 1834 0, 1835 0, 1836 0, 1837 /* 99*/ 0, 1838 0, 1839 0, 1840 MatConjugate_SeqSELL, 1841 0, 1842 /*104*/ 0, 1843 0, 1844 0, 1845 0, 1846 0, 1847 /*109*/ 0, 1848 0, 1849 0, 1850 0, 1851 MatMissingDiagonal_SeqSELL, 1852 /*114*/ 0, 1853 0, 1854 0, 1855 0, 1856 0, 1857 /*119*/ 0, 1858 0, 1859 0, 1860 0, 1861 0, 1862 /*124*/ 0, 1863 0, 1864 0, 1865 0, 1866 0, 1867 /*129*/ 0, 1868 0, 1869 0, 1870 0, 1871 0, 1872 /*134*/ 0, 1873 0, 1874 0, 1875 0, 1876 0, 1877 /*139*/ 0, 1878 0, 1879 0, 1880 MatFDColoringSetUp_SeqXAIJ, 1881 0, 1882 /*144*/0 1883 }; 1884 1885 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1886 { 1887 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1888 PetscErrorCode ierr; 1889 1890 PetscFunctionBegin; 1891 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1892 1893 /* allocate space for values if not already there */ 1894 if (!a->saved_values) { 1895 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1896 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1897 } 1898 1899 /* copy values over */ 1900 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1905 { 1906 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1907 PetscErrorCode ierr; 1908 1909 PetscFunctionBegin; 1910 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1911 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1912 /* copy values over */ 1913 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1914 PetscFunctionReturn(0); 1915 } 1916 1917 /*@C 1918 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1919 1920 Not Collective 1921 1922 Input Parameters: 1923 . mat - a MATSEQSELL matrix 1924 . array - pointer to the data 1925 1926 Level: intermediate 1927 1928 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1929 @*/ 1930 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1931 { 1932 PetscErrorCode ierr; 1933 1934 PetscFunctionBegin; 1935 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1940 { 1941 Mat_SeqSELL *b; 1942 PetscMPIInt size; 1943 PetscErrorCode ierr; 1944 1945 PetscFunctionBegin; 1946 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1947 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1948 1949 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1950 1951 B->data = (void*)b; 1952 1953 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1954 1955 b->row = 0; 1956 b->col = 0; 1957 b->icol = 0; 1958 b->reallocs = 0; 1959 b->ignorezeroentries = PETSC_FALSE; 1960 b->roworiented = PETSC_TRUE; 1961 b->nonew = 0; 1962 b->diag = 0; 1963 b->solve_work = 0; 1964 B->spptr = 0; 1965 b->saved_values = 0; 1966 b->idiag = 0; 1967 b->mdiag = 0; 1968 b->ssor_work = 0; 1969 b->omega = 1.0; 1970 b->fshift = 0.0; 1971 b->idiagvalid = PETSC_FALSE; 1972 b->keepnonzeropattern = PETSC_FALSE; 1973 1974 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1976 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1977 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1978 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1979 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1980 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1981 PetscFunctionReturn(0); 1982 } 1983 1984 /* 1985 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1986 */ 1987 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1988 { 1989 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1990 PetscInt i,m=A->rmap->n; 1991 PetscInt totalslices=a->totalslices; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 c = (Mat_SeqSELL*)C->data; 1996 1997 C->factortype = A->factortype; 1998 c->row = 0; 1999 c->col = 0; 2000 c->icol = 0; 2001 c->reallocs = 0; 2002 2003 C->assembled = PETSC_TRUE; 2004 2005 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 2006 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 2007 2008 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2009 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2010 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2011 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2012 2013 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2014 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2015 2016 /* allocate the matrix space */ 2017 if (mallocmatspace) { 2018 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2019 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2020 2021 c->singlemalloc = PETSC_TRUE; 2022 2023 if (m > 0) { 2024 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 2025 if (cpvalues == MAT_COPY_VALUES) { 2026 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2027 } else { 2028 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2029 } 2030 } 2031 } 2032 2033 c->ignorezeroentries = a->ignorezeroentries; 2034 c->roworiented = a->roworiented; 2035 c->nonew = a->nonew; 2036 if (a->diag) { 2037 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2038 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2039 for (i=0; i<m; i++) { 2040 c->diag[i] = a->diag[i]; 2041 } 2042 } else c->diag = 0; 2043 2044 c->solve_work = 0; 2045 c->saved_values = 0; 2046 c->idiag = 0; 2047 c->ssor_work = 0; 2048 c->keepnonzeropattern = a->keepnonzeropattern; 2049 c->free_val = PETSC_TRUE; 2050 c->free_colidx = PETSC_TRUE; 2051 2052 c->maxallocmat = a->maxallocmat; 2053 c->maxallocrow = a->maxallocrow; 2054 c->rlenmax = a->rlenmax; 2055 c->nz = a->nz; 2056 C->preallocated = PETSC_TRUE; 2057 2058 c->nonzerorowcnt = a->nonzerorowcnt; 2059 C->nonzerostate = A->nonzerostate; 2060 2061 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2062 PetscFunctionReturn(0); 2063 } 2064 2065 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2066 { 2067 PetscErrorCode ierr; 2068 2069 PetscFunctionBegin; 2070 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2071 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2072 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2073 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2074 } 2075 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2076 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 /*@C 2081 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2082 2083 Collective on MPI_Comm 2084 2085 Input Parameters: 2086 + comm - MPI communicator, set to PETSC_COMM_SELF 2087 . m - number of rows 2088 . n - number of columns 2089 . rlenmax - maximum number of nonzeros in a row 2090 - rlen - array containing the number of nonzeros in the various rows 2091 (possibly different for each row) or NULL 2092 2093 Output Parameter: 2094 . A - the matrix 2095 2096 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2097 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2098 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2099 2100 Notes: 2101 If nnz is given then nz is ignored 2102 2103 Specify the preallocated storage with either rlenmax or rlen (not both). 2104 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2105 allocation. For large problems you MUST preallocate memory or you 2106 will get TERRIBLE performance, see the users' manual chapter on matrices. 2107 2108 Level: intermediate 2109 2110 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2111 2112 @*/ 2113 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2114 { 2115 PetscErrorCode ierr; 2116 2117 PetscFunctionBegin; 2118 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2119 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2120 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2121 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2122 PetscFunctionReturn(0); 2123 } 2124 2125 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2126 { 2127 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2128 PetscInt totalslices=a->totalslices; 2129 PetscErrorCode ierr; 2130 2131 PetscFunctionBegin; 2132 /* If the matrix dimensions are not equal,or no of nonzeros */ 2133 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2134 *flg = PETSC_FALSE; 2135 PetscFunctionReturn(0); 2136 } 2137 /* if the a->colidx are the same */ 2138 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2139 if (!*flg) PetscFunctionReturn(0); 2140 /* if a->val are the same */ 2141 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2146 { 2147 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2148 2149 PetscFunctionBegin; 2150 a->idiagvalid = PETSC_FALSE; 2151 PetscFunctionReturn(0); 2152 } 2153 2154 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2155 { 2156 #if defined(PETSC_USE_COMPLEX) 2157 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2158 PetscInt i; 2159 PetscScalar *val = a->val; 2160 2161 PetscFunctionBegin; 2162 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2163 val[i] = PetscConj(val[i]); 2164 } 2165 #else 2166 PetscFunctionBegin; 2167 #endif 2168 PetscFunctionReturn(0); 2169 } 2170