1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) 9 #include <immintrin.h> 10 11 #if !defined(_MM_SCALE_8) 12 #define _MM_SCALE_8 8 13 #endif 14 15 #if defined(__AVX512F__) 16 /* these do not work 17 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 18 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 19 */ 20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 22 vec_idx = _mm256_load_si256((__m256i const*)acolidx); \ 23 vec_vals = _mm512_load_pd(aval); \ 24 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 26 #elif defined(__AVX2__) 27 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 28 vec_vals = _mm256_load_pd(aval); \ 29 vec_idx = _mm_load_si128((__m128i const*)acolidx); /* SSE2 */ \ 30 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 31 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 32 #endif 33 #endif /* PETSC_HAVE_IMMINTRIN_H */ 34 35 /*@C 36 MatSeqSELLSetPreallocation - For good matrix assembly performance 37 the user should preallocate the matrix storage by setting the parameter nz 38 (or the array nnz). By setting these parameters accurately, performance 39 during matrix assembly can be increased significantly. 40 41 Collective on MPI_Comm 42 43 Input Parameters: 44 + B - The matrix 45 . nz - number of nonzeros per row (same for all rows) 46 - nnz - array containing the number of nonzeros in the various rows 47 (possibly different for each row) or NULL 48 49 Notes: 50 If nnz is given then nz is ignored. 51 52 Specify the preallocated storage with either nz or nnz (not both). 53 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 54 allocation. For large problems you MUST preallocate memory or you 55 will get TERRIBLE performance, see the users' manual chapter on matrices. 56 57 You can call MatGetInfo() to get information on how effective the preallocation was; 58 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 59 You can also run with the option -info and look for messages with the string 60 malloc in them to see if additional memory allocation was needed. 61 62 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 63 entries or columns indices. 64 65 The maximum number of nonzeos in any row should be as accuate as possible. 66 If it is underesitmated, you will get bad performance due to reallocation 67 (MatSeqXSELLReallocateSELL). 68 69 Level: intermediate 70 71 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 72 73 @*/ 74 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 75 { 76 PetscErrorCode ierr; 77 78 PetscFunctionBegin; 79 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 80 PetscValidType(B,1); 81 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 86 { 87 Mat_SeqSELL *b; 88 PetscInt i,j,totalslices; 89 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 94 if (maxallocrow == MAT_SKIP_ALLOCATION) { 95 skipallocation = PETSC_TRUE; 96 maxallocrow = 0; 97 } 98 99 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 100 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 101 102 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 103 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 104 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 105 if (rlen) { 106 for (i=0; i<B->rmap->n; i++) { 107 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 108 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 109 } 110 } 111 112 B->preallocated = PETSC_TRUE; 113 114 b = (Mat_SeqSELL*)B->data; 115 116 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 117 b->totalslices = totalslices; 118 if (!skipallocation) { 119 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 120 121 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 122 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 123 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 124 } 125 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 126 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 127 else if (maxallocrow < 0) maxallocrow = 1; 128 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 129 } else { 130 maxallocrow = 0; 131 b->sliidx[0] = 0; 132 for (i=1; i<totalslices; i++) { 133 b->sliidx[i] = 0; 134 for (j=0;j<8;j++) { 135 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 136 } 137 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 138 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 139 } 140 /* last slice */ 141 b->sliidx[totalslices] = 0; 142 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 143 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 144 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 145 } 146 147 /* allocate space for val, colidx, rlen */ 148 /* FIXME: should B's old memory be unlogged? */ 149 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 150 /* FIXME: assuming an element of the bit array takes 8 bits */ 151 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 152 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 153 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 154 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 155 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 156 157 b->singlemalloc = PETSC_TRUE; 158 b->free_val = PETSC_TRUE; 159 b->free_colidx = PETSC_TRUE; 160 } else { 161 b->free_val = PETSC_FALSE; 162 b->free_colidx = PETSC_FALSE; 163 } 164 165 b->nz = 0; 166 b->maxallocrow = maxallocrow; 167 b->rlenmax = maxallocrow; 168 b->maxallocmat = b->sliidx[totalslices]; 169 B->info.nz_unneeded = (double)b->maxallocmat; 170 if (realalloc) { 171 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 177 { 178 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 179 PetscInt shift; 180 181 PetscFunctionBegin; 182 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 183 if (nz) *nz = a->rlen[row]; 184 shift = a->sliidx[row>>3]+(row&0x07); 185 if (!a->getrowcols) { 186 PetscErrorCode ierr; 187 188 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 189 } 190 if (idx) { 191 PetscInt j; 192 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 193 *idx = a->getrowcols; 194 } 195 if (v) { 196 PetscInt j; 197 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 198 *v = a->getrowvals; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 204 { 205 PetscFunctionBegin; 206 PetscFunctionReturn(0); 207 } 208 209 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 210 { 211 Mat B; 212 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 213 PetscInt i,j,row; 214 PetscBool isnonzero; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (reuse == MAT_REUSE_MATRIX) { 219 B = *newmat; 220 } else { 221 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 222 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 223 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 225 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 226 } 227 228 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 229 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 230 isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]); 231 if (isnonzero) { 232 ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr); 233 } 234 } 235 } 236 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 237 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 B->rmap->bs = A->rmap->bs; 239 240 if (reuse == MAT_INPLACE_MATRIX) { 241 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 242 } else { 243 *newmat = B; 244 } 245 PetscFunctionReturn(0); 246 } 247 248 #include <../src/mat/impls/aij/seq/aij.h> 249 250 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 251 { 252 Mat B; 253 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 254 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 255 const PetscInt *cols; 256 const PetscScalar *vals; 257 PetscErrorCode ierr; 258 259 PetscFunctionBegin; 260 if (A->rmap->bs > 1) { 261 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 262 PetscFunctionReturn(0); 263 } 264 265 if (reuse == MAT_REUSE_MATRIX) { 266 B = *newmat; 267 } else { 268 /* Can we just use ilen? */ 269 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 270 for (i=0; i<m; i++) { 271 rowlengths[i] = ai[i+1] - ai[i]; 272 } 273 274 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 275 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 276 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 277 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 278 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 279 } 280 281 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 282 283 for (row=0; row<m; row++) { 284 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 285 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 286 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 287 } 288 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 289 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 290 B->rmap->bs = A->rmap->bs; 291 292 if (reuse == MAT_INPLACE_MATRIX) { 293 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 294 } else { 295 *newmat = B; 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 301 { 302 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 303 PetscScalar *y; 304 const PetscScalar *x; 305 const MatScalar *aval=a->val; 306 PetscInt totalslices=a->totalslices; 307 const PetscInt *acolidx=a->colidx; 308 PetscInt i,j; 309 PetscErrorCode ierr; 310 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 311 __m512d vec_x,vec_y,vec_vals; 312 __m256i vec_idx; 313 __mmask8 mask; 314 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 315 __m256i vec_idx2,vec_idx3,vec_idx4; 316 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 317 __m128i vec_idx; 318 __m256d vec_x,vec_y,vec_y2,vec_vals; 319 MatScalar yval; 320 PetscInt r,rows_left,row,nnz_in_row; 321 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 322 __m128d vec_x_tmp; 323 __m256d vec_x,vec_y,vec_y2,vec_vals; 324 MatScalar yval; 325 PetscInt r,rows_left,row,nnz_in_row; 326 #else 327 PetscScalar sum[8]; 328 #endif 329 330 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 331 #pragma disjoint(*x,*y,*aval) 332 #endif 333 334 PetscFunctionBegin; 335 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 336 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 337 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 338 for (i=0; i<totalslices; i++) { /* loop over slices */ 339 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 340 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 341 342 vec_y = _mm512_setzero_pd(); 343 vec_y2 = _mm512_setzero_pd(); 344 vec_y3 = _mm512_setzero_pd(); 345 vec_y4 = _mm512_setzero_pd(); 346 347 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 348 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 349 case 3: 350 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 351 acolidx += 8; aval += 8; 352 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 353 acolidx += 8; aval += 8; 354 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 355 acolidx += 8; aval += 8; 356 j += 3; 357 break; 358 case 2: 359 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 360 acolidx += 8; aval += 8; 361 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 362 acolidx += 8; aval += 8; 363 j += 2; 364 break; 365 case 1: 366 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 367 acolidx += 8; aval += 8; 368 j += 1; 369 break; 370 } 371 #pragma novector 372 for (; j<(a->sliidx[i+1]>>3); j+=4) { 373 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 374 acolidx += 8; aval += 8; 375 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 376 acolidx += 8; aval += 8; 377 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 378 acolidx += 8; aval += 8; 379 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 380 acolidx += 8; aval += 8; 381 } 382 383 vec_y = _mm512_add_pd(vec_y,vec_y2); 384 vec_y = _mm512_add_pd(vec_y,vec_y3); 385 vec_y = _mm512_add_pd(vec_y,vec_y4); 386 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 387 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 388 _mm512_mask_store_pd(&y[8*i],mask,vec_y); 389 } else { 390 _mm512_store_pd(&y[8*i],vec_y); 391 } 392 } 393 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 394 for (i=0; i<totalslices; i++) { /* loop over full slices */ 395 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 396 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 397 398 /* last slice may have padding rows. Don't use vectorization. */ 399 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 400 rows_left = A->rmap->n - 8*i; 401 for (r=0; r<rows_left; ++r) { 402 yval = (MatScalar)0; 403 row = 8*i + r; 404 nnz_in_row = a->rlen[row]; 405 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 406 y[row] = yval; 407 } 408 break; 409 } 410 411 vec_y = _mm256_setzero_pd(); 412 vec_y2 = _mm256_setzero_pd(); 413 414 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 415 #pragma novector 416 #pragma unroll(2) 417 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 418 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 419 aval += 4; acolidx += 4; 420 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 421 aval += 4; acolidx += 4; 422 } 423 424 _mm256_store_pd(y+i*8,vec_y); 425 _mm256_store_pd(y+i*8+4,vec_y2); 426 } 427 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 428 for (i=0; i<totalslices; i++) { /* loop over full slices */ 429 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 430 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 431 432 vec_y = _mm256_setzero_pd(); 433 vec_y2 = _mm256_setzero_pd(); 434 435 /* last slice may have padding rows. Don't use vectorization. */ 436 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 437 rows_left = A->rmap->n - 8*i; 438 for (r=0; r<rows_left; ++r) { 439 yval = (MatScalar)0; 440 row = 8*i + r; 441 nnz_in_row = a->rlen[row]; 442 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 443 y[row] = yval; 444 } 445 break; 446 } 447 448 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 449 #pragma novector 450 #pragma unroll(2) 451 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 452 vec_vals = _mm256_loadu_pd(aval); 453 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 454 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 455 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 456 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 457 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 458 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 459 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 460 aval += 4; 461 462 vec_vals = _mm256_loadu_pd(aval); 463 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 464 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 465 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 466 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 467 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 468 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 469 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 470 aval += 4; 471 } 472 473 _mm256_storeu_pd(y + i*8, vec_y); 474 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 475 } 476 #else 477 for (i=0; i<totalslices; i++) { /* loop over slices */ 478 for (j=0; j<8; j++) sum[j] = 0.0; 479 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 480 sum[0] += aval[j] * x[acolidx[j]]; 481 sum[1] += aval[j+1] * x[acolidx[j+1]]; 482 sum[2] += aval[j+2] * x[acolidx[j+2]]; 483 sum[3] += aval[j+3] * x[acolidx[j+3]]; 484 sum[4] += aval[j+4] * x[acolidx[j+4]]; 485 sum[5] += aval[j+5] * x[acolidx[j+5]]; 486 sum[6] += aval[j+6] * x[acolidx[j+6]]; 487 sum[7] += aval[j+7] * x[acolidx[j+7]]; 488 } 489 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 490 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 491 } else { 492 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 493 } 494 } 495 #endif 496 497 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 498 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 499 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 504 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 505 { 506 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 507 PetscScalar *y,*z; 508 const PetscScalar *x; 509 const MatScalar *aval=a->val; 510 PetscInt totalslices=a->totalslices; 511 const PetscInt *acolidx=a->colidx; 512 PetscInt i,j; 513 PetscErrorCode ierr; 514 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 515 __m512d vec_x,vec_y,vec_vals; 516 __m256i vec_idx; 517 __mmask8 mask; 518 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 519 __m256i vec_idx2,vec_idx3,vec_idx4; 520 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 521 __m128d vec_x_tmp; 522 __m256d vec_x,vec_y,vec_y2,vec_vals; 523 MatScalar yval; 524 PetscInt r,row,nnz_in_row; 525 #else 526 PetscScalar sum[8]; 527 #endif 528 529 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 530 #pragma disjoint(*x,*y,*aval) 531 #endif 532 533 PetscFunctionBegin; 534 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 535 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 536 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 537 for (i=0; i<totalslices; i++) { /* loop over slices */ 538 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 539 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 540 541 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 542 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 543 vec_y = _mm512_mask_load_pd(vec_y,mask,&y[8*i]); 544 } else { 545 vec_y = _mm512_load_pd(&y[8*i]); 546 } 547 vec_y2 = _mm512_setzero_pd(); 548 vec_y3 = _mm512_setzero_pd(); 549 vec_y4 = _mm512_setzero_pd(); 550 551 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 552 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 553 case 3: 554 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 555 acolidx += 8; aval += 8; 556 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 557 acolidx += 8; aval += 8; 558 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 559 acolidx += 8; aval += 8; 560 j += 3; 561 break; 562 case 2: 563 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 564 acolidx += 8; aval += 8; 565 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 566 acolidx += 8; aval += 8; 567 j += 2; 568 break; 569 case 1: 570 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 571 acolidx += 8; aval += 8; 572 j += 1; 573 break; 574 } 575 #pragma novector 576 for (; j<(a->sliidx[i+1]>>3); j+=4) { 577 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 578 acolidx += 8; aval += 8; 579 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 580 acolidx += 8; aval += 8; 581 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 582 acolidx += 8; aval += 8; 583 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 584 acolidx += 8; aval += 8; 585 } 586 587 vec_y = _mm512_add_pd(vec_y,vec_y2); 588 vec_y = _mm512_add_pd(vec_y,vec_y3); 589 vec_y = _mm512_add_pd(vec_y,vec_y4); 590 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 591 _mm512_mask_store_pd(&z[8*i],mask,vec_y); 592 } else { 593 _mm512_store_pd(&z[8*i],vec_y); 594 } 595 } 596 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 597 for (i=0; i<totalslices; i++) { /* loop over full slices */ 598 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 599 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 600 601 /* last slice may have padding rows. Don't use vectorization. */ 602 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 603 for (r=0; r<(A->rmap->n & 0x07); ++r) { 604 row = 8*i + r; 605 yval = (MatScalar)0.0; 606 nnz_in_row = a->rlen[row]; 607 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 608 z[row] = y[row] + yval; 609 } 610 break; 611 } 612 613 vec_y = _mm256_loadu_pd(y+8*i); 614 vec_y2 = _mm256_loadu_pd(y+8*i+4); 615 616 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 617 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 618 vec_vals = _mm256_loadu_pd(aval); 619 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 620 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 621 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 622 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 623 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 624 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 625 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 626 aval += 4; 627 628 vec_vals = _mm256_loadu_pd(aval); 629 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 630 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 631 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 632 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 633 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 634 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 635 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 636 aval += 4; 637 } 638 639 _mm256_storeu_pd(z+i*8,vec_y); 640 _mm256_storeu_pd(z+i*8+4,vec_y2); 641 } 642 #else 643 for (i=0; i<totalslices; i++) { /* loop over slices */ 644 for (j=0; j<8; j++) sum[j] = 0.0; 645 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 646 sum[0] += aval[j] * x[acolidx[j]]; 647 sum[1] += aval[j+1] * x[acolidx[j+1]]; 648 sum[2] += aval[j+2] * x[acolidx[j+2]]; 649 sum[3] += aval[j+3] * x[acolidx[j+3]]; 650 sum[4] += aval[j+4] * x[acolidx[j+4]]; 651 sum[5] += aval[j+5] * x[acolidx[j+5]]; 652 sum[6] += aval[j+6] * x[acolidx[j+6]]; 653 sum[7] += aval[j+7] * x[acolidx[j+7]]; 654 } 655 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 656 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 657 } else { 658 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 659 } 660 } 661 #endif 662 663 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 664 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 665 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 666 PetscFunctionReturn(0); 667 } 668 669 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 670 { 671 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 672 PetscScalar *y; 673 const PetscScalar *x; 674 const MatScalar *aval=a->val; 675 const PetscInt *acolidx=a->colidx; 676 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 677 PetscErrorCode ierr; 678 679 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 680 #pragma disjoint(*x,*y,*aval) 681 #endif 682 683 PetscFunctionBegin; 684 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 685 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 686 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 687 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 688 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 689 for (r=0; r<(A->rmap->n & 0x07); ++r) { 690 row = 8*i + r; 691 nnz_in_row = a->rlen[row]; 692 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 693 } 694 break; 695 } 696 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 697 y[acolidx[j]] += aval[j] * x[8*i]; 698 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 699 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 700 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 701 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 702 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 703 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 704 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 705 } 706 } 707 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 708 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 709 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 714 { 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 719 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 /* 724 Checks for missing diagonals 725 */ 726 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 727 { 728 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 729 PetscInt *diag,i; 730 731 PetscFunctionBegin; 732 *missing = PETSC_FALSE; 733 if (A->rmap->n > 0 && !(a->colidx)) { 734 *missing = PETSC_TRUE; 735 if (d) *d = 0; 736 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 737 } else { 738 diag = a->diag; 739 for (i=0; i<A->rmap->n; i++) { 740 if (diag[i] == -1) { 741 *missing = PETSC_TRUE; 742 if (d) *d = i; 743 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 744 break; 745 } 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 752 { 753 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 754 PetscInt i,j,m=A->rmap->n,shift; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 if (!a->diag) { 759 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 760 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 761 a->free_diag = PETSC_TRUE; 762 } 763 for (i=0; i<m; i++) { /* loop over rows */ 764 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 765 a->diag[i] = -1; 766 for (j=0; j<a->rlen[i]; j++) { 767 if (a->colidx[shift+j*8] == i) { 768 a->diag[i] = shift+j*8; 769 break; 770 } 771 } 772 } 773 PetscFunctionReturn(0); 774 } 775 776 /* 777 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 778 */ 779 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 780 { 781 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 782 PetscInt i,*diag,m = A->rmap->n; 783 MatScalar *val = a->val; 784 PetscScalar *idiag,*mdiag; 785 PetscErrorCode ierr; 786 787 PetscFunctionBegin; 788 if (a->idiagvalid) PetscFunctionReturn(0); 789 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 790 diag = a->diag; 791 if (!a->idiag) { 792 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 793 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 794 val = a->val; 795 } 796 mdiag = a->mdiag; 797 idiag = a->idiag; 798 799 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 800 for (i=0; i<m; i++) { 801 mdiag[i] = val[diag[i]]; 802 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 803 if (PetscRealPart(fshift)) { 804 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 805 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 806 A->factorerror_zeropivot_value = 0.0; 807 A->factorerror_zeropivot_row = i; 808 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 809 } 810 idiag[i] = 1.0/val[diag[i]]; 811 } 812 ierr = PetscLogFlops(m);CHKERRQ(ierr); 813 } else { 814 for (i=0; i<m; i++) { 815 mdiag[i] = val[diag[i]]; 816 idiag[i] = omega/(fshift + val[diag[i]]); 817 } 818 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 819 } 820 a->idiagvalid = PETSC_TRUE; 821 PetscFunctionReturn(0); 822 } 823 824 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 825 { 826 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 831 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 832 PetscFunctionReturn(0); 833 } 834 835 PetscErrorCode MatDestroy_SeqSELL(Mat A) 836 { 837 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 838 PetscErrorCode ierr; 839 840 PetscFunctionBegin; 841 #if defined(PETSC_USE_LOG) 842 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 843 #endif 844 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 845 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 846 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 847 ierr = PetscFree(a->diag);CHKERRQ(ierr); 848 ierr = PetscFree(a->ibdiag);CHKERRQ(ierr); 849 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 850 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 851 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 852 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 853 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 854 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 855 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 856 857 ierr = PetscFree(A->data);CHKERRQ(ierr); 858 859 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 862 #if defined(PETSC_HAVE_ELEMENTAL) 863 #endif 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 869 { 870 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 871 PetscErrorCode ierr; 872 873 PetscFunctionBegin; 874 switch (op) { 875 case MAT_ROW_ORIENTED: 876 a->roworiented = flg; 877 break; 878 case MAT_KEEP_NONZERO_PATTERN: 879 a->keepnonzeropattern = flg; 880 break; 881 case MAT_NEW_NONZERO_LOCATIONS: 882 a->nonew = (flg ? 0 : 1); 883 break; 884 case MAT_NEW_NONZERO_LOCATION_ERR: 885 a->nonew = (flg ? -1 : 0); 886 break; 887 case MAT_NEW_NONZERO_ALLOCATION_ERR: 888 a->nonew = (flg ? -2 : 0); 889 break; 890 case MAT_UNUSED_NONZERO_LOCATION_ERR: 891 a->nounused = (flg ? -1 : 0); 892 break; 893 case MAT_NEW_DIAGONALS: 894 case MAT_IGNORE_OFF_PROC_ENTRIES: 895 case MAT_USE_HASH_TABLE: 896 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 897 break; 898 case MAT_SPD: 899 case MAT_SYMMETRIC: 900 case MAT_STRUCTURALLY_SYMMETRIC: 901 case MAT_HERMITIAN: 902 case MAT_SYMMETRY_ETERNAL: 903 /* These options are handled directly by MatSetOption() */ 904 break; 905 default: 906 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 912 { 913 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 914 PetscInt i,j,n,shift; 915 PetscScalar *x,zero=0.0; 916 PetscErrorCode ierr; 917 918 PetscFunctionBegin; 919 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 920 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 921 922 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 923 PetscInt *diag=a->diag; 924 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 925 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 926 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 927 PetscFunctionReturn(0); 928 } 929 930 ierr = VecSet(v,zero);CHKERRQ(ierr); 931 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 932 for (i=0; i<n; i++) { /* loop over rows */ 933 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 934 x[i] = 0; 935 for (j=0; j<a->rlen[i]; j++) { 936 if (a->colidx[shift+j*8] == i) { 937 x[i] = a->val[shift+j*8]; 938 break; 939 } 940 } 941 } 942 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 947 { 948 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 949 const PetscScalar *l,*r; 950 PetscInt i,j,m,n,row; 951 PetscErrorCode ierr; 952 953 PetscFunctionBegin; 954 if (ll) { 955 /* The local size is used so that VecMPI can be passed to this routine 956 by MatDiagonalScale_MPISELL */ 957 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 958 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 959 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 960 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 961 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 962 a->val[j] *= l[8*i+row]; 963 } 964 } 965 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 966 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 967 } 968 if (rr) { 969 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 970 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 971 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 972 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 973 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 974 a->val[j] *= r[a->colidx[j]]; 975 } 976 } 977 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 978 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 979 } 980 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 985 986 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 987 { 988 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 989 PetscInt *cp,i,k,low,high,t,row,col,l; 990 PetscInt shift; 991 MatScalar *vp; 992 993 PetscFunctionBegin; 994 for (k=0; k<m; k++) { /* loop over added rows */ 995 row = im[k]; 996 if (row<0) continue; 997 #if defined(PETSC_USE_DEBUG) 998 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 999 #endif 1000 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1001 cp = a->colidx+shift; /* pointer to the row */ 1002 vp = a->val+shift; /* pointer to the row */ 1003 for (l=0; l<n; l++) { /* loop over added rows */ 1004 col = in[l]; 1005 if (col<0) continue; 1006 #if defined(PETSC_USE_DEBUG) 1007 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 1008 #endif 1009 high = a->rlen[row]; low = 0; /* assume unsorted */ 1010 while (high-low > 5) { 1011 t = (low+high)/2; 1012 if (*(cp+t*8) > col) high = t; 1013 else low = t; 1014 } 1015 for (i=low; i<high; i++) { 1016 if (*(cp+8*i) > col) break; 1017 if (*(cp+8*i) == col) { 1018 *v++ = *(vp+8*i); 1019 goto finished; 1020 } 1021 } 1022 *v++ = 0.0; 1023 finished:; 1024 } 1025 } 1026 PetscFunctionReturn(0); 1027 } 1028 1029 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1030 { 1031 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1032 PetscInt i,j,m=A->rmap->n,shift; 1033 const char *name; 1034 PetscViewerFormat format; 1035 PetscErrorCode ierr; 1036 1037 PetscFunctionBegin; 1038 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1039 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1040 PetscInt nofinalvalue = 0; 1041 /* 1042 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1043 nofinalvalue = 1; 1044 } 1045 */ 1046 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1047 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1048 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1049 #if defined(PETSC_USE_COMPLEX) 1050 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1051 #else 1052 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1053 #endif 1054 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1055 1056 for (i=0; i<m; i++) { 1057 shift = a->sliidx[i>>3]+(i&0x07); 1058 for (j=0; j<a->rlen[i]; j++) { 1059 #if defined(PETSC_USE_COMPLEX) 1060 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1061 #else 1062 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1063 #endif 1064 } 1065 } 1066 /* 1067 if (nofinalvalue) { 1068 #if defined(PETSC_USE_COMPLEX) 1069 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1070 #else 1071 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1072 #endif 1073 } 1074 */ 1075 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1076 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1077 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1078 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1079 PetscFunctionReturn(0); 1080 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1081 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1082 for (i=0; i<m; i++) { 1083 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1084 shift = a->sliidx[i>>3]+(i&0x07); 1085 for (j=0; j<a->rlen[i]; j++) { 1086 #if defined(PETSC_USE_COMPLEX) 1087 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1088 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1089 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1090 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1091 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1092 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1093 } 1094 #else 1095 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1096 #endif 1097 } 1098 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1099 } 1100 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1101 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1102 PetscInt cnt=0,jcnt; 1103 PetscScalar value; 1104 #if defined(PETSC_USE_COMPLEX) 1105 PetscBool realonly=PETSC_TRUE; 1106 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1107 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1108 realonly = PETSC_FALSE; 1109 break; 1110 } 1111 } 1112 #endif 1113 1114 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1115 for (i=0; i<m; i++) { 1116 jcnt = 0; 1117 shift = a->sliidx[i>>3]+(i&0x07); 1118 for (j=0; j<A->cmap->n; j++) { 1119 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1120 value = a->val[cnt++]; 1121 jcnt++; 1122 } else { 1123 value = 0.0; 1124 } 1125 #if defined(PETSC_USE_COMPLEX) 1126 if (realonly) { 1127 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1128 } else { 1129 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1130 } 1131 #else 1132 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1133 #endif 1134 } 1135 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1136 } 1137 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1138 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1139 PetscInt fshift=1; 1140 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1141 #if defined(PETSC_USE_COMPLEX) 1142 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1143 #else 1144 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1145 #endif 1146 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1147 for (i=0; i<m; i++) { 1148 shift = a->sliidx[i>>3]+(i&0x07); 1149 for (j=0; j<a->rlen[i]; j++) { 1150 #if defined(PETSC_USE_COMPLEX) 1151 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1152 #else 1153 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1154 #endif 1155 } 1156 } 1157 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1158 } else { 1159 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1160 if (A->factortype) { 1161 for (i=0; i<m; i++) { 1162 shift = a->sliidx[i>>3]+(i&0x07); 1163 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1164 /* L part */ 1165 for (j=shift; j<a->diag[i]; j+=8) { 1166 #if defined(PETSC_USE_COMPLEX) 1167 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1168 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1169 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1170 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1171 } else { 1172 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1173 } 1174 #else 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1176 #endif 1177 } 1178 /* diagonal */ 1179 j = a->diag[i]; 1180 #if defined(PETSC_USE_COMPLEX) 1181 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1182 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1183 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1184 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1185 } else { 1186 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1187 } 1188 #else 1189 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1190 #endif 1191 1192 /* U part */ 1193 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1194 #if defined(PETSC_USE_COMPLEX) 1195 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1196 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1197 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1198 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1199 } else { 1200 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1201 } 1202 #else 1203 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1204 #endif 1205 } 1206 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1207 } 1208 } else { 1209 for (i=0; i<m; i++) { 1210 shift = a->sliidx[i>>3]+(i&0x07); 1211 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1212 for (j=0; j<a->rlen[i]; j++) { 1213 #if defined(PETSC_USE_COMPLEX) 1214 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1215 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1216 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1217 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1218 } else { 1219 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1220 } 1221 #else 1222 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1223 #endif 1224 } 1225 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1226 } 1227 } 1228 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1229 } 1230 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #include <petscdraw.h> 1235 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1236 { 1237 Mat A=(Mat)Aa; 1238 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1239 PetscInt i,j,m=A->rmap->n,shift; 1240 int color; 1241 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1242 PetscViewer viewer; 1243 PetscViewerFormat format; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1248 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1249 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1250 1251 /* loop over matrix elements drawing boxes */ 1252 1253 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1254 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1255 /* Blue for negative, Cyan for zero and Red for positive */ 1256 color = PETSC_DRAW_BLUE; 1257 for (i=0; i<m; i++) { 1258 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1259 y_l = m - i - 1.0; y_r = y_l + 1.0; 1260 for (j=0; j<a->rlen[i]; j++) { 1261 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1262 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1263 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1264 } 1265 } 1266 color = PETSC_DRAW_CYAN; 1267 for (i=0; i<m; i++) { 1268 shift = a->sliidx[i>>3]+(i&0x07); 1269 y_l = m - i - 1.0; y_r = y_l + 1.0; 1270 for (j=0; j<a->rlen[i]; j++) { 1271 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1272 if (a->val[shift+8*j] != 0.) continue; 1273 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1274 } 1275 } 1276 color = PETSC_DRAW_RED; 1277 for (i=0; i<m; i++) { 1278 shift = a->sliidx[i>>3]+(i&0x07); 1279 y_l = m - i - 1.0; y_r = y_l + 1.0; 1280 for (j=0; j<a->rlen[i]; j++) { 1281 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1282 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1283 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1284 } 1285 } 1286 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1287 } else { 1288 /* use contour shading to indicate magnitude of values */ 1289 /* first determine max of all nonzero values */ 1290 PetscReal minv=0.0,maxv=0.0; 1291 PetscInt count=0; 1292 PetscDraw popup; 1293 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1294 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1295 } 1296 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1297 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1298 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1299 1300 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1301 for (i=0; i<m; i++) { 1302 shift = a->sliidx[i>>3]+(i&0x07); 1303 y_l = m - i - 1.0; 1304 y_r = y_l + 1.0; 1305 for (j=0; j<a->rlen[i]; j++) { 1306 x_l = a->colidx[shift+j*8]; 1307 x_r = x_l + 1.0; 1308 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1309 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1310 count++; 1311 } 1312 } 1313 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1314 } 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #include <petscdraw.h> 1319 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1320 { 1321 PetscDraw draw; 1322 PetscReal xr,yr,xl,yl,h,w; 1323 PetscBool isnull; 1324 PetscErrorCode ierr; 1325 1326 PetscFunctionBegin; 1327 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1328 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1329 if (isnull) PetscFunctionReturn(0); 1330 1331 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1332 xr += w; yr += h; xl = -w; yl = -h; 1333 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1334 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1335 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1336 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1337 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1342 { 1343 PetscBool iascii,isbinary,isdraw; 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1348 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1349 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1350 if (iascii) { 1351 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1352 } else if (isbinary) { 1353 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1354 } else if (isdraw) { 1355 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1356 } 1357 PetscFunctionReturn(0); 1358 } 1359 1360 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1361 { 1362 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1363 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1364 MatScalar *vp; 1365 PetscErrorCode ierr; 1366 1367 PetscFunctionBegin; 1368 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1369 /* To do: compress out the unused elements */ 1370 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1371 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1372 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1373 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1374 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1375 for (i=0; i<a->totalslices; ++i) { 1376 shift = a->sliidx[i]; /* starting index of the slice */ 1377 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1378 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1379 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1380 row = 8*i + row_in_slice; 1381 nrow = a->rlen[row]; /* number of nonzeros in row */ 1382 /* 1383 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1384 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1385 */ 1386 lastcol = 0; 1387 if (nrow>0) { /* nonempty row */ 1388 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1389 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1390 for (j=1;j<8;j++) { 1391 if (a->rlen[8*i+j]) { 1392 lastcol = cp[j]; 1393 break; 1394 } 1395 } 1396 } else { 1397 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1398 } 1399 1400 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1401 cp[8*k+row_in_slice] = lastcol; 1402 vp[8*k+row_in_slice] = (MatScalar)0; 1403 } 1404 } 1405 } 1406 1407 A->info.mallocs += a->reallocs; 1408 a->reallocs = 0; 1409 1410 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1415 { 1416 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1417 1418 PetscFunctionBegin; 1419 info->block_size = 1.0; 1420 info->nz_allocated = (double)a->maxallocmat; 1421 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1422 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1423 info->assemblies = (double)A->num_ass; 1424 info->mallocs = (double)A->info.mallocs; 1425 info->memory = ((PetscObject)A)->mem; 1426 if (A->factortype) { 1427 info->fill_ratio_given = A->info.fill_ratio_given; 1428 info->fill_ratio_needed = A->info.fill_ratio_needed; 1429 info->factor_mallocs = A->info.factor_mallocs; 1430 } else { 1431 info->fill_ratio_given = 0; 1432 info->fill_ratio_needed = 0; 1433 info->factor_mallocs = 0; 1434 } 1435 PetscFunctionReturn(0); 1436 } 1437 1438 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1439 { 1440 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1441 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1442 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1443 MatScalar *vp,value; 1444 PetscErrorCode ierr; 1445 1446 PetscFunctionBegin; 1447 for (k=0; k<m; k++) { /* loop over added rows */ 1448 row = im[k]; 1449 if (row < 0) continue; 1450 #if defined(PETSC_USE_DEBUG) 1451 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1452 #endif 1453 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1454 cp = a->colidx+shift; /* pointer to the row */ 1455 vp = a->val+shift; /* pointer to the row */ 1456 nrow = a->rlen[row]; 1457 low = 0; 1458 high = nrow; 1459 1460 for (l=0; l<n; l++) { /* loop over added columns */ 1461 col = in[l]; 1462 if (col<0) continue; 1463 #if defined(PETSC_USE_DEBUG) 1464 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1465 #endif 1466 if (a->roworiented) { 1467 value = v[l+k*n]; 1468 } else { 1469 value = v[k+l*m]; 1470 } 1471 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1472 1473 /* search in this row for the specified colmun, i indicates the column to be set */ 1474 if (col <= lastcol) low = 0; 1475 else high = nrow; 1476 lastcol = col; 1477 while (high-low > 5) { 1478 t = (low+high)/2; 1479 if (*(cp+t*8) > col) high = t; 1480 else low = t; 1481 } 1482 for (i=low; i<high; i++) { 1483 if (*(cp+i*8) > col) break; 1484 if (*(cp+i*8) == col) { 1485 if (is == ADD_VALUES) *(vp+i*8) += value; 1486 else *(vp+i*8) = value; 1487 low = i + 1; 1488 goto noinsert; 1489 } 1490 } 1491 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1492 if (nonew == 1) goto noinsert; 1493 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1494 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1495 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1496 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1497 for (ii=nrow-1; ii>=i; ii--) { 1498 *(cp+(ii+1)*8) = *(cp+ii*8); 1499 *(vp+(ii+1)*8) = *(vp+ii*8); 1500 } 1501 a->rlen[row]++; 1502 *(cp+i*8) = col; 1503 *(vp+i*8) = value; 1504 a->nz++; 1505 A->nonzerostate++; 1506 low = i+1; high++; nrow++; 1507 noinsert:; 1508 } 1509 a->rlen[row] = nrow; 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1515 { 1516 PetscErrorCode ierr; 1517 1518 PetscFunctionBegin; 1519 /* If the two matrices have the same copy implementation, use fast copy. */ 1520 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1521 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1522 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1523 1524 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1525 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1526 } else { 1527 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1533 { 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1542 { 1543 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1544 1545 PetscFunctionBegin; 1546 *array = a->val; 1547 PetscFunctionReturn(0); 1548 } 1549 1550 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1551 { 1552 PetscFunctionBegin; 1553 PetscFunctionReturn(0); 1554 } 1555 1556 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1557 { 1558 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1559 PetscInt i; 1560 MatScalar *aval=a->val; 1561 1562 PetscFunctionBegin; 1563 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1564 PetscFunctionReturn(0); 1565 } 1566 1567 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1568 { 1569 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1570 PetscInt i; 1571 MatScalar *aval=a->val; 1572 PetscErrorCode ierr; 1573 1574 PetscFunctionBegin; 1575 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1576 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1581 { 1582 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1583 MatScalar *aval=a->val; 1584 PetscScalar oalpha=alpha; 1585 PetscBLASInt one=1,size; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1590 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1591 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1592 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1597 { 1598 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1599 PetscErrorCode ierr; 1600 1601 PetscFunctionBegin; 1602 if (!Y->preallocated || !y->nz) { 1603 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1604 } 1605 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1610 { 1611 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1612 PetscScalar *x,sum,*t; 1613 const MatScalar *idiag=0,*mdiag; 1614 const PetscScalar *b,*xb; 1615 PetscInt n,m=A->rmap->n,i,j,shift; 1616 const PetscInt *diag; 1617 PetscErrorCode ierr; 1618 1619 PetscFunctionBegin; 1620 its = its*lits; 1621 1622 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1623 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1624 a->fshift = fshift; 1625 a->omega = omega; 1626 1627 diag = a->diag; 1628 t = a->ssor_work; 1629 idiag = a->idiag; 1630 mdiag = a->mdiag; 1631 1632 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1633 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1634 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1635 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1636 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1637 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1638 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1641 for (i=0; i<m; i++) { 1642 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1643 sum = b[i]; 1644 n = (diag[i]-shift)/8; 1645 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1646 t[i] = sum; 1647 x[i] = sum*idiag[i]; 1648 } 1649 xb = t; 1650 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1651 } else xb = b; 1652 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1653 for (i=m-1; i>=0; i--) { 1654 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1655 sum = xb[i]; 1656 n = a->rlen[i]-(diag[i]-shift)/8-1; 1657 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1658 if (xb == b) { 1659 x[i] = sum*idiag[i]; 1660 } else { 1661 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1662 } 1663 } 1664 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1665 } 1666 its--; 1667 } 1668 while (its--) { 1669 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1670 for (i=0; i<m; i++) { 1671 /* lower */ 1672 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1673 sum = b[i]; 1674 n = (diag[i]-shift)/8; 1675 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1676 t[i] = sum; /* save application of the lower-triangular part */ 1677 /* upper */ 1678 n = a->rlen[i]-(diag[i]-shift)/8-1; 1679 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1680 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1681 } 1682 xb = t; 1683 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1684 } else xb = b; 1685 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1686 for (i=m-1; i>=0; i--) { 1687 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1688 sum = xb[i]; 1689 if (xb == b) { 1690 /* whole matrix (no checkpointing available) */ 1691 n = a->rlen[i]; 1692 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1693 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1694 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1695 n = a->rlen[i]-(diag[i]-shift)/8-1; 1696 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1697 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1698 } 1699 } 1700 if (xb == b) { 1701 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1702 } else { 1703 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1704 } 1705 } 1706 } 1707 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1708 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 /* -------------------------------------------------------------------*/ 1713 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1714 MatGetRow_SeqSELL, 1715 MatRestoreRow_SeqSELL, 1716 MatMult_SeqSELL, 1717 /* 4*/ MatMultAdd_SeqSELL, 1718 MatMultTranspose_SeqSELL, 1719 MatMultTransposeAdd_SeqSELL, 1720 0, 1721 0, 1722 0, 1723 /* 10*/ 0, 1724 0, 1725 0, 1726 MatSOR_SeqSELL, 1727 0, 1728 /* 15*/ MatGetInfo_SeqSELL, 1729 MatEqual_SeqSELL, 1730 MatGetDiagonal_SeqSELL, 1731 MatDiagonalScale_SeqSELL, 1732 0, 1733 /* 20*/ 0, 1734 MatAssemblyEnd_SeqSELL, 1735 MatSetOption_SeqSELL, 1736 MatZeroEntries_SeqSELL, 1737 /* 24*/ 0, 1738 0, 1739 0, 1740 0, 1741 0, 1742 /* 29*/ MatSetUp_SeqSELL, 1743 0, 1744 0, 1745 0, 1746 0, 1747 /* 34*/ MatDuplicate_SeqSELL, 1748 0, 1749 0, 1750 0, 1751 0, 1752 /* 39*/ 0, 1753 0, 1754 0, 1755 MatGetValues_SeqSELL, 1756 MatCopy_SeqSELL, 1757 /* 44*/ 0, 1758 MatScale_SeqSELL, 1759 MatShift_SeqSELL, 1760 0, 1761 0, 1762 /* 49*/ 0, 1763 0, 1764 0, 1765 0, 1766 0, 1767 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1768 0, 1769 0, 1770 0, 1771 0, 1772 /* 59*/ 0, 1773 MatDestroy_SeqSELL, 1774 MatView_SeqSELL, 1775 0, 1776 0, 1777 /* 64*/ 0, 1778 0, 1779 0, 1780 0, 1781 0, 1782 /* 69*/ 0, 1783 0, 1784 0, 1785 0, 1786 0, 1787 /* 74*/ 0, 1788 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1789 0, 1790 0, 1791 0, 1792 /* 79*/ 0, 1793 0, 1794 0, 1795 0, 1796 0, 1797 /* 84*/ 0, 1798 0, 1799 0, 1800 0, 1801 0, 1802 /* 89*/ 0, 1803 0, 1804 0, 1805 0, 1806 0, 1807 /* 94*/ 0, 1808 0, 1809 0, 1810 0, 1811 0, 1812 /* 99*/ 0, 1813 0, 1814 0, 1815 MatConjugate_SeqSELL, 1816 0, 1817 /*104*/ 0, 1818 0, 1819 0, 1820 0, 1821 0, 1822 /*109*/ 0, 1823 0, 1824 0, 1825 0, 1826 MatMissingDiagonal_SeqSELL, 1827 /*114*/ 0, 1828 0, 1829 0, 1830 0, 1831 0, 1832 /*119*/ 0, 1833 0, 1834 0, 1835 0, 1836 0, 1837 /*124*/ 0, 1838 0, 1839 0, 1840 0, 1841 0, 1842 /*129*/ 0, 1843 0, 1844 0, 1845 0, 1846 0, 1847 /*134*/ 0, 1848 0, 1849 0, 1850 0, 1851 0, 1852 /*139*/ 0, 1853 0, 1854 0, 1855 MatFDColoringSetUp_SeqXAIJ, 1856 0, 1857 /*144*/0 1858 }; 1859 1860 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1861 { 1862 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1867 1868 /* allocate space for values if not already there */ 1869 if (!a->saved_values) { 1870 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1871 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1872 } 1873 1874 /* copy values over */ 1875 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1876 PetscFunctionReturn(0); 1877 } 1878 1879 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1880 { 1881 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1882 PetscErrorCode ierr; 1883 1884 PetscFunctionBegin; 1885 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1886 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1887 /* copy values over */ 1888 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 /*@C 1893 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1894 1895 Not Collective 1896 1897 Input Parameters: 1898 . mat - a MATSEQSELL matrix 1899 . array - pointer to the data 1900 1901 Level: intermediate 1902 1903 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1904 @*/ 1905 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1906 { 1907 PetscErrorCode ierr; 1908 1909 PetscFunctionBegin; 1910 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1915 { 1916 Mat_SeqSELL *b; 1917 PetscMPIInt size; 1918 PetscErrorCode ierr; 1919 1920 PetscFunctionBegin; 1921 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1922 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1923 1924 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1925 1926 B->data = (void*)b; 1927 1928 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1929 1930 b->row = 0; 1931 b->col = 0; 1932 b->icol = 0; 1933 b->reallocs = 0; 1934 b->ignorezeroentries = PETSC_FALSE; 1935 b->roworiented = PETSC_TRUE; 1936 b->nonew = 0; 1937 b->diag = 0; 1938 b->solve_work = 0; 1939 B->spptr = 0; 1940 b->saved_values = 0; 1941 b->idiag = 0; 1942 b->mdiag = 0; 1943 b->ssor_work = 0; 1944 b->omega = 1.0; 1945 b->fshift = 0.0; 1946 b->idiagvalid = PETSC_FALSE; 1947 b->ibdiagvalid = PETSC_FALSE; 1948 b->keepnonzeropattern = PETSC_FALSE; 1949 1950 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1951 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1952 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1953 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1954 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1955 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1956 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 /* 1961 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1962 */ 1963 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1964 { 1965 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1966 PetscInt i,m=A->rmap->n; 1967 PetscInt totalslices=a->totalslices; 1968 PetscErrorCode ierr; 1969 1970 PetscFunctionBegin; 1971 c = (Mat_SeqSELL*)C->data; 1972 1973 C->factortype = A->factortype; 1974 c->row = 0; 1975 c->col = 0; 1976 c->icol = 0; 1977 c->reallocs = 0; 1978 1979 C->assembled = PETSC_TRUE; 1980 1981 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 1982 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 1983 1984 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 1985 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1986 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 1987 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 1988 1989 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 1990 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 1991 1992 /* allocate the matrix space */ 1993 if (mallocmatspace) { 1994 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 1995 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1996 1997 c->singlemalloc = PETSC_TRUE; 1998 1999 if (m > 0) { 2000 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 2001 if (cpvalues == MAT_COPY_VALUES) { 2002 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2003 } else { 2004 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2005 } 2006 } 2007 } 2008 2009 c->ignorezeroentries = a->ignorezeroentries; 2010 c->roworiented = a->roworiented; 2011 c->nonew = a->nonew; 2012 if (a->diag) { 2013 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2014 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2015 for (i=0; i<m; i++) { 2016 c->diag[i] = a->diag[i]; 2017 } 2018 } else c->diag = 0; 2019 2020 c->solve_work = 0; 2021 c->saved_values = 0; 2022 c->idiag = 0; 2023 c->ssor_work = 0; 2024 c->keepnonzeropattern = a->keepnonzeropattern; 2025 c->free_val = PETSC_TRUE; 2026 c->free_colidx = PETSC_TRUE; 2027 2028 c->maxallocmat = a->maxallocmat; 2029 c->maxallocrow = a->maxallocrow; 2030 c->rlenmax = a->rlenmax; 2031 c->nz = a->nz; 2032 C->preallocated = PETSC_TRUE; 2033 2034 c->nonzerorowcnt = a->nonzerorowcnt; 2035 C->nonzerostate = A->nonzerostate; 2036 2037 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2042 { 2043 PetscErrorCode ierr; 2044 2045 PetscFunctionBegin; 2046 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2047 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2048 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2049 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2050 } 2051 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2052 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 /*@C 2057 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2058 2059 Collective on MPI_Comm 2060 2061 Input Parameters: 2062 + comm - MPI communicator, set to PETSC_COMM_SELF 2063 . m - number of rows 2064 . n - number of columns 2065 . rlenmax - maximum number of nonzeros in a row 2066 - rlen - array containing the number of nonzeros in the various rows 2067 (possibly different for each row) or NULL 2068 2069 Output Parameter: 2070 . A - the matrix 2071 2072 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2073 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2074 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2075 2076 Notes: 2077 If nnz is given then nz is ignored 2078 2079 Specify the preallocated storage with either rlenmax or rlen (not both). 2080 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2081 allocation. For large problems you MUST preallocate memory or you 2082 will get TERRIBLE performance, see the users' manual chapter on matrices. 2083 2084 Level: intermediate 2085 2086 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2087 2088 @*/ 2089 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2090 { 2091 PetscErrorCode ierr; 2092 2093 PetscFunctionBegin; 2094 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2095 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2096 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2097 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2102 { 2103 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2104 PetscInt totalslices=a->totalslices; 2105 PetscErrorCode ierr; 2106 2107 PetscFunctionBegin; 2108 /* If the matrix dimensions are not equal,or no of nonzeros */ 2109 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2110 *flg = PETSC_FALSE; 2111 PetscFunctionReturn(0); 2112 } 2113 /* if the a->colidx are the same */ 2114 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2115 if (!*flg) PetscFunctionReturn(0); 2116 /* if a->val are the same */ 2117 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2122 { 2123 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2124 2125 PetscFunctionBegin; 2126 a->idiagvalid = PETSC_FALSE; 2127 a->ibdiagvalid = PETSC_FALSE; 2128 PetscFunctionReturn(0); 2129 } 2130 2131 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2132 { 2133 #if defined(PETSC_USE_COMPLEX) 2134 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2135 PetscInt i; 2136 PetscScalar *val = a->val; 2137 2138 PetscFunctionBegin; 2139 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2140 val[i] = PetscConj(val[i]); 2141 } 2142 #else 2143 PetscFunctionBegin; 2144 #endif 2145 PetscFunctionReturn(0); 2146 } 2147