1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) 9 #include <immintrin.h> 10 11 #if !defined(_MM_SCALE_8) 12 #define _MM_SCALE_8 8 13 #endif 14 15 #if defined(__AVX512F__) 16 /* these do not work 17 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 18 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 19 */ 20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 22 vec_idx = _mm256_load_si256((__m256i const*)acolidx); \ 23 vec_vals = _mm512_load_pd(aval); \ 24 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 26 #elif defined(__AVX2__) 27 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 28 vec_vals = _mm256_load_pd(aval); \ 29 vec_idx = _mm_load_si128((__m128i const*)acolidx); /* SSE2 */ \ 30 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 31 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 32 #endif 33 #endif /* PETSC_HAVE_IMMINTRIN_H */ 34 35 /*@C 36 MatSeqSELLSetPreallocation - For good matrix assembly performance 37 the user should preallocate the matrix storage by setting the parameter nz 38 (or the array nnz). By setting these parameters accurately, performance 39 during matrix assembly can be increased significantly. 40 41 Collective on MPI_Comm 42 43 Input Parameters: 44 + B - The matrix 45 . nz - number of nonzeros per row (same for all rows) 46 - nnz - array containing the number of nonzeros in the various rows 47 (possibly different for each row) or NULL 48 49 Notes: 50 If nnz is given then nz is ignored. 51 52 Specify the preallocated storage with either nz or nnz (not both). 53 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 54 allocation. For large problems you MUST preallocate memory or you 55 will get TERRIBLE performance, see the users' manual chapter on matrices. 56 57 You can call MatGetInfo() to get information on how effective the preallocation was; 58 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 59 You can also run with the option -info and look for messages with the string 60 malloc in them to see if additional memory allocation was needed. 61 62 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 63 entries or columns indices. 64 65 The maximum number of nonzeos in any row should be as accuate as possible. 66 If it is underesitmated, you will get bad performance due to reallocation 67 (MatSeqXSELLReallocateSELL). 68 69 Level: intermediate 70 71 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 72 73 @*/ 74 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 75 { 76 PetscErrorCode ierr; 77 78 PetscFunctionBegin; 79 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 80 PetscValidType(B,1); 81 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 86 { 87 Mat_SeqSELL *b; 88 PetscInt i,j,totalslices; 89 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 94 if (maxallocrow == MAT_SKIP_ALLOCATION) { 95 skipallocation = PETSC_TRUE; 96 maxallocrow = 0; 97 } 98 99 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 100 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 101 102 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 103 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 104 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 105 if (rlen) { 106 for (i=0; i<B->rmap->n; i++) { 107 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 108 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 109 } 110 } 111 112 B->preallocated = PETSC_TRUE; 113 114 b = (Mat_SeqSELL*)B->data; 115 116 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 117 b->totalslices = totalslices; 118 if (!skipallocation) { 119 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 120 121 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 122 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 123 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 124 } 125 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 126 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 127 else if (maxallocrow < 0) maxallocrow = 1; 128 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 129 } else { 130 maxallocrow = 0; 131 b->sliidx[0] = 0; 132 for (i=1; i<totalslices; i++) { 133 b->sliidx[i] = 0; 134 for (j=0;j<8;j++) { 135 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 136 } 137 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 138 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 139 } 140 /* last slice */ 141 b->sliidx[totalslices] = 0; 142 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 143 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 144 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 145 } 146 147 /* allocate space for val, colidx, rlen */ 148 /* FIXME: should B's old memory be unlogged? */ 149 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 150 /* FIXME: assuming an element of the bit array takes 8 bits */ 151 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 152 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 153 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 154 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 155 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 156 157 b->singlemalloc = PETSC_TRUE; 158 b->free_val = PETSC_TRUE; 159 b->free_colidx = PETSC_TRUE; 160 } else { 161 b->free_val = PETSC_FALSE; 162 b->free_colidx = PETSC_FALSE; 163 } 164 165 b->nz = 0; 166 b->maxallocrow = maxallocrow; 167 b->rlenmax = maxallocrow; 168 b->maxallocmat = b->sliidx[totalslices]; 169 B->info.nz_unneeded = (double)b->maxallocmat; 170 if (realalloc) { 171 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 177 { 178 Mat B; 179 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 180 PetscInt i,j,row; 181 PetscBool isnonzero; 182 PetscErrorCode ierr; 183 184 PetscFunctionBegin; 185 if (reuse == MAT_REUSE_MATRIX) { 186 B = *newmat; 187 } else { 188 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 189 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 190 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 191 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 192 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 193 } 194 195 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 196 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 197 isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]); 198 if (isnonzero) { 199 ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr); 200 } 201 } 202 } 203 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 204 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 205 B->rmap->bs = A->rmap->bs; 206 207 if (reuse == MAT_INPLACE_MATRIX) { 208 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 209 } else { 210 *newmat = B; 211 } 212 PetscFunctionReturn(0); 213 } 214 215 #include <../src/mat/impls/aij/seq/aij.h> 216 217 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 218 { 219 Mat B; 220 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 221 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 222 const PetscInt *cols; 223 const PetscScalar *vals; 224 PetscErrorCode ierr; 225 226 PetscFunctionBegin; 227 if (A->rmap->bs > 1) { 228 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 229 PetscFunctionReturn(0); 230 } 231 232 if (reuse == MAT_REUSE_MATRIX) { 233 B = *newmat; 234 } else { 235 /* Can we just use ilen? */ 236 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 237 for (i=0; i<m; i++) { 238 rowlengths[i] = ai[i+1] - ai[i]; 239 } 240 241 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 242 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 243 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 244 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 245 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 246 } 247 248 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 249 250 for (row=0; row<m; row++) { 251 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 252 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 253 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 254 } 255 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 256 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 257 B->rmap->bs = A->rmap->bs; 258 259 if (reuse == MAT_INPLACE_MATRIX) { 260 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 261 } else { 262 *newmat = B; 263 } 264 PetscFunctionReturn(0); 265 } 266 267 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 268 { 269 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 270 PetscScalar *y; 271 const PetscScalar *x; 272 const MatScalar *aval=a->val; 273 PetscInt totalslices=a->totalslices; 274 const PetscInt *acolidx=a->colidx; 275 PetscInt i,j; 276 PetscErrorCode ierr; 277 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 278 __m512d vec_x,vec_y,vec_vals; 279 __m256i vec_idx; 280 __mmask8 mask; 281 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 282 __m256i vec_idx2,vec_idx3,vec_idx4; 283 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 284 __m128i vec_idx; 285 __m256d vec_x,vec_y,vec_y2,vec_vals; 286 MatScalar yval; 287 PetscInt r,rows_left,row,nnz_in_row; 288 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 289 __m128d vec_x_tmp; 290 __m256d vec_x,vec_y,vec_y2,vec_vals; 291 MatScalar yval; 292 PetscInt r,rows_left,row,nnz_in_row; 293 #else 294 PetscScalar sum[8]; 295 #endif 296 297 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 298 #pragma disjoint(*x,*y,*aval) 299 #endif 300 301 PetscFunctionBegin; 302 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 303 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 304 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 305 for (i=0; i<totalslices; i++) { /* loop over slices */ 306 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 307 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 308 309 vec_y = _mm512_setzero_pd(); 310 vec_y2 = _mm512_setzero_pd(); 311 vec_y3 = _mm512_setzero_pd(); 312 vec_y4 = _mm512_setzero_pd(); 313 314 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 315 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 316 case 3: 317 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 318 acolidx += 8; aval += 8; 319 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 320 acolidx += 8; aval += 8; 321 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 322 acolidx += 8; aval += 8; 323 j += 3; 324 break; 325 case 2: 326 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 327 acolidx += 8; aval += 8; 328 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 329 acolidx += 8; aval += 8; 330 j += 2; 331 break; 332 case 1: 333 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 334 acolidx += 8; aval += 8; 335 j += 1; 336 break; 337 } 338 #pragma novector 339 for (; j<(a->sliidx[i+1]>>3); j+=4) { 340 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 341 acolidx += 8; aval += 8; 342 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 343 acolidx += 8; aval += 8; 344 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 345 acolidx += 8; aval += 8; 346 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 347 acolidx += 8; aval += 8; 348 } 349 350 vec_y = _mm512_add_pd(vec_y,vec_y2); 351 vec_y = _mm512_add_pd(vec_y,vec_y3); 352 vec_y = _mm512_add_pd(vec_y,vec_y4); 353 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 354 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 355 _mm512_mask_store_pd(&y[8*i],mask,vec_y); 356 } else { 357 _mm512_store_pd(&y[8*i],vec_y); 358 } 359 } 360 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 361 for (i=0; i<totalslices; i++) { /* loop over full slices */ 362 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 363 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 364 365 /* last slice may have padding rows. Don't use vectorization. */ 366 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 367 rows_left = A->rmap->n - 8*i; 368 for (r=0; r<rows_left; ++r) { 369 yval = (MatScalar)0; 370 row = 8*i + r; 371 nnz_in_row = a->rlen[row]; 372 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 373 y[row] = yval; 374 } 375 break; 376 } 377 378 vec_y = _mm256_setzero_pd(); 379 vec_y2 = _mm256_setzero_pd(); 380 381 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 382 #pragma novector 383 #pragma unroll(2) 384 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 385 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 386 aval += 4; acolidx += 4; 387 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 388 aval += 4; acolidx += 4; 389 } 390 391 _mm256_store_pd(y+i*8,vec_y); 392 _mm256_store_pd(y+i*8+4,vec_y2); 393 } 394 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 395 for (i=0; i<totalslices; i++) { /* loop over full slices */ 396 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 397 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 398 399 vec_y = _mm256_setzero_pd(); 400 vec_y2 = _mm256_setzero_pd(); 401 402 /* last slice may have padding rows. Don't use vectorization. */ 403 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 404 rows_left = A->rmap->n - 8*i; 405 for (r=0; r<rows_left; ++r) { 406 yval = (MatScalar)0; 407 row = 8*i + r; 408 nnz_in_row = a->rlen[row]; 409 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 410 y[row] = yval; 411 } 412 break; 413 } 414 415 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 416 #pragma novector 417 #pragma unroll(2) 418 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 419 vec_vals = _mm256_loadu_pd(aval); 420 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 421 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 422 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 423 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 424 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 425 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 426 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 427 aval += 4; 428 429 vec_vals = _mm256_loadu_pd(aval); 430 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 431 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 432 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 433 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 434 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 435 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 436 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 437 aval += 4; 438 } 439 440 _mm256_storeu_pd(y + i*8, vec_y); 441 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 442 } 443 #else 444 for (i=0; i<totalslices; i++) { /* loop over slices */ 445 for (j=0; j<8; j++) sum[j] = 0.0; 446 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 447 sum[0] += aval[j] * x[acolidx[j]]; 448 sum[1] += aval[j+1] * x[acolidx[j+1]]; 449 sum[2] += aval[j+2] * x[acolidx[j+2]]; 450 sum[3] += aval[j+3] * x[acolidx[j+3]]; 451 sum[4] += aval[j+4] * x[acolidx[j+4]]; 452 sum[5] += aval[j+5] * x[acolidx[j+5]]; 453 sum[6] += aval[j+6] * x[acolidx[j+6]]; 454 sum[7] += aval[j+7] * x[acolidx[j+7]]; 455 } 456 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 457 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 458 } else { 459 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 460 } 461 } 462 #endif 463 464 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 465 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 466 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 471 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 472 { 473 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 474 PetscScalar *y,*z; 475 const PetscScalar *x; 476 const MatScalar *aval=a->val; 477 PetscInt totalslices=a->totalslices; 478 const PetscInt *acolidx=a->colidx; 479 PetscInt i,j; 480 PetscErrorCode ierr; 481 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 482 __m512d vec_x,vec_y,vec_vals; 483 __m256i vec_idx; 484 __mmask8 mask; 485 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 486 __m256i vec_idx2,vec_idx3,vec_idx4; 487 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 488 __m128d vec_x_tmp; 489 __m256d vec_x,vec_y,vec_y2,vec_vals; 490 MatScalar yval; 491 PetscInt r,row,nnz_in_row; 492 #else 493 PetscScalar sum[8]; 494 #endif 495 496 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 497 #pragma disjoint(*x,*y,*aval) 498 #endif 499 500 PetscFunctionBegin; 501 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 502 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 503 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 504 for (i=0; i<totalslices; i++) { /* loop over slices */ 505 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 506 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 507 508 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 509 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 510 vec_y = _mm512_mask_load_pd(vec_y,mask,&y[8*i]); 511 } else { 512 vec_y = _mm512_load_pd(&y[8*i]); 513 } 514 vec_y2 = _mm512_setzero_pd(); 515 vec_y3 = _mm512_setzero_pd(); 516 vec_y4 = _mm512_setzero_pd(); 517 518 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 519 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 520 case 3: 521 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 522 acolidx += 8; aval += 8; 523 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 524 acolidx += 8; aval += 8; 525 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 526 acolidx += 8; aval += 8; 527 j += 3; 528 break; 529 case 2: 530 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 531 acolidx += 8; aval += 8; 532 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 533 acolidx += 8; aval += 8; 534 j += 2; 535 break; 536 case 1: 537 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 538 acolidx += 8; aval += 8; 539 j += 1; 540 break; 541 } 542 #pragma novector 543 for (; j<(a->sliidx[i+1]>>3); j+=4) { 544 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 545 acolidx += 8; aval += 8; 546 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 547 acolidx += 8; aval += 8; 548 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 549 acolidx += 8; aval += 8; 550 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 551 acolidx += 8; aval += 8; 552 } 553 554 vec_y = _mm512_add_pd(vec_y,vec_y2); 555 vec_y = _mm512_add_pd(vec_y,vec_y3); 556 vec_y = _mm512_add_pd(vec_y,vec_y4); 557 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 558 _mm512_mask_store_pd(&z[8*i],mask,vec_y); 559 } else { 560 _mm512_store_pd(&z[8*i],vec_y); 561 } 562 } 563 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 564 for (i=0; i<totalslices; i++) { /* loop over full slices */ 565 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 566 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 567 568 /* last slice may have padding rows. Don't use vectorization. */ 569 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 570 for (r=0; r<(A->rmap->n & 0x07); ++r) { 571 row = 8*i + r; 572 yval = (MatScalar)0.0; 573 nnz_in_row = a->rlen[row]; 574 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 575 z[row] = y[row] + yval; 576 } 577 break; 578 } 579 580 vec_y = _mm256_loadu_pd(y+8*i); 581 vec_y2 = _mm256_loadu_pd(y+8*i+4); 582 583 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 584 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 585 vec_vals = _mm256_loadu_pd(aval); 586 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 587 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 588 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 589 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 590 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 591 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 592 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 593 aval += 4; 594 595 vec_vals = _mm256_loadu_pd(aval); 596 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 597 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 598 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 599 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 600 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 601 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 602 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 603 aval += 4; 604 } 605 606 _mm256_storeu_pd(z+i*8,vec_y); 607 _mm256_storeu_pd(z+i*8+4,vec_y2); 608 } 609 #else 610 for (i=0; i<totalslices; i++) { /* loop over slices */ 611 for (j=0; j<8; j++) sum[j] = 0.0; 612 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 613 sum[0] += aval[j] * x[acolidx[j]]; 614 sum[1] += aval[j+1] * x[acolidx[j+1]]; 615 sum[2] += aval[j+2] * x[acolidx[j+2]]; 616 sum[3] += aval[j+3] * x[acolidx[j+3]]; 617 sum[4] += aval[j+4] * x[acolidx[j+4]]; 618 sum[5] += aval[j+5] * x[acolidx[j+5]]; 619 sum[6] += aval[j+6] * x[acolidx[j+6]]; 620 sum[7] += aval[j+7] * x[acolidx[j+7]]; 621 } 622 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 623 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 624 } else { 625 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 626 } 627 } 628 #endif 629 630 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 631 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 632 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 633 PetscFunctionReturn(0); 634 } 635 636 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 637 { 638 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 639 PetscScalar *y; 640 const PetscScalar *x; 641 const MatScalar *aval=a->val; 642 const PetscInt *acolidx=a->colidx; 643 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 644 PetscErrorCode ierr; 645 646 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 647 #pragma disjoint(*x,*y,*aval) 648 #endif 649 650 PetscFunctionBegin; 651 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 652 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 653 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 654 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 655 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 656 for (r=0; r<(A->rmap->n & 0x07); ++r) { 657 row = 8*i + r; 658 nnz_in_row = a->rlen[row]; 659 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 660 } 661 break; 662 } 663 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 664 y[acolidx[j]] += aval[j] * x[8*i]; 665 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 666 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 667 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 668 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 669 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 670 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 671 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 672 } 673 } 674 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 675 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 676 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 681 { 682 PetscErrorCode ierr; 683 684 PetscFunctionBegin; 685 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 686 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 /* 691 Checks for missing diagonals 692 */ 693 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 694 { 695 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 696 PetscInt *diag,i; 697 698 PetscFunctionBegin; 699 *missing = PETSC_FALSE; 700 if (A->rmap->n > 0 && !(a->colidx)) { 701 *missing = PETSC_TRUE; 702 if (d) *d = 0; 703 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 704 } else { 705 diag = a->diag; 706 for (i=0; i<A->rmap->n; i++) { 707 if (diag[i] == -1) { 708 *missing = PETSC_TRUE; 709 if (d) *d = i; 710 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 711 break; 712 } 713 } 714 } 715 PetscFunctionReturn(0); 716 } 717 718 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 719 { 720 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 721 PetscInt i,j,m=A->rmap->n,shift; 722 PetscErrorCode ierr; 723 724 PetscFunctionBegin; 725 if (!a->diag) { 726 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 727 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 728 a->free_diag = PETSC_TRUE; 729 } 730 for (i=0; i<m; i++) { /* loop over rows */ 731 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 732 a->diag[i] = -1; 733 for (j=0; j<a->rlen[i]; j++) { 734 if (a->colidx[shift+j*8] == i) { 735 a->diag[i] = shift+j*8; 736 break; 737 } 738 } 739 } 740 PetscFunctionReturn(0); 741 } 742 743 /* 744 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 745 */ 746 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 747 { 748 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 749 PetscInt i,*diag,m = A->rmap->n; 750 MatScalar *val = a->val; 751 PetscScalar *idiag,*mdiag; 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 if (a->idiagvalid) PetscFunctionReturn(0); 756 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 757 diag = a->diag; 758 if (!a->idiag) { 759 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 760 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 761 val = a->val; 762 } 763 mdiag = a->mdiag; 764 idiag = a->idiag; 765 766 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 767 for (i=0; i<m; i++) { 768 mdiag[i] = val[diag[i]]; 769 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 770 if (PetscRealPart(fshift)) { 771 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 772 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 773 A->factorerror_zeropivot_value = 0.0; 774 A->factorerror_zeropivot_row = i; 775 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 776 } 777 idiag[i] = 1.0/val[diag[i]]; 778 } 779 ierr = PetscLogFlops(m);CHKERRQ(ierr); 780 } else { 781 for (i=0; i<m; i++) { 782 mdiag[i] = val[diag[i]]; 783 idiag[i] = omega/(fshift + val[diag[i]]); 784 } 785 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 786 } 787 a->idiagvalid = PETSC_TRUE; 788 PetscFunctionReturn(0); 789 } 790 791 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 792 { 793 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 794 PetscErrorCode ierr; 795 796 PetscFunctionBegin; 797 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 798 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 799 PetscFunctionReturn(0); 800 } 801 802 PetscErrorCode MatDestroy_SeqSELL(Mat A) 803 { 804 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 805 PetscErrorCode ierr; 806 807 PetscFunctionBegin; 808 #if defined(PETSC_USE_LOG) 809 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 810 #endif 811 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 812 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 813 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 814 ierr = PetscFree(a->diag);CHKERRQ(ierr); 815 ierr = PetscFree(a->ibdiag);CHKERRQ(ierr); 816 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 817 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 818 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 819 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 820 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 821 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 822 823 ierr = PetscFree(A->data);CHKERRQ(ierr); 824 825 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 826 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 827 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 828 #if defined(PETSC_HAVE_ELEMENTAL) 829 #endif 830 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 831 PetscFunctionReturn(0); 832 } 833 834 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 835 { 836 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 837 PetscErrorCode ierr; 838 839 PetscFunctionBegin; 840 switch (op) { 841 case MAT_ROW_ORIENTED: 842 a->roworiented = flg; 843 break; 844 case MAT_KEEP_NONZERO_PATTERN: 845 a->keepnonzeropattern = flg; 846 break; 847 case MAT_NEW_NONZERO_LOCATIONS: 848 a->nonew = (flg ? 0 : 1); 849 break; 850 case MAT_NEW_NONZERO_LOCATION_ERR: 851 a->nonew = (flg ? -1 : 0); 852 break; 853 case MAT_NEW_NONZERO_ALLOCATION_ERR: 854 a->nonew = (flg ? -2 : 0); 855 break; 856 case MAT_UNUSED_NONZERO_LOCATION_ERR: 857 a->nounused = (flg ? -1 : 0); 858 break; 859 case MAT_NEW_DIAGONALS: 860 case MAT_IGNORE_OFF_PROC_ENTRIES: 861 case MAT_USE_HASH_TABLE: 862 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 863 break; 864 case MAT_SPD: 865 case MAT_SYMMETRIC: 866 case MAT_STRUCTURALLY_SYMMETRIC: 867 case MAT_HERMITIAN: 868 case MAT_SYMMETRY_ETERNAL: 869 /* These options are handled directly by MatSetOption() */ 870 break; 871 default: 872 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 873 } 874 PetscFunctionReturn(0); 875 } 876 877 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 878 { 879 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 880 PetscInt i,j,n,shift; 881 PetscScalar *x,zero=0.0; 882 PetscErrorCode ierr; 883 884 PetscFunctionBegin; 885 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 886 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 887 888 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 889 PetscInt *diag=a->diag; 890 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 891 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 892 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } 895 896 ierr = VecSet(v,zero);CHKERRQ(ierr); 897 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 898 for (i=0; i<n; i++) { /* loop over rows */ 899 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 900 x[i] = 0; 901 for (j=0; j<a->rlen[i]; j++) { 902 if (a->colidx[shift+j*8] == i) { 903 x[i] = a->val[shift+j*8]; 904 break; 905 } 906 } 907 } 908 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 913 { 914 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 915 const PetscScalar *l,*r; 916 PetscInt i,j,m,n,row; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 if (ll) { 921 /* The local size is used so that VecMPI can be passed to this routine 922 by MatDiagonalScale_MPISELL */ 923 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 924 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 925 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 926 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 927 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 928 a->val[j] *= l[8*i+row]; 929 } 930 } 931 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 932 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 933 } 934 if (rr) { 935 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 936 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 937 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 938 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 939 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 940 a->val[j] *= r[a->colidx[j]]; 941 } 942 } 943 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 944 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 945 } 946 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 947 PetscFunctionReturn(0); 948 } 949 950 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 951 952 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 953 { 954 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 955 PetscInt *cp,i,k,low,high,t,row,col,l; 956 PetscInt shift; 957 MatScalar *vp; 958 959 PetscFunctionBegin; 960 for (k=0; k<m; k++) { /* loop over added rows */ 961 row = im[k]; 962 if (row<0) continue; 963 #if defined(PETSC_USE_DEBUG) 964 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 965 #endif 966 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 967 cp = a->colidx+shift; /* pointer to the row */ 968 vp = a->val+shift; /* pointer to the row */ 969 for (l=0; l<n; l++) { /* loop over added rows */ 970 col = in[l]; 971 if (col<0) continue; 972 #if defined(PETSC_USE_DEBUG) 973 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 974 #endif 975 high = a->rlen[row]; low = 0; /* assume unsorted */ 976 while (high-low > 5) { 977 t = (low+high)/2; 978 if (*(cp+t*8) > col) high = t; 979 else low = t; 980 } 981 for (i=low; i<high; i++) { 982 if (*(cp+8*i) > col) break; 983 if (*(cp+8*i) == col) { 984 *v++ = *(vp+8*i); 985 goto finished; 986 } 987 } 988 *v++ = 0.0; 989 finished:; 990 } 991 } 992 PetscFunctionReturn(0); 993 } 994 995 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 996 { 997 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 998 PetscInt i,j,m=A->rmap->n,shift; 999 const char *name; 1000 PetscViewerFormat format; 1001 PetscErrorCode ierr; 1002 1003 PetscFunctionBegin; 1004 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1005 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1006 PetscInt nofinalvalue = 0; 1007 /* 1008 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1009 nofinalvalue = 1; 1010 } 1011 */ 1012 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1013 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1014 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1015 #if defined(PETSC_USE_COMPLEX) 1016 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1017 #else 1018 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1019 #endif 1020 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1021 1022 for (i=0; i<m; i++) { 1023 shift = a->sliidx[i>>3]+(i&0x07); 1024 for (j=0; j<a->rlen[i]; j++) { 1025 #if defined(PETSC_USE_COMPLEX) 1026 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1027 #else 1028 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1029 #endif 1030 } 1031 } 1032 /* 1033 if (nofinalvalue) { 1034 #if defined(PETSC_USE_COMPLEX) 1035 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1036 #else 1037 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1038 #endif 1039 } 1040 */ 1041 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1042 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1043 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1044 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1045 PetscFunctionReturn(0); 1046 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1047 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1048 for (i=0; i<m; i++) { 1049 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1050 shift = a->sliidx[i>>3]+(i&0x07); 1051 for (j=0; j<a->rlen[i]; j++) { 1052 #if defined(PETSC_USE_COMPLEX) 1053 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1054 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1055 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1056 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1057 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1058 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1059 } 1060 #else 1061 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1062 #endif 1063 } 1064 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1065 } 1066 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1067 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1068 PetscInt cnt=0,jcnt; 1069 PetscScalar value; 1070 #if defined(PETSC_USE_COMPLEX) 1071 PetscBool realonly=PETSC_TRUE; 1072 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1073 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1074 realonly = PETSC_FALSE; 1075 break; 1076 } 1077 } 1078 #endif 1079 1080 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1081 for (i=0; i<m; i++) { 1082 jcnt = 0; 1083 shift = a->sliidx[i>>3]+(i&0x07); 1084 for (j=0; j<A->cmap->n; j++) { 1085 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1086 value = a->val[cnt++]; 1087 jcnt++; 1088 } else { 1089 value = 0.0; 1090 } 1091 #if defined(PETSC_USE_COMPLEX) 1092 if (realonly) { 1093 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1094 } else { 1095 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1096 } 1097 #else 1098 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1099 #endif 1100 } 1101 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1102 } 1103 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1104 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1105 PetscInt fshift=1; 1106 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1107 #if defined(PETSC_USE_COMPLEX) 1108 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1109 #else 1110 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1111 #endif 1112 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1113 for (i=0; i<m; i++) { 1114 shift = a->sliidx[i>>3]+(i&0x07); 1115 for (j=0; j<a->rlen[i]; j++) { 1116 #if defined(PETSC_USE_COMPLEX) 1117 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1118 #else 1119 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1120 #endif 1121 } 1122 } 1123 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1124 } else { 1125 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1126 if (A->factortype) { 1127 for (i=0; i<m; i++) { 1128 shift = a->sliidx[i>>3]+(i&0x07); 1129 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1130 /* L part */ 1131 for (j=shift; j<a->diag[i]; j+=8) { 1132 #if defined(PETSC_USE_COMPLEX) 1133 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1134 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1135 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1136 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1137 } else { 1138 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1139 } 1140 #else 1141 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1142 #endif 1143 } 1144 /* diagonal */ 1145 j = a->diag[i]; 1146 #if defined(PETSC_USE_COMPLEX) 1147 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1148 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1149 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1150 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1151 } else { 1152 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1153 } 1154 #else 1155 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1156 #endif 1157 1158 /* U part */ 1159 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1160 #if defined(PETSC_USE_COMPLEX) 1161 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1162 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1163 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1164 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1165 } else { 1166 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1167 } 1168 #else 1169 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1170 #endif 1171 } 1172 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1173 } 1174 } else { 1175 for (i=0; i<m; i++) { 1176 shift = a->sliidx[i>>3]+(i&0x07); 1177 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1178 for (j=0; j<a->rlen[i]; j++) { 1179 #if defined(PETSC_USE_COMPLEX) 1180 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1181 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1182 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1183 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1184 } else { 1185 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1186 } 1187 #else 1188 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1189 #endif 1190 } 1191 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1192 } 1193 } 1194 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1195 } 1196 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #include <petscdraw.h> 1201 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1202 { 1203 Mat A=(Mat)Aa; 1204 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1205 PetscInt i,j,m=A->rmap->n,shift; 1206 int color; 1207 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1208 PetscViewer viewer; 1209 PetscViewerFormat format; 1210 PetscErrorCode ierr; 1211 1212 PetscFunctionBegin; 1213 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1214 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1215 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1216 1217 /* loop over matrix elements drawing boxes */ 1218 1219 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1220 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1221 /* Blue for negative, Cyan for zero and Red for positive */ 1222 color = PETSC_DRAW_BLUE; 1223 for (i=0; i<m; i++) { 1224 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1225 y_l = m - i - 1.0; y_r = y_l + 1.0; 1226 for (j=0; j<a->rlen[i]; j++) { 1227 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1228 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1229 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1230 } 1231 } 1232 color = PETSC_DRAW_CYAN; 1233 for (i=0; i<m; i++) { 1234 shift = a->sliidx[i>>3]+(i&0x07); 1235 y_l = m - i - 1.0; y_r = y_l + 1.0; 1236 for (j=0; j<a->rlen[i]; j++) { 1237 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1238 if (a->val[shift+8*j] != 0.) continue; 1239 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1240 } 1241 } 1242 color = PETSC_DRAW_RED; 1243 for (i=0; i<m; i++) { 1244 shift = a->sliidx[i>>3]+(i&0x07); 1245 y_l = m - i - 1.0; y_r = y_l + 1.0; 1246 for (j=0; j<a->rlen[i]; j++) { 1247 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1248 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1249 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1250 } 1251 } 1252 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1253 } else { 1254 /* use contour shading to indicate magnitude of values */ 1255 /* first determine max of all nonzero values */ 1256 PetscReal minv=0.0,maxv=0.0; 1257 PetscInt count=0; 1258 PetscDraw popup; 1259 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1260 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1261 } 1262 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1263 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1264 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1265 1266 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1267 for (i=0; i<m; i++) { 1268 shift = a->sliidx[i>>3]+(i&0x07); 1269 y_l = m - i - 1.0; 1270 y_r = y_l + 1.0; 1271 for (j=0; j<a->rlen[i]; j++) { 1272 x_l = a->colidx[shift+j*8]; 1273 x_r = x_l + 1.0; 1274 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1275 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1276 count++; 1277 } 1278 } 1279 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #include <petscdraw.h> 1285 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1286 { 1287 PetscDraw draw; 1288 PetscReal xr,yr,xl,yl,h,w; 1289 PetscBool isnull; 1290 PetscErrorCode ierr; 1291 1292 PetscFunctionBegin; 1293 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1294 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1295 if (isnull) PetscFunctionReturn(0); 1296 1297 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1298 xr += w; yr += h; xl = -w; yl = -h; 1299 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1300 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1301 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1302 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1303 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1308 { 1309 PetscBool iascii,isbinary,isdraw; 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1314 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1315 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1316 if (iascii) { 1317 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1318 } else if (isbinary) { 1319 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1320 } else if (isdraw) { 1321 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1327 { 1328 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1329 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1330 MatScalar *vp; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1335 /* To do: compress out the unused elements */ 1336 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1337 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1338 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1339 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1340 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1341 for (i=0; i<a->totalslices; ++i) { 1342 shift = a->sliidx[i]; /* starting index of the slice */ 1343 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1344 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1345 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1346 row = 8*i + row_in_slice; 1347 nrow = a->rlen[row]; /* number of nonzeros in row */ 1348 /* 1349 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1350 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1351 */ 1352 lastcol = 0; 1353 if (nrow>0) { /* nonempty row */ 1354 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1355 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1356 for (j=1;j<8;j++) { 1357 if (a->rlen[8*i+j]) { 1358 lastcol = cp[j]; 1359 break; 1360 } 1361 } 1362 } else { 1363 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1364 } 1365 1366 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1367 cp[8*k+row_in_slice] = lastcol; 1368 vp[8*k+row_in_slice] = (MatScalar)0; 1369 } 1370 } 1371 } 1372 1373 A->info.mallocs += a->reallocs; 1374 a->reallocs = 0; 1375 1376 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1381 { 1382 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1383 1384 PetscFunctionBegin; 1385 info->block_size = 1.0; 1386 info->nz_allocated = (double)a->maxallocmat; 1387 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1388 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1389 info->assemblies = (double)A->num_ass; 1390 info->mallocs = (double)A->info.mallocs; 1391 info->memory = ((PetscObject)A)->mem; 1392 if (A->factortype) { 1393 info->fill_ratio_given = A->info.fill_ratio_given; 1394 info->fill_ratio_needed = A->info.fill_ratio_needed; 1395 info->factor_mallocs = A->info.factor_mallocs; 1396 } else { 1397 info->fill_ratio_given = 0; 1398 info->fill_ratio_needed = 0; 1399 info->factor_mallocs = 0; 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1405 { 1406 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1407 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1408 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1409 MatScalar *vp,value; 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 for (k=0; k<m; k++) { /* loop over added rows */ 1414 row = im[k]; 1415 if (row < 0) continue; 1416 #if defined(PETSC_USE_DEBUG) 1417 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1418 #endif 1419 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1420 cp = a->colidx+shift; /* pointer to the row */ 1421 vp = a->val+shift; /* pointer to the row */ 1422 nrow = a->rlen[row]; 1423 low = 0; 1424 high = nrow; 1425 1426 for (l=0; l<n; l++) { /* loop over added columns */ 1427 col = in[l]; 1428 if (col<0) continue; 1429 #if defined(PETSC_USE_DEBUG) 1430 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1431 #endif 1432 if (a->roworiented) { 1433 value = v[l+k*n]; 1434 } else { 1435 value = v[k+l*m]; 1436 } 1437 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1438 1439 /* search in this row for the specified colmun, i indicates the column to be set */ 1440 if (col <= lastcol) low = 0; 1441 else high = nrow; 1442 lastcol = col; 1443 while (high-low > 5) { 1444 t = (low+high)/2; 1445 if (*(cp+t*8) > col) high = t; 1446 else low = t; 1447 } 1448 for (i=low; i<high; i++) { 1449 if (*(cp+i*8) > col) break; 1450 if (*(cp+i*8) == col) { 1451 if (is == ADD_VALUES) *(vp+i*8) += value; 1452 else *(vp+i*8) = value; 1453 low = i + 1; 1454 goto noinsert; 1455 } 1456 } 1457 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1458 if (nonew == 1) goto noinsert; 1459 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1460 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1461 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1462 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1463 for (ii=nrow-1; ii>=i; ii--) { 1464 *(cp+(ii+1)*8) = *(cp+ii*8); 1465 *(vp+(ii+1)*8) = *(vp+ii*8); 1466 } 1467 a->rlen[row]++; 1468 *(cp+i*8) = col; 1469 *(vp+i*8) = value; 1470 a->nz++; 1471 A->nonzerostate++; 1472 low = i+1; high++; nrow++; 1473 noinsert:; 1474 } 1475 a->rlen[row] = nrow; 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1481 { 1482 PetscErrorCode ierr; 1483 1484 PetscFunctionBegin; 1485 /* If the two matrices have the same copy implementation, use fast copy. */ 1486 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1487 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1488 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1489 1490 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1491 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1492 } else { 1493 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1494 } 1495 PetscFunctionReturn(0); 1496 } 1497 1498 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1499 { 1500 PetscErrorCode ierr; 1501 1502 PetscFunctionBegin; 1503 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1508 { 1509 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1510 1511 PetscFunctionBegin; 1512 *array = a->val; 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1517 { 1518 PetscFunctionBegin; 1519 PetscFunctionReturn(0); 1520 } 1521 1522 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1523 { 1524 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1525 PetscInt i; 1526 MatScalar *aval=a->val; 1527 1528 PetscFunctionBegin; 1529 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1534 { 1535 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1536 PetscInt i; 1537 MatScalar *aval=a->val; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1542 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1547 { 1548 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1549 MatScalar *aval=a->val; 1550 PetscScalar oalpha=alpha; 1551 PetscBLASInt one=1,size; 1552 PetscErrorCode ierr; 1553 1554 PetscFunctionBegin; 1555 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1556 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1557 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1558 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1563 { 1564 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1565 PetscErrorCode ierr; 1566 1567 PetscFunctionBegin; 1568 if (!Y->preallocated || !y->nz) { 1569 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1570 } 1571 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1576 { 1577 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1578 PetscScalar *x,sum,*t; 1579 const MatScalar *idiag=0,*mdiag; 1580 const PetscScalar *b,*xb; 1581 PetscInt n,m=A->rmap->n,i,j,shift; 1582 const PetscInt *diag; 1583 PetscErrorCode ierr; 1584 1585 PetscFunctionBegin; 1586 its = its*lits; 1587 1588 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1589 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1590 a->fshift = fshift; 1591 a->omega = omega; 1592 1593 diag = a->diag; 1594 t = a->ssor_work; 1595 idiag = a->idiag; 1596 mdiag = a->mdiag; 1597 1598 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1599 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1600 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1601 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1602 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1603 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1604 1605 if (flag & SOR_ZERO_INITIAL_GUESS) { 1606 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1607 for (i=0; i<m; i++) { 1608 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1609 sum = b[i]; 1610 n = (diag[i]-shift)/8; 1611 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1612 t[i] = sum; 1613 x[i] = sum*idiag[i]; 1614 } 1615 xb = t; 1616 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1617 } else xb = b; 1618 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1619 for (i=m-1; i>=0; i--) { 1620 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1621 sum = xb[i]; 1622 n = a->rlen[i]-(diag[i]-shift)/8-1; 1623 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1624 if (xb == b) { 1625 x[i] = sum*idiag[i]; 1626 } else { 1627 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1628 } 1629 } 1630 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1631 } 1632 its--; 1633 } 1634 while (its--) { 1635 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1636 for (i=0; i<m; i++) { 1637 /* lower */ 1638 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1639 sum = b[i]; 1640 n = (diag[i]-shift)/8; 1641 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1642 t[i] = sum; /* save application of the lower-triangular part */ 1643 /* upper */ 1644 n = a->rlen[i]-(diag[i]-shift)/8-1; 1645 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1646 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1647 } 1648 xb = t; 1649 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1650 } else xb = b; 1651 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1652 for (i=m-1; i>=0; i--) { 1653 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1654 sum = xb[i]; 1655 if (xb == b) { 1656 /* whole matrix (no checkpointing available) */ 1657 n = a->rlen[i]; 1658 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1659 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1660 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1661 n = a->rlen[i]-(diag[i]-shift)/8-1; 1662 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1663 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1664 } 1665 } 1666 if (xb == b) { 1667 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1668 } else { 1669 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1670 } 1671 } 1672 } 1673 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1674 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 /* -------------------------------------------------------------------*/ 1679 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1680 0, 1681 0, 1682 MatMult_SeqSELL, 1683 /* 4*/ MatMultAdd_SeqSELL, 1684 MatMultTranspose_SeqSELL, 1685 MatMultTransposeAdd_SeqSELL, 1686 0, 1687 0, 1688 0, 1689 /* 10*/ 0, 1690 0, 1691 0, 1692 MatSOR_SeqSELL, 1693 0, 1694 /* 15*/ MatGetInfo_SeqSELL, 1695 MatEqual_SeqSELL, 1696 MatGetDiagonal_SeqSELL, 1697 MatDiagonalScale_SeqSELL, 1698 0, 1699 /* 20*/ 0, 1700 MatAssemblyEnd_SeqSELL, 1701 MatSetOption_SeqSELL, 1702 MatZeroEntries_SeqSELL, 1703 /* 24*/ 0, 1704 0, 1705 0, 1706 0, 1707 0, 1708 /* 29*/ MatSetUp_SeqSELL, 1709 0, 1710 0, 1711 0, 1712 0, 1713 /* 34*/ MatDuplicate_SeqSELL, 1714 0, 1715 0, 1716 0, 1717 0, 1718 /* 39*/ 0, 1719 0, 1720 0, 1721 MatGetValues_SeqSELL, 1722 MatCopy_SeqSELL, 1723 /* 44*/ 0, 1724 MatScale_SeqSELL, 1725 MatShift_SeqSELL, 1726 0, 1727 0, 1728 /* 49*/ 0, 1729 0, 1730 0, 1731 0, 1732 0, 1733 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1734 0, 1735 0, 1736 0, 1737 0, 1738 /* 59*/ 0, 1739 MatDestroy_SeqSELL, 1740 MatView_SeqSELL, 1741 0, 1742 0, 1743 /* 64*/ 0, 1744 0, 1745 0, 1746 0, 1747 0, 1748 /* 69*/ 0, 1749 0, 1750 0, 1751 0, 1752 0, 1753 /* 74*/ 0, 1754 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1755 0, 1756 0, 1757 0, 1758 /* 79*/ 0, 1759 0, 1760 0, 1761 0, 1762 0, 1763 /* 84*/ 0, 1764 0, 1765 0, 1766 0, 1767 0, 1768 /* 89*/ 0, 1769 0, 1770 0, 1771 0, 1772 0, 1773 /* 94*/ 0, 1774 0, 1775 0, 1776 0, 1777 0, 1778 /* 99*/ 0, 1779 0, 1780 0, 1781 MatConjugate_SeqSELL, 1782 0, 1783 /*104*/ 0, 1784 0, 1785 0, 1786 0, 1787 0, 1788 /*109*/ 0, 1789 0, 1790 0, 1791 0, 1792 MatMissingDiagonal_SeqSELL, 1793 /*114*/ 0, 1794 0, 1795 0, 1796 0, 1797 0, 1798 /*119*/ 0, 1799 0, 1800 0, 1801 0, 1802 0, 1803 /*124*/ 0, 1804 0, 1805 0, 1806 0, 1807 0, 1808 /*129*/ 0, 1809 0, 1810 0, 1811 0, 1812 0, 1813 /*134*/ 0, 1814 0, 1815 0, 1816 0, 1817 0, 1818 /*139*/ 0, 1819 0, 1820 0, 1821 MatFDColoringSetUp_SeqXAIJ, 1822 0, 1823 /*144*/0 1824 }; 1825 1826 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1827 { 1828 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1829 PetscErrorCode ierr; 1830 1831 PetscFunctionBegin; 1832 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1833 1834 /* allocate space for values if not already there */ 1835 if (!a->saved_values) { 1836 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1837 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1838 } 1839 1840 /* copy values over */ 1841 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1842 PetscFunctionReturn(0); 1843 } 1844 1845 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1846 { 1847 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1848 PetscErrorCode ierr; 1849 1850 PetscFunctionBegin; 1851 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1852 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1853 /* copy values over */ 1854 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 /*@C 1859 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1860 1861 Not Collective 1862 1863 Input Parameters: 1864 . mat - a MATSEQSELL matrix 1865 . array - pointer to the data 1866 1867 Level: intermediate 1868 1869 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1870 @*/ 1871 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1872 { 1873 PetscErrorCode ierr; 1874 1875 PetscFunctionBegin; 1876 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1877 PetscFunctionReturn(0); 1878 } 1879 1880 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1881 { 1882 Mat_SeqSELL *b; 1883 PetscMPIInt size; 1884 PetscErrorCode ierr; 1885 1886 PetscFunctionBegin; 1887 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1888 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1889 1890 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1891 1892 B->data = (void*)b; 1893 1894 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1895 1896 b->row = 0; 1897 b->col = 0; 1898 b->icol = 0; 1899 b->reallocs = 0; 1900 b->ignorezeroentries = PETSC_FALSE; 1901 b->roworiented = PETSC_TRUE; 1902 b->nonew = 0; 1903 b->diag = 0; 1904 b->solve_work = 0; 1905 B->spptr = 0; 1906 b->saved_values = 0; 1907 b->idiag = 0; 1908 b->mdiag = 0; 1909 b->ssor_work = 0; 1910 b->omega = 1.0; 1911 b->fshift = 0.0; 1912 b->idiagvalid = PETSC_FALSE; 1913 b->ibdiagvalid = PETSC_FALSE; 1914 b->keepnonzeropattern = PETSC_FALSE; 1915 1916 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1917 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1918 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1919 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1920 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1921 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1922 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1923 PetscFunctionReturn(0); 1924 } 1925 1926 /* 1927 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1928 */ 1929 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1930 { 1931 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1932 PetscInt i,m=A->rmap->n; 1933 PetscInt totalslices=a->totalslices; 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 c = (Mat_SeqSELL*)C->data; 1938 1939 C->factortype = A->factortype; 1940 c->row = 0; 1941 c->col = 0; 1942 c->icol = 0; 1943 c->reallocs = 0; 1944 1945 C->assembled = PETSC_TRUE; 1946 1947 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 1948 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 1949 1950 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 1951 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1952 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 1953 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 1954 1955 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 1956 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 1957 1958 /* allocate the matrix space */ 1959 if (mallocmatspace) { 1960 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 1961 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1962 1963 c->singlemalloc = PETSC_TRUE; 1964 1965 if (m > 0) { 1966 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 1967 if (cpvalues == MAT_COPY_VALUES) { 1968 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1969 } else { 1970 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1971 } 1972 } 1973 } 1974 1975 c->ignorezeroentries = a->ignorezeroentries; 1976 c->roworiented = a->roworiented; 1977 c->nonew = a->nonew; 1978 if (a->diag) { 1979 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 1980 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1981 for (i=0; i<m; i++) { 1982 c->diag[i] = a->diag[i]; 1983 } 1984 } else c->diag = 0; 1985 1986 c->solve_work = 0; 1987 c->saved_values = 0; 1988 c->idiag = 0; 1989 c->ssor_work = 0; 1990 c->keepnonzeropattern = a->keepnonzeropattern; 1991 c->free_val = PETSC_TRUE; 1992 c->free_colidx = PETSC_TRUE; 1993 1994 c->maxallocmat = a->maxallocmat; 1995 c->maxallocrow = a->maxallocrow; 1996 c->rlenmax = a->rlenmax; 1997 c->nz = a->nz; 1998 C->preallocated = PETSC_TRUE; 1999 2000 c->nonzerorowcnt = a->nonzerorowcnt; 2001 C->nonzerostate = A->nonzerostate; 2002 2003 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2008 { 2009 PetscErrorCode ierr; 2010 2011 PetscFunctionBegin; 2012 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2013 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2014 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2015 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2016 } 2017 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2018 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 /*@C 2023 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2024 2025 Collective on MPI_Comm 2026 2027 Input Parameters: 2028 + comm - MPI communicator, set to PETSC_COMM_SELF 2029 . m - number of rows 2030 . n - number of columns 2031 . rlenmax - maximum number of nonzeros in a row 2032 - rlen - array containing the number of nonzeros in the various rows 2033 (possibly different for each row) or NULL 2034 2035 Output Parameter: 2036 . A - the matrix 2037 2038 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2039 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2040 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2041 2042 Notes: 2043 If nnz is given then nz is ignored 2044 2045 Specify the preallocated storage with either rlenmax or rlen (not both). 2046 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2047 allocation. For large problems you MUST preallocate memory or you 2048 will get TERRIBLE performance, see the users' manual chapter on matrices. 2049 2050 Level: intermediate 2051 2052 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2053 2054 @*/ 2055 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2056 { 2057 PetscErrorCode ierr; 2058 2059 PetscFunctionBegin; 2060 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2061 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2062 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2063 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2068 { 2069 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2070 PetscInt totalslices=a->totalslices; 2071 PetscErrorCode ierr; 2072 2073 PetscFunctionBegin; 2074 /* If the matrix dimensions are not equal,or no of nonzeros */ 2075 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2076 *flg = PETSC_FALSE; 2077 PetscFunctionReturn(0); 2078 } 2079 /* if the a->colidx are the same */ 2080 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2081 if (!*flg) PetscFunctionReturn(0); 2082 /* if a->val are the same */ 2083 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2088 { 2089 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2090 2091 PetscFunctionBegin; 2092 a->idiagvalid = PETSC_FALSE; 2093 a->ibdiagvalid = PETSC_FALSE; 2094 PetscFunctionReturn(0); 2095 } 2096 2097 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2098 { 2099 #if defined(PETSC_USE_COMPLEX) 2100 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2101 PetscInt i; 2102 PetscScalar *val = a->val; 2103 2104 PetscFunctionBegin; 2105 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2106 val[i] = PetscConj(val[i]); 2107 } 2108 #else 2109 PetscFunctionBegin; 2110 #endif 2111 PetscFunctionReturn(0); 2112 } 2113