1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) 9 #include <immintrin.h> 10 11 #if !defined(_MM_SCALE_8) 12 #define _MM_SCALE_8 8 13 #endif 14 15 #if defined(__AVX512F__) 16 /* these do not work 17 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 18 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 19 */ 20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 22 vec_idx = _mm256_load_si256((__m256i const*)acolidx); \ 23 vec_vals = _mm512_load_pd(aval); \ 24 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y); 26 27 #endif 28 #endif /* PETSC_HAVE_IMMINTRIN_H */ 29 30 /*@C 31 MatSeqSELLSetPreallocation - For good matrix assembly performance 32 the user should preallocate the matrix storage by setting the parameter nz 33 (or the array nnz). By setting these parameters accurately, performance 34 during matrix assembly can be increased significantly. 35 36 Collective on MPI_Comm 37 38 Input Parameters: 39 + B - The matrix 40 . nz - number of nonzeros per row (same for all rows) 41 - nnz - array containing the number of nonzeros in the various rows 42 (possibly different for each row) or NULL 43 44 Notes: 45 If nnz is given then nz is ignored. 46 47 Specify the preallocated storage with either nz or nnz (not both). 48 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 49 allocation. For large problems you MUST preallocate memory or you 50 will get TERRIBLE performance, see the users' manual chapter on matrices. 51 52 You can call MatGetInfo() to get information on how effective the preallocation was; 53 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 54 You can also run with the option -info and look for messages with the string 55 malloc in them to see if additional memory allocation was needed. 56 57 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 58 entries or columns indices. 59 60 The maximum number of nonzeos in any row should be as accuate as possible. 61 If it is underesitmated, you will get bad performance due to reallocation 62 (MatSeqXSELLReallocateSELL). 63 64 Level: intermediate 65 66 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 67 68 @*/ 69 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 70 { 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 75 PetscValidType(B,1); 76 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 81 { 82 Mat_SeqSELL *b; 83 PetscInt i,j,totalslices; 84 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 89 if (maxallocrow == MAT_SKIP_ALLOCATION) { 90 skipallocation = PETSC_TRUE; 91 maxallocrow = 0; 92 } 93 94 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 95 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 96 97 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 98 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 99 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 100 if (rlen) { 101 for (i=0; i<B->rmap->n; i++) { 102 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 103 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 104 } 105 } 106 107 B->preallocated = PETSC_TRUE; 108 109 b = (Mat_SeqSELL*)B->data; 110 111 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 112 b->totalslices = totalslices; 113 if (!skipallocation) { 114 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 115 116 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 117 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 118 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 119 } 120 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 121 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 122 else if (maxallocrow < 0) maxallocrow = 1; 123 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 124 } else { 125 maxallocrow = 0; 126 b->sliidx[0] = 0; 127 for (i=1; i<totalslices; i++) { 128 b->sliidx[i] = 0; 129 for (j=0;j<8;j++) { 130 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 131 } 132 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 133 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 134 } 135 /* last slice */ 136 b->sliidx[totalslices] = 0; 137 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 138 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 139 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 140 } 141 142 /* allocate space for val, colidx, rlen */ 143 /* FIXME: should B's old memory be unlogged? */ 144 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 145 /* FIXME: assuming an element of the bit array takes 8 bits */ 146 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 147 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 148 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 149 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 150 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 151 152 b->singlemalloc = PETSC_TRUE; 153 b->free_val = PETSC_TRUE; 154 b->free_colidx = PETSC_TRUE; 155 } else { 156 b->free_val = PETSC_FALSE; 157 b->free_colidx = PETSC_FALSE; 158 } 159 160 b->nz = 0; 161 b->maxallocrow = maxallocrow; 162 b->rlenmax = maxallocrow; 163 b->maxallocmat = b->sliidx[totalslices]; 164 B->info.nz_unneeded = (double)b->maxallocmat; 165 if (realalloc) { 166 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 172 { 173 Mat B; 174 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 175 PetscInt i,j,row; 176 PetscBool isnonzero; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 181 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 182 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 183 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 184 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 185 186 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 187 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 188 isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]); 189 if (isnonzero) { 190 ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr); 191 } 192 } 193 } 194 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 195 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 196 B->rmap->bs = A->rmap->bs; 197 198 if (reuse == MAT_INPLACE_MATRIX) { 199 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 200 } else { 201 *newmat = B; 202 } 203 PetscFunctionReturn(0); 204 } 205 206 #include <../src/mat/impls/aij/seq/aij.h> 207 208 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 209 { 210 Mat B; 211 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 212 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 213 const PetscInt *cols; 214 const PetscScalar *vals; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (A->rmap->bs > 1) { 219 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 /* Can we just use ilen? */ 223 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 224 for (i=0; i<m; i++) { 225 rowlengths[i] = ai[i+1] - ai[i]; 226 } 227 228 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 229 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 230 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 231 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 232 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 233 234 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 235 236 for (row=0; row<m; row++) { 237 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 238 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 239 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 240 } 241 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 242 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 243 B->rmap->bs = A->rmap->bs; 244 245 if (reuse == MAT_INPLACE_MATRIX) { 246 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 247 } else { 248 *newmat = B; 249 } 250 PetscFunctionReturn(0); 251 } 252 253 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 254 { 255 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 256 PetscScalar *y; 257 const PetscScalar *x; 258 const MatScalar *aval=a->val; 259 PetscInt totalslices=a->totalslices; 260 const PetscInt *acolidx=a->colidx; 261 PetscInt i,j; 262 PetscErrorCode ierr; 263 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 264 __m512d vec_x,vec_y,vec_vals; 265 __m256i vec_idx; 266 __mmask8 mask; 267 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 268 __m256i vec_idx2,vec_idx3,vec_idx4; 269 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 270 __m128d vec_x_tmp; 271 __m256d vec_x,vec_y,vec_y2,vec_vals; 272 MatScalar yval; 273 PetscInt r,rows_left,row,nnz_in_row; 274 #else 275 PetscScalar sum[8]; 276 #endif 277 278 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 279 #pragma disjoint(*x,*y,*aval) 280 #endif 281 282 PetscFunctionBegin; 283 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 284 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 285 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 286 for (i=0; i<totalslices; i++) { /* loop over slices */ 287 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 288 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 289 290 vec_y = _mm512_setzero_pd(); 291 vec_y2 = _mm512_setzero_pd(); 292 vec_y3 = _mm512_setzero_pd(); 293 vec_y4 = _mm512_setzero_pd(); 294 295 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 296 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 297 case 3: 298 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 299 acolidx += 8; aval += 8; 300 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 301 acolidx += 8; aval += 8; 302 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 303 acolidx += 8; aval += 8; 304 j += 3; 305 break; 306 case 2: 307 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 308 acolidx += 8; aval += 8; 309 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 310 acolidx += 8; aval += 8; 311 j += 2; 312 break; 313 case 1: 314 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 315 acolidx += 8; aval += 8; 316 j += 1; 317 break; 318 } 319 #pragma novector 320 for (; j<(a->sliidx[i+1]>>3); j+=4) { 321 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 322 acolidx += 8; aval += 8; 323 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 324 acolidx += 8; aval += 8; 325 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 326 acolidx += 8; aval += 8; 327 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 328 acolidx += 8; aval += 8; 329 } 330 331 vec_y = _mm512_add_pd(vec_y,vec_y2); 332 vec_y = _mm512_add_pd(vec_y,vec_y3); 333 vec_y = _mm512_add_pd(vec_y,vec_y4); 334 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 335 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 336 _mm512_mask_store_pd(&y[8*i],mask,vec_y); 337 } else { 338 _mm512_store_pd(&y[8*i],vec_y); 339 } 340 } 341 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 342 for (i=0; i<totalslices; i++) { /* loop over full slices */ 343 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 344 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 345 346 vec_y = _mm256_setzero_pd(); 347 vec_y2 = _mm256_setzero_pd(); 348 349 /* last slice may have padding rows. Don't use vectorization. */ 350 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 351 rows_left = A->rmap->n - 8*i; 352 for (r=0; r<rows_left; ++r) { 353 yval = (MatScalar)0; 354 row = 8*i + r; 355 nnz_in_row = a->rlen[row]; 356 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 357 y[row] = yval; 358 } 359 break; 360 } 361 362 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 363 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 364 vec_vals = _mm256_loadu_pd(aval); 365 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 366 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 367 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 368 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 369 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 370 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 371 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 372 aval += 4; 373 374 vec_vals = _mm256_loadu_pd(aval); 375 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 376 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 377 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 378 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 379 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 380 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 381 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 382 aval += 4; 383 } 384 385 _mm256_storeu_pd(y + i*8, vec_y); 386 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 387 } 388 #else 389 for (i=0; i<totalslices; i++) { /* loop over slices */ 390 for (j=0; j<8; j++) sum[j] = 0.0; 391 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 392 sum[0] += aval[j] * x[acolidx[j]]; 393 sum[1] += aval[j+1] * x[acolidx[j+1]]; 394 sum[2] += aval[j+2] * x[acolidx[j+2]]; 395 sum[3] += aval[j+3] * x[acolidx[j+3]]; 396 sum[4] += aval[j+4] * x[acolidx[j+4]]; 397 sum[5] += aval[j+5] * x[acolidx[j+5]]; 398 sum[6] += aval[j+6] * x[acolidx[j+6]]; 399 sum[7] += aval[j+7] * x[acolidx[j+7]]; 400 } 401 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 402 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 403 } else { 404 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 405 } 406 } 407 #endif 408 409 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 410 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 411 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 415 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 416 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 417 { 418 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 419 PetscScalar *y,*z; 420 const PetscScalar *x; 421 const MatScalar *aval=a->val; 422 PetscInt totalslices=a->totalslices; 423 const PetscInt *acolidx=a->colidx; 424 PetscInt i,j; 425 PetscErrorCode ierr; 426 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 427 __m512d vec_x,vec_y,vec_vals; 428 __m256i vec_idx; 429 __mmask8 mask; 430 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 431 __m256i vec_idx2,vec_idx3,vec_idx4; 432 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 433 __m128d vec_x_tmp; 434 __m256d vec_x,vec_y,vec_y2,vec_vals; 435 MatScalar yval; 436 PetscInt r,row,nnz_in_row; 437 #else 438 PetscScalar sum[8]; 439 #endif 440 441 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 442 #pragma disjoint(*x,*y,*aval) 443 #endif 444 445 PetscFunctionBegin; 446 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 447 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 448 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 449 for (i=0; i<totalslices; i++) { /* loop over slices */ 450 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 451 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 452 453 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 454 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 455 vec_y = _mm512_mask_load_pd(vec_y,mask,&y[8*i]); 456 } else { 457 vec_y = _mm512_load_pd(&y[8*i]); 458 } 459 vec_y2 = _mm512_setzero_pd(); 460 vec_y3 = _mm512_setzero_pd(); 461 vec_y4 = _mm512_setzero_pd(); 462 463 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 464 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 465 case 3: 466 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 467 acolidx += 8; aval += 8; 468 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 469 acolidx += 8; aval += 8; 470 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 471 acolidx += 8; aval += 8; 472 j += 3; 473 break; 474 case 2: 475 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 476 acolidx += 8; aval += 8; 477 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 478 acolidx += 8; aval += 8; 479 j += 2; 480 break; 481 case 1: 482 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 483 acolidx += 8; aval += 8; 484 j += 1; 485 break; 486 } 487 #pragma novector 488 for (; j<(a->sliidx[i+1]>>3); j+=4) { 489 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 490 acolidx += 8; aval += 8; 491 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 492 acolidx += 8; aval += 8; 493 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 494 acolidx += 8; aval += 8; 495 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 496 acolidx += 8; aval += 8; 497 } 498 499 vec_y = _mm512_add_pd(vec_y,vec_y2); 500 vec_y = _mm512_add_pd(vec_y,vec_y3); 501 vec_y = _mm512_add_pd(vec_y,vec_y4); 502 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 503 _mm512_mask_store_pd(&z[8*i],mask,vec_y); 504 } else { 505 _mm512_store_pd(&z[8*i],vec_y); 506 } 507 } 508 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 509 for (i=0; i<totalslices; i++) { /* loop over full slices */ 510 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 511 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 512 513 /* last slice may have padding rows. Don't use vectorization. */ 514 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 515 for (r=0; r<(A->rmap->n & 0x07); ++r) { 516 row = 8*i + r; 517 yval = (MatScalar)0.0; 518 nnz_in_row = a->rlen[row]; 519 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 520 z[row] = y[row] + yval; 521 } 522 break; 523 } 524 525 vec_y = _mm256_loadu_pd(y+8*i); 526 vec_y2 = _mm256_loadu_pd(y+8*i+4); 527 528 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 529 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 530 vec_vals = _mm256_loadu_pd(aval); 531 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 532 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 533 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 534 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 535 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 536 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 537 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 538 aval += 4; 539 540 vec_vals = _mm256_loadu_pd(aval); 541 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 542 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 543 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 544 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 545 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 546 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 547 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 548 aval += 4; 549 } 550 551 _mm256_storeu_pd(z+i*8,vec_y); 552 _mm256_storeu_pd(z+i*8+4,vec_y2); 553 } 554 #else 555 for (i=0; i<totalslices; i++) { /* loop over slices */ 556 for (j=0; j<8; j++) sum[j] = 0.0; 557 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 558 sum[0] += aval[j] * x[acolidx[j]]; 559 sum[1] += aval[j+1] * x[acolidx[j+1]]; 560 sum[2] += aval[j+2] * x[acolidx[j+2]]; 561 sum[3] += aval[j+3] * x[acolidx[j+3]]; 562 sum[4] += aval[j+4] * x[acolidx[j+4]]; 563 sum[5] += aval[j+5] * x[acolidx[j+5]]; 564 sum[6] += aval[j+6] * x[acolidx[j+6]]; 565 sum[7] += aval[j+7] * x[acolidx[j+7]]; 566 } 567 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 568 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 569 } else { 570 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 571 } 572 } 573 #endif 574 575 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 576 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 577 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 582 { 583 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 584 PetscScalar *y; 585 const PetscScalar *x; 586 const MatScalar *aval=a->val; 587 const PetscInt *acolidx=a->colidx; 588 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 589 PetscErrorCode ierr; 590 591 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 592 #pragma disjoint(*x,*y,*aval) 593 #endif 594 595 PetscFunctionBegin; 596 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 597 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 598 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 599 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 600 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 601 for (r=0; r<(A->rmap->n & 0x07); ++r) { 602 row = 8*i + r; 603 nnz_in_row = a->rlen[row]; 604 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 605 } 606 break; 607 } 608 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 609 y[acolidx[j]] += aval[j] * x[8*i]; 610 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 611 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 612 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 613 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 614 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 615 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 616 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 617 } 618 } 619 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 620 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 621 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 626 { 627 PetscErrorCode ierr; 628 629 PetscFunctionBegin; 630 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 631 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 Checks for missing diagonals 637 */ 638 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 639 { 640 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 641 PetscInt *diag,i; 642 643 PetscFunctionBegin; 644 *missing = PETSC_FALSE; 645 if (A->rmap->n > 0 && !(a->colidx)) { 646 *missing = PETSC_TRUE; 647 if (d) *d = 0; 648 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 649 } else { 650 diag = a->diag; 651 for (i=0; i<A->rmap->n; i++) { 652 if (diag[i] == -1) { 653 *missing = PETSC_TRUE; 654 if (d) *d = i; 655 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 656 break; 657 } 658 } 659 } 660 PetscFunctionReturn(0); 661 } 662 663 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 664 { 665 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 666 PetscInt i,j,m=A->rmap->n,shift; 667 PetscErrorCode ierr; 668 669 PetscFunctionBegin; 670 if (!a->diag) { 671 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 672 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 673 a->free_diag = PETSC_TRUE; 674 } 675 for (i=0; i<m; i++) { /* loop over rows */ 676 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 677 a->diag[i] = -1; 678 for (j=0; j<a->rlen[i]; j++) { 679 if (a->colidx[shift+j*8] == i) { 680 a->diag[i] = shift+j*8; 681 break; 682 } 683 } 684 } 685 PetscFunctionReturn(0); 686 } 687 688 /* 689 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 690 */ 691 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 692 { 693 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 694 PetscInt i,*diag,m = A->rmap->n; 695 MatScalar *val = a->val; 696 PetscScalar *idiag,*mdiag; 697 PetscErrorCode ierr; 698 699 PetscFunctionBegin; 700 if (a->idiagvalid) PetscFunctionReturn(0); 701 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 702 diag = a->diag; 703 if (!a->idiag) { 704 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 705 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 706 val = a->val; 707 } 708 mdiag = a->mdiag; 709 idiag = a->idiag; 710 711 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 712 for (i=0; i<m; i++) { 713 mdiag[i] = val[diag[i]]; 714 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 715 if (PetscRealPart(fshift)) { 716 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 717 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 718 A->factorerror_zeropivot_value = 0.0; 719 A->factorerror_zeropivot_row = i; 720 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 721 } 722 idiag[i] = 1.0/val[diag[i]]; 723 } 724 ierr = PetscLogFlops(m);CHKERRQ(ierr); 725 } else { 726 for (i=0; i<m; i++) { 727 mdiag[i] = val[diag[i]]; 728 idiag[i] = omega/(fshift + val[diag[i]]); 729 } 730 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 731 } 732 a->idiagvalid = PETSC_TRUE; 733 PetscFunctionReturn(0); 734 } 735 736 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 737 { 738 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 739 PetscErrorCode ierr; 740 741 PetscFunctionBegin; 742 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 743 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 744 PetscFunctionReturn(0); 745 } 746 747 PetscErrorCode MatDestroy_SeqSELL(Mat A) 748 { 749 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 750 PetscErrorCode ierr; 751 752 PetscFunctionBegin; 753 #if defined(PETSC_USE_LOG) 754 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 755 #endif 756 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 757 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 758 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 759 ierr = PetscFree(a->diag);CHKERRQ(ierr); 760 ierr = PetscFree(a->ibdiag);CHKERRQ(ierr); 761 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 762 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 763 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 764 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 765 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 766 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 767 768 ierr = PetscFree(A->data);CHKERRQ(ierr); 769 770 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 771 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 772 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 773 #if defined(PETSC_HAVE_ELEMENTAL) 774 #endif 775 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 776 PetscFunctionReturn(0); 777 } 778 779 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 780 { 781 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 782 PetscErrorCode ierr; 783 784 PetscFunctionBegin; 785 switch (op) { 786 case MAT_ROW_ORIENTED: 787 a->roworiented = flg; 788 break; 789 case MAT_KEEP_NONZERO_PATTERN: 790 a->keepnonzeropattern = flg; 791 break; 792 case MAT_NEW_NONZERO_LOCATIONS: 793 a->nonew = (flg ? 0 : 1); 794 break; 795 case MAT_NEW_NONZERO_LOCATION_ERR: 796 a->nonew = (flg ? -1 : 0); 797 break; 798 case MAT_NEW_NONZERO_ALLOCATION_ERR: 799 a->nonew = (flg ? -2 : 0); 800 break; 801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 802 a->nounused = (flg ? -1 : 0); 803 break; 804 case MAT_NEW_DIAGONALS: 805 case MAT_IGNORE_OFF_PROC_ENTRIES: 806 case MAT_USE_HASH_TABLE: 807 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 808 break; 809 case MAT_SPD: 810 case MAT_SYMMETRIC: 811 case MAT_STRUCTURALLY_SYMMETRIC: 812 case MAT_HERMITIAN: 813 case MAT_SYMMETRY_ETERNAL: 814 /* These options are handled directly by MatSetOption() */ 815 break; 816 default: 817 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 818 } 819 PetscFunctionReturn(0); 820 } 821 822 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 823 { 824 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 825 PetscInt i,j,n,shift; 826 PetscScalar *x,zero=0.0; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 831 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 832 833 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 834 PetscInt *diag=a->diag; 835 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 836 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 837 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 ierr = VecSet(v,zero);CHKERRQ(ierr); 842 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 843 for (i=0; i<n; i++) { /* loop over rows */ 844 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 845 x[i] = 0; 846 for (j=0; j<a->rlen[i]; j++) { 847 if (a->colidx[shift+j*8] == i) { 848 x[i] = a->val[shift+j*8]; 849 break; 850 } 851 } 852 } 853 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 858 { 859 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 860 const PetscScalar *l,*r; 861 PetscInt i,j,m,n,row; 862 PetscErrorCode ierr; 863 864 PetscFunctionBegin; 865 if (ll) { 866 /* The local size is used so that VecMPI can be passed to this routine 867 by MatDiagonalScale_MPISELL */ 868 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 869 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 870 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 871 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 872 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 873 a->val[j] *= l[8*i+row]; 874 } 875 } 876 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 877 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 878 } 879 if (rr) { 880 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 881 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 882 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 883 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 884 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 885 a->val[j] *= r[a->colidx[j]]; 886 } 887 } 888 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 889 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 890 } 891 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 896 897 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 898 { 899 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 900 PetscInt *cp,i,k,low,high,t,row,col,l; 901 PetscInt shift; 902 MatScalar *vp; 903 904 PetscFunctionBegin; 905 for (k=0; k<m; k++) { /* loop over added rows */ 906 row = im[k]; 907 if (row<0) continue; 908 #if defined(PETSC_USE_DEBUG) 909 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 910 #endif 911 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 912 cp = a->colidx+shift; /* pointer to the row */ 913 vp = a->val+shift; /* pointer to the row */ 914 for (l=0; l<n; l++) { /* loop over added rows */ 915 col = in[l]; 916 if (col<0) continue; 917 #if defined(PETSC_USE_DEBUG) 918 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 919 #endif 920 high = a->rlen[row]; low = 0; /* assume unsorted */ 921 while (high-low > 5) { 922 t = (low+high)/2; 923 if (*(cp+t*8) > col) high = t; 924 else low = t; 925 } 926 for (i=low; i<high; i++) { 927 if (*(cp+8*i) > col) break; 928 if (*(cp+8*i) == col) { 929 *v++ = *(vp+8*i); 930 goto finished; 931 } 932 } 933 *v++ = 0.0; 934 finished:; 935 } 936 } 937 PetscFunctionReturn(0); 938 } 939 940 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 941 { 942 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 943 PetscInt i,j,m=A->rmap->n,shift; 944 const char *name; 945 PetscViewerFormat format; 946 PetscErrorCode ierr; 947 948 PetscFunctionBegin; 949 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 950 if (format == PETSC_VIEWER_ASCII_MATLAB) { 951 PetscInt nofinalvalue = 0; 952 /* 953 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 954 nofinalvalue = 1; 955 } 956 */ 957 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 958 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 959 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 960 #if defined(PETSC_USE_COMPLEX) 961 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 962 #else 963 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 964 #endif 965 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 966 967 for (i=0; i<m; i++) { 968 shift = a->sliidx[i>>3]+(i&0x07); 969 for (j=0; j<a->rlen[i]; j++) { 970 #if defined(PETSC_USE_COMPLEX) 971 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 972 #else 973 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 974 #endif 975 } 976 } 977 /* 978 if (nofinalvalue) { 979 #if defined(PETSC_USE_COMPLEX) 980 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 981 #else 982 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 983 #endif 984 } 985 */ 986 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 987 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 988 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 989 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 990 PetscFunctionReturn(0); 991 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 992 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 993 for (i=0; i<m; i++) { 994 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 995 shift = a->sliidx[i>>3]+(i&0x07); 996 for (j=0; j<a->rlen[i]; j++) { 997 #if defined(PETSC_USE_COMPLEX) 998 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 999 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1000 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1001 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1002 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1003 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1004 } 1005 #else 1006 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1007 #endif 1008 } 1009 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1010 } 1011 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1012 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1013 PetscInt cnt=0,jcnt; 1014 PetscScalar value; 1015 #if defined(PETSC_USE_COMPLEX) 1016 PetscBool realonly=PETSC_TRUE; 1017 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1018 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1019 realonly = PETSC_FALSE; 1020 break; 1021 } 1022 } 1023 #endif 1024 1025 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1026 for (i=0; i<m; i++) { 1027 jcnt = 0; 1028 shift = a->sliidx[i>>3]+(i&0x07); 1029 for (j=0; j<A->cmap->n; j++) { 1030 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1031 value = a->val[cnt++]; 1032 jcnt++; 1033 } else { 1034 value = 0.0; 1035 } 1036 #if defined(PETSC_USE_COMPLEX) 1037 if (realonly) { 1038 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1039 } else { 1040 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1041 } 1042 #else 1043 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1044 #endif 1045 } 1046 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1047 } 1048 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1049 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1050 PetscInt fshift=1; 1051 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1052 #if defined(PETSC_USE_COMPLEX) 1053 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1054 #else 1055 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1056 #endif 1057 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1058 for (i=0; i<m; i++) { 1059 shift = a->sliidx[i>>3]+(i&0x07); 1060 for (j=0; j<a->rlen[i]; j++) { 1061 #if defined(PETSC_USE_COMPLEX) 1062 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1063 #else 1064 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1065 #endif 1066 } 1067 } 1068 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1069 } else { 1070 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1071 if (A->factortype) { 1072 for (i=0; i<m; i++) { 1073 shift = a->sliidx[i>>3]+(i&0x07); 1074 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1075 /* L part */ 1076 for (j=shift; j<a->diag[i]; j+=8) { 1077 #if defined(PETSC_USE_COMPLEX) 1078 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1079 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1080 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1081 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1082 } else { 1083 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1084 } 1085 #else 1086 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1087 #endif 1088 } 1089 /* diagonal */ 1090 j = a->diag[i]; 1091 #if defined(PETSC_USE_COMPLEX) 1092 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1093 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1094 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1095 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1096 } else { 1097 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1098 } 1099 #else 1100 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1101 #endif 1102 1103 /* U part */ 1104 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1105 #if defined(PETSC_USE_COMPLEX) 1106 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1107 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1108 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1109 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1110 } else { 1111 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1112 } 1113 #else 1114 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1115 #endif 1116 } 1117 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1118 } 1119 } else { 1120 for (i=0; i<m; i++) { 1121 shift = a->sliidx[i>>3]+(i&0x07); 1122 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1123 for (j=0; j<a->rlen[i]; j++) { 1124 #if defined(PETSC_USE_COMPLEX) 1125 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1126 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1127 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1128 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1129 } else { 1130 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1131 } 1132 #else 1133 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1134 #endif 1135 } 1136 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1137 } 1138 } 1139 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1140 } 1141 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #include <petscdraw.h> 1146 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1147 { 1148 Mat A=(Mat)Aa; 1149 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1150 PetscInt i,j,m=A->rmap->n,shift; 1151 int color; 1152 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1153 PetscViewer viewer; 1154 PetscViewerFormat format; 1155 PetscErrorCode ierr; 1156 1157 PetscFunctionBegin; 1158 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1159 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1160 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1161 1162 /* loop over matrix elements drawing boxes */ 1163 1164 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1165 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1166 /* Blue for negative, Cyan for zero and Red for positive */ 1167 color = PETSC_DRAW_BLUE; 1168 for (i=0; i<m; i++) { 1169 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1170 y_l = m - i - 1.0; y_r = y_l + 1.0; 1171 for (j=0; j<a->rlen[i]; j++) { 1172 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1173 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1174 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1175 } 1176 } 1177 color = PETSC_DRAW_CYAN; 1178 for (i=0; i<m; i++) { 1179 shift = a->sliidx[i>>3]+(i&0x07); 1180 y_l = m - i - 1.0; y_r = y_l + 1.0; 1181 for (j=0; j<a->rlen[i]; j++) { 1182 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1183 if (a->val[shift+8*j] != 0.) continue; 1184 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1185 } 1186 } 1187 color = PETSC_DRAW_RED; 1188 for (i=0; i<m; i++) { 1189 shift = a->sliidx[i>>3]+(i&0x07); 1190 y_l = m - i - 1.0; y_r = y_l + 1.0; 1191 for (j=0; j<a->rlen[i]; j++) { 1192 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1193 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1194 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1195 } 1196 } 1197 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1198 } else { 1199 /* use contour shading to indicate magnitude of values */ 1200 /* first determine max of all nonzero values */ 1201 PetscReal minv=0.0,maxv=0.0; 1202 PetscInt count=0; 1203 PetscDraw popup; 1204 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1205 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1206 } 1207 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1208 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1209 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1210 1211 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1212 for (i=0; i<m; i++) { 1213 shift = a->sliidx[i>>3]+(i&0x07); 1214 y_l = m - i - 1.0; 1215 y_r = y_l + 1.0; 1216 for (j=0; j<a->rlen[i]; j++) { 1217 x_l = a->colidx[shift+j*8]; 1218 x_r = x_l + 1.0; 1219 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1220 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1221 count++; 1222 } 1223 } 1224 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #include <petscdraw.h> 1230 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1231 { 1232 PetscDraw draw; 1233 PetscReal xr,yr,xl,yl,h,w; 1234 PetscBool isnull; 1235 PetscErrorCode ierr; 1236 1237 PetscFunctionBegin; 1238 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1239 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1240 if (isnull) PetscFunctionReturn(0); 1241 1242 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1243 xr += w; yr += h; xl = -w; yl = -h; 1244 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1245 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1246 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1247 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1248 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1253 { 1254 PetscBool iascii,isbinary,isdraw; 1255 PetscErrorCode ierr; 1256 1257 PetscFunctionBegin; 1258 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1259 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1260 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1261 if (iascii) { 1262 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1263 } else if (isbinary) { 1264 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1265 } else if (isdraw) { 1266 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1267 } 1268 PetscFunctionReturn(0); 1269 } 1270 1271 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1272 { 1273 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1274 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1275 MatScalar *vp; 1276 PetscErrorCode ierr; 1277 1278 PetscFunctionBegin; 1279 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1280 /* To do: compress out the unused elements */ 1281 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1282 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1283 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1284 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1285 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1286 for (i=0; i<a->totalslices; ++i) { 1287 shift = a->sliidx[i]; /* starting index of the slice */ 1288 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1289 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1290 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1291 row = 8*i + row_in_slice; 1292 nrow = a->rlen[row]; /* number of nonzeros in row */ 1293 /* 1294 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1295 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1296 */ 1297 lastcol = 0; 1298 if (nrow>0) { /* nonempty row */ 1299 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1300 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1301 for (j=1;j<8;j++) { 1302 if (a->rlen[8*i+j]) { 1303 lastcol = cp[j]; 1304 break; 1305 } 1306 } 1307 } else { 1308 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1309 } 1310 1311 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1312 cp[8*k+row_in_slice] = lastcol; 1313 vp[8*k+row_in_slice] = (MatScalar)0; 1314 } 1315 } 1316 } 1317 1318 A->info.mallocs += a->reallocs; 1319 a->reallocs = 0; 1320 1321 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1326 { 1327 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1328 1329 PetscFunctionBegin; 1330 info->block_size = 1.0; 1331 info->nz_allocated = (double)a->maxallocmat; 1332 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1333 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1334 info->assemblies = (double)A->num_ass; 1335 info->mallocs = (double)A->info.mallocs; 1336 info->memory = ((PetscObject)A)->mem; 1337 if (A->factortype) { 1338 info->fill_ratio_given = A->info.fill_ratio_given; 1339 info->fill_ratio_needed = A->info.fill_ratio_needed; 1340 info->factor_mallocs = A->info.factor_mallocs; 1341 } else { 1342 info->fill_ratio_given = 0; 1343 info->fill_ratio_needed = 0; 1344 info->factor_mallocs = 0; 1345 } 1346 PetscFunctionReturn(0); 1347 } 1348 1349 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1350 { 1351 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1352 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1353 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1354 MatScalar *vp,value; 1355 PetscErrorCode ierr; 1356 1357 PetscFunctionBegin; 1358 for (k=0; k<m; k++) { /* loop over added rows */ 1359 row = im[k]; 1360 if (row < 0) continue; 1361 #if defined(PETSC_USE_DEBUG) 1362 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1363 #endif 1364 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1365 cp = a->colidx+shift; /* pointer to the row */ 1366 vp = a->val+shift; /* pointer to the row */ 1367 nrow = a->rlen[row]; 1368 low = 0; 1369 high = nrow; 1370 1371 for (l=0; l<n; l++) { /* loop over added columns */ 1372 col = in[l]; 1373 if (col<0) continue; 1374 #if defined(PETSC_USE_DEBUG) 1375 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1376 #endif 1377 if (a->roworiented) { 1378 value = v[l+k*n]; 1379 } else { 1380 value = v[k+l*m]; 1381 } 1382 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1383 1384 /* search in this row for the specified colmun, i indicates the column to be set */ 1385 if (col <= lastcol) low = 0; 1386 else high = nrow; 1387 lastcol = col; 1388 while (high-low > 5) { 1389 t = (low+high)/2; 1390 if (*(cp+t*8) > col) high = t; 1391 else low = t; 1392 } 1393 for (i=low; i<high; i++) { 1394 if (*(cp+i*8) > col) break; 1395 if (*(cp+i*8) == col) { 1396 if (is == ADD_VALUES) *(vp+i*8) += value; 1397 else *(vp+i*8) = value; 1398 low = i + 1; 1399 goto noinsert; 1400 } 1401 } 1402 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1403 if (nonew == 1) goto noinsert; 1404 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1405 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1406 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1407 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1408 for (ii=nrow-1; ii>=i; ii--) { 1409 *(cp+(ii+1)*8) = *(cp+ii*8); 1410 *(vp+(ii+1)*8) = *(vp+ii*8); 1411 } 1412 a->rlen[row]++; 1413 *(cp+i*8) = col; 1414 *(vp+i*8) = value; 1415 a->nz++; 1416 A->nonzerostate++; 1417 low = i+1; high++; nrow++; 1418 noinsert:; 1419 } 1420 a->rlen[row] = nrow; 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1426 { 1427 PetscErrorCode ierr; 1428 1429 PetscFunctionBegin; 1430 /* If the two matrices have the same copy implementation, use fast copy. */ 1431 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1432 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1433 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1434 1435 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1436 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1437 } else { 1438 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1439 } 1440 PetscFunctionReturn(0); 1441 } 1442 1443 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1444 { 1445 PetscErrorCode ierr; 1446 1447 PetscFunctionBegin; 1448 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1453 { 1454 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1455 1456 PetscFunctionBegin; 1457 *array = a->val; 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1462 { 1463 PetscFunctionBegin; 1464 PetscFunctionReturn(0); 1465 } 1466 1467 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1468 { 1469 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1470 PetscInt i; 1471 MatScalar *aval=a->val; 1472 1473 PetscFunctionBegin; 1474 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1479 { 1480 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1481 PetscInt i; 1482 MatScalar *aval=a->val; 1483 PetscErrorCode ierr; 1484 1485 PetscFunctionBegin; 1486 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1487 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1488 PetscFunctionReturn(0); 1489 } 1490 1491 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1492 { 1493 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1494 MatScalar *aval=a->val; 1495 PetscScalar oalpha=alpha; 1496 PetscBLASInt one=1,size; 1497 PetscErrorCode ierr; 1498 1499 PetscFunctionBegin; 1500 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1501 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1502 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1503 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1508 { 1509 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1510 PetscErrorCode ierr; 1511 1512 PetscFunctionBegin; 1513 if (!Y->preallocated || !y->nz) { 1514 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1515 } 1516 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1521 { 1522 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1523 PetscScalar *x,sum,*t; 1524 const MatScalar *idiag=0,*mdiag; 1525 const PetscScalar *b,*xb; 1526 PetscInt n,m=A->rmap->n,i,j,shift; 1527 const PetscInt *diag; 1528 PetscErrorCode ierr; 1529 1530 PetscFunctionBegin; 1531 its = its*lits; 1532 1533 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1534 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1535 a->fshift = fshift; 1536 a->omega = omega; 1537 1538 diag = a->diag; 1539 t = a->ssor_work; 1540 idiag = a->idiag; 1541 mdiag = a->mdiag; 1542 1543 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1544 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1545 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1546 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1547 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1548 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1549 1550 if (flag & SOR_ZERO_INITIAL_GUESS) { 1551 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1552 for (i=0; i<m; i++) { 1553 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1554 sum = b[i]; 1555 n = (diag[i]-shift)/8; 1556 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1557 t[i] = sum; 1558 x[i] = sum*idiag[i]; 1559 } 1560 xb = t; 1561 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1562 } else xb = b; 1563 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1564 for (i=m-1; i>=0; i--) { 1565 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1566 sum = xb[i]; 1567 n = a->rlen[i]-(diag[i]-shift)/8-1; 1568 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1569 if (xb == b) { 1570 x[i] = sum*idiag[i]; 1571 } else { 1572 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1573 } 1574 } 1575 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1576 } 1577 its--; 1578 } 1579 while (its--) { 1580 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1581 for (i=0; i<m; i++) { 1582 /* lower */ 1583 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1584 sum = b[i]; 1585 n = (diag[i]-shift)/8; 1586 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1587 t[i] = sum; /* save application of the lower-triangular part */ 1588 /* upper */ 1589 n = a->rlen[i]-(diag[i]-shift)/8-1; 1590 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1591 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1592 } 1593 xb = t; 1594 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1595 } else xb = b; 1596 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1597 for (i=m-1; i>=0; i--) { 1598 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1599 sum = xb[i]; 1600 if (xb == b) { 1601 /* whole matrix (no checkpointing available) */ 1602 n = a->rlen[i]; 1603 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1604 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1605 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1606 n = a->rlen[i]-(diag[i]-shift)/8-1; 1607 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1608 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1609 } 1610 } 1611 if (xb == b) { 1612 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1613 } else { 1614 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1615 } 1616 } 1617 } 1618 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1619 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 /* -------------------------------------------------------------------*/ 1624 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1625 0, 1626 0, 1627 MatMult_SeqSELL, 1628 /* 4*/ MatMultAdd_SeqSELL, 1629 MatMultTranspose_SeqSELL, 1630 MatMultTransposeAdd_SeqSELL, 1631 0, 1632 0, 1633 0, 1634 /* 10*/ 0, 1635 0, 1636 0, 1637 MatSOR_SeqSELL, 1638 0, 1639 /* 15*/ MatGetInfo_SeqSELL, 1640 MatEqual_SeqSELL, 1641 MatGetDiagonal_SeqSELL, 1642 MatDiagonalScale_SeqSELL, 1643 0, 1644 /* 20*/ 0, 1645 MatAssemblyEnd_SeqSELL, 1646 MatSetOption_SeqSELL, 1647 MatZeroEntries_SeqSELL, 1648 /* 24*/ 0, 1649 0, 1650 0, 1651 0, 1652 0, 1653 /* 29*/ MatSetUp_SeqSELL, 1654 0, 1655 0, 1656 0, 1657 0, 1658 /* 34*/ MatDuplicate_SeqSELL, 1659 0, 1660 0, 1661 0, 1662 0, 1663 /* 39*/ 0, 1664 0, 1665 0, 1666 MatGetValues_SeqSELL, 1667 MatCopy_SeqSELL, 1668 /* 44*/ 0, 1669 MatScale_SeqSELL, 1670 MatShift_SeqSELL, 1671 0, 1672 0, 1673 /* 49*/ 0, 1674 0, 1675 0, 1676 0, 1677 0, 1678 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1679 0, 1680 0, 1681 0, 1682 0, 1683 /* 59*/ 0, 1684 MatDestroy_SeqSELL, 1685 MatView_SeqSELL, 1686 0, 1687 0, 1688 /* 64*/ 0, 1689 0, 1690 0, 1691 0, 1692 0, 1693 /* 69*/ 0, 1694 0, 1695 0, 1696 0, 1697 0, 1698 /* 74*/ 0, 1699 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1700 0, 1701 0, 1702 0, 1703 /* 79*/ 0, 1704 0, 1705 0, 1706 0, 1707 0, 1708 /* 84*/ 0, 1709 0, 1710 0, 1711 0, 1712 0, 1713 /* 89*/ 0, 1714 0, 1715 0, 1716 0, 1717 0, 1718 /* 94*/ 0, 1719 0, 1720 0, 1721 0, 1722 0, 1723 /* 99*/ 0, 1724 0, 1725 0, 1726 MatConjugate_SeqSELL, 1727 0, 1728 /*104*/ 0, 1729 0, 1730 0, 1731 0, 1732 0, 1733 /*109*/ 0, 1734 0, 1735 0, 1736 0, 1737 MatMissingDiagonal_SeqSELL, 1738 /*114*/ 0, 1739 0, 1740 0, 1741 0, 1742 0, 1743 /*119*/ 0, 1744 0, 1745 0, 1746 0, 1747 0, 1748 /*124*/ 0, 1749 0, 1750 0, 1751 0, 1752 0, 1753 /*129*/ 0, 1754 0, 1755 0, 1756 0, 1757 0, 1758 /*134*/ 0, 1759 0, 1760 0, 1761 0, 1762 0, 1763 /*139*/ 0, 1764 0, 1765 0, 1766 MatFDColoringSetUp_SeqXAIJ, 1767 0, 1768 /*144*/0 1769 }; 1770 1771 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1772 { 1773 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1774 PetscErrorCode ierr; 1775 1776 PetscFunctionBegin; 1777 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1778 1779 /* allocate space for values if not already there */ 1780 if (!a->saved_values) { 1781 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1782 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1783 } 1784 1785 /* copy values over */ 1786 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1787 PetscFunctionReturn(0); 1788 } 1789 1790 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1791 { 1792 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1793 PetscErrorCode ierr; 1794 1795 PetscFunctionBegin; 1796 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1797 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1798 /* copy values over */ 1799 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1800 PetscFunctionReturn(0); 1801 } 1802 1803 /*@C 1804 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1805 1806 Not Collective 1807 1808 Input Parameters: 1809 . mat - a MATSEQSELL matrix 1810 . array - pointer to the data 1811 1812 Level: intermediate 1813 1814 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1815 @*/ 1816 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1817 { 1818 PetscErrorCode ierr; 1819 1820 PetscFunctionBegin; 1821 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1826 { 1827 Mat_SeqSELL *b; 1828 PetscMPIInt size; 1829 PetscErrorCode ierr; 1830 1831 PetscFunctionBegin; 1832 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1833 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1834 1835 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1836 1837 B->data = (void*)b; 1838 1839 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1840 1841 b->row = 0; 1842 b->col = 0; 1843 b->icol = 0; 1844 b->reallocs = 0; 1845 b->ignorezeroentries = PETSC_FALSE; 1846 b->roworiented = PETSC_TRUE; 1847 b->nonew = 0; 1848 b->diag = 0; 1849 b->solve_work = 0; 1850 B->spptr = 0; 1851 b->saved_values = 0; 1852 b->idiag = 0; 1853 b->mdiag = 0; 1854 b->ssor_work = 0; 1855 b->omega = 1.0; 1856 b->fshift = 0.0; 1857 b->idiagvalid = PETSC_FALSE; 1858 b->ibdiagvalid = PETSC_FALSE; 1859 b->keepnonzeropattern = PETSC_FALSE; 1860 1861 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1862 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1863 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1864 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1865 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1866 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1867 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1868 PetscFunctionReturn(0); 1869 } 1870 1871 /* 1872 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1873 */ 1874 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1875 { 1876 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1877 PetscInt i,m=A->rmap->n; 1878 PetscInt totalslices=a->totalslices; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 c = (Mat_SeqSELL*)C->data; 1883 1884 C->factortype = A->factortype; 1885 c->row = 0; 1886 c->col = 0; 1887 c->icol = 0; 1888 c->reallocs = 0; 1889 1890 C->assembled = PETSC_TRUE; 1891 1892 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 1893 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 1894 1895 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 1896 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1897 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 1898 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 1899 1900 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 1901 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 1902 1903 /* allocate the matrix space */ 1904 if (mallocmatspace) { 1905 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 1906 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1907 1908 c->singlemalloc = PETSC_TRUE; 1909 1910 if (m > 0) { 1911 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 1912 if (cpvalues == MAT_COPY_VALUES) { 1913 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1914 } else { 1915 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1916 } 1917 } 1918 } 1919 1920 c->ignorezeroentries = a->ignorezeroentries; 1921 c->roworiented = a->roworiented; 1922 c->nonew = a->nonew; 1923 if (a->diag) { 1924 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 1925 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1926 for (i=0; i<m; i++) { 1927 c->diag[i] = a->diag[i]; 1928 } 1929 } else c->diag = 0; 1930 1931 c->solve_work = 0; 1932 c->saved_values = 0; 1933 c->idiag = 0; 1934 c->ssor_work = 0; 1935 c->keepnonzeropattern = a->keepnonzeropattern; 1936 c->free_val = PETSC_TRUE; 1937 c->free_colidx = PETSC_TRUE; 1938 1939 c->maxallocmat = a->maxallocmat; 1940 c->maxallocrow = a->maxallocrow; 1941 c->rlenmax = a->rlenmax; 1942 c->nz = a->nz; 1943 C->preallocated = PETSC_TRUE; 1944 1945 c->nonzerorowcnt = a->nonzerorowcnt; 1946 C->nonzerostate = A->nonzerostate; 1947 1948 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 1949 PetscFunctionReturn(0); 1950 } 1951 1952 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 1953 { 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 1958 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 1959 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 1960 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 1961 } 1962 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 1964 PetscFunctionReturn(0); 1965 } 1966 1967 /*@C 1968 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 1969 1970 Collective on MPI_Comm 1971 1972 Input Parameters: 1973 + comm - MPI communicator, set to PETSC_COMM_SELF 1974 . m - number of rows 1975 . n - number of columns 1976 . rlenmax - maximum number of nonzeros in a row 1977 - rlen - array containing the number of nonzeros in the various rows 1978 (possibly different for each row) or NULL 1979 1980 Output Parameter: 1981 . A - the matrix 1982 1983 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1984 MatXXXXSetPreallocation() paradgm instead of this routine directly. 1985 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 1986 1987 Notes: 1988 If nnz is given then nz is ignored 1989 1990 Specify the preallocated storage with either rlenmax or rlen (not both). 1991 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 1992 allocation. For large problems you MUST preallocate memory or you 1993 will get TERRIBLE performance, see the users' manual chapter on matrices. 1994 1995 Level: intermediate 1996 1997 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 1998 1999 @*/ 2000 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2001 { 2002 PetscErrorCode ierr; 2003 2004 PetscFunctionBegin; 2005 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2006 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2007 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2008 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2013 { 2014 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2015 PetscInt totalslices=a->totalslices; 2016 PetscErrorCode ierr; 2017 2018 PetscFunctionBegin; 2019 /* If the matrix dimensions are not equal,or no of nonzeros */ 2020 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2021 *flg = PETSC_FALSE; 2022 PetscFunctionReturn(0); 2023 } 2024 /* if the a->colidx are the same */ 2025 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2026 if (!*flg) PetscFunctionReturn(0); 2027 /* if a->val are the same */ 2028 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2033 { 2034 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2035 2036 PetscFunctionBegin; 2037 a->idiagvalid = PETSC_FALSE; 2038 a->ibdiagvalid = PETSC_FALSE; 2039 PetscFunctionReturn(0); 2040 } 2041 2042 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2043 { 2044 #if defined(PETSC_USE_COMPLEX) 2045 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2046 PetscInt i; 2047 PetscScalar *val = a->val; 2048 2049 PetscFunctionBegin; 2050 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2051 val[i] = PetscConj(val[i]); 2052 } 2053 #else 2054 PetscFunctionBegin; 2055 #endif 2056 PetscFunctionReturn(0); 2057 } 2058