1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) 9 #include <immintrin.h> 10 11 #if !defined(_MM_SCALE_8) 12 #define _MM_SCALE_8 8 13 #endif 14 15 #if defined(__AVX512F__) 16 /* these do not work 17 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 18 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 19 */ 20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 22 vec_idx = _mm256_load_si256((__m256i const*)acolidx); \ 23 vec_vals = _mm512_load_pd(aval); \ 24 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y); 26 27 #endif 28 #endif /* PETSC_HAVE_IMMINTRIN_H */ 29 30 /*@C 31 MatSeqSELLSetPreallocation - For good matrix assembly performance 32 the user should preallocate the matrix storage by setting the parameter nz 33 (or the array nnz). By setting these parameters accurately, performance 34 during matrix assembly can be increased significantly. 35 36 Collective on MPI_Comm 37 38 Input Parameters: 39 + B - The matrix 40 . nz - number of nonzeros per row (same for all rows) 41 - nnz - array containing the number of nonzeros in the various rows 42 (possibly different for each row) or NULL 43 44 Notes: 45 If nnz is given then nz is ignored. 46 47 Specify the preallocated storage with either nz or nnz (not both). 48 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 49 allocation. For large problems you MUST preallocate memory or you 50 will get TERRIBLE performance, see the users' manual chapter on matrices. 51 52 You can call MatGetInfo() to get information on how effective the preallocation was; 53 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 54 You can also run with the option -info and look for messages with the string 55 malloc in them to see if additional memory allocation was needed. 56 57 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 58 entries or columns indices. 59 60 The maximum number of nonzeos in any row should be as accuate as possible. 61 If it is underesitmated, you will get bad performance due to reallocation 62 (MatSeqXSELLReallocateSELL). 63 64 Level: intermediate 65 66 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 67 68 @*/ 69 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 70 { 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 75 PetscValidType(B,1); 76 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 81 { 82 Mat_SeqSELL *b; 83 PetscInt i,j,totalslices; 84 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 89 if (maxallocrow == MAT_SKIP_ALLOCATION) { 90 skipallocation = PETSC_TRUE; 91 maxallocrow = 0; 92 } 93 94 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 95 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 96 97 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 98 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 99 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 100 if (rlen) { 101 for (i=0; i<B->rmap->n; i++) { 102 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 103 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 104 } 105 } 106 107 B->preallocated = PETSC_TRUE; 108 109 b = (Mat_SeqSELL*)B->data; 110 111 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 112 b->totalslices = totalslices; 113 if (!skipallocation) { 114 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 115 116 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 117 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 118 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 119 } 120 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 121 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 122 else if (maxallocrow < 0) maxallocrow = 1; 123 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 124 } else { 125 maxallocrow = 0; 126 b->sliidx[0] = 0; 127 for (i=1; i<totalslices; i++) { 128 b->sliidx[i] = 0; 129 for (j=0;j<8;j++) { 130 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 131 } 132 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 133 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 134 } 135 /* last slice */ 136 b->sliidx[totalslices] = 0; 137 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 138 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 139 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 140 } 141 142 /* allocate space for val, colidx, rlen */ 143 /* FIXME: should B's old memory be unlogged? */ 144 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 145 /* FIXME: assuming an element of the bit array takes 8 bits */ 146 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 147 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 148 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 149 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 150 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 151 152 b->singlemalloc = PETSC_TRUE; 153 b->free_val = PETSC_TRUE; 154 b->free_colidx = PETSC_TRUE; 155 } else { 156 b->free_val = PETSC_FALSE; 157 b->free_colidx = PETSC_FALSE; 158 } 159 160 b->nz = 0; 161 b->maxallocrow = maxallocrow; 162 b->rlenmax = maxallocrow; 163 b->maxallocmat = b->sliidx[totalslices]; 164 B->info.nz_unneeded = (double)b->maxallocmat; 165 if (realalloc) { 166 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 172 { 173 Mat B; 174 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 175 PetscInt i,j,row; 176 PetscBool isnonzero; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 181 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 182 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 183 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 184 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 185 186 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 187 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 188 isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]); 189 if (isnonzero) { 190 ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr); 191 } 192 } 193 } 194 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 195 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 196 B->rmap->bs = A->rmap->bs; 197 198 if (reuse == MAT_INPLACE_MATRIX) { 199 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 200 } else { 201 *newmat = B; 202 } 203 PetscFunctionReturn(0); 204 } 205 206 #include <../src/mat/impls/aij/seq/aij.h> 207 208 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 209 { 210 Mat B; 211 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 212 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 213 const PetscInt *cols; 214 const PetscScalar *vals; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (A->rmap->bs > 1) { 219 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 /* Can we just use ilen? */ 223 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 224 for (i=0; i<m; i++) { 225 rowlengths[i] = ai[i+1] - ai[i]; 226 } 227 228 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 229 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 230 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 231 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 232 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 233 234 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 235 236 for (row=0; row<m; row++) { 237 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 238 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 239 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 240 } 241 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 242 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 243 B->rmap->bs = A->rmap->bs; 244 245 if (reuse == MAT_INPLACE_MATRIX) { 246 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 247 } else { 248 *newmat = B; 249 } 250 PetscFunctionReturn(0); 251 } 252 253 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 254 { 255 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 256 PetscScalar *y; 257 const PetscScalar *x; 258 const MatScalar *aval=a->val; 259 PetscInt totalslices=a->totalslices; 260 const PetscInt *acolidx=a->colidx; 261 PetscInt i,j=0; 262 PetscErrorCode ierr; 263 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 264 __m512d vec_x,vec_y,vec_vals; 265 __m256i vec_idx; 266 __mmask8 mask; 267 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 268 __m256i vec_idx2,vec_idx3,vec_idx4; 269 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 270 __m128d vec_x_tmp; 271 __m256d vec_x,vec_y,vec_y2,vec_vals; 272 MatScalar yval; 273 PetscInt r,rows_left,row,nnz_in_row; 274 #else 275 PetscScalar sum[8]; 276 #endif 277 278 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 279 #pragma disjoint(*x,*y,*aval) 280 #endif 281 282 PetscFunctionBegin; 283 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 284 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 285 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 286 for (i=0; i<totalslices; i++) { /* loop over slices */ 287 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 288 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 289 290 vec_y = _mm512_setzero_pd(); 291 vec_y2 = _mm512_setzero_pd(); 292 vec_y3 = _mm512_setzero_pd(); 293 vec_y4 = _mm512_setzero_pd(); 294 295 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 296 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 297 case 3: 298 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 299 acolidx += 8; aval += 8; 300 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 301 acolidx += 8; aval += 8; 302 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 303 acolidx += 8; aval += 8; 304 j += 3; 305 break; 306 case 2: 307 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 308 acolidx += 8; aval += 8; 309 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 310 acolidx += 8; aval += 8; 311 j += 2; 312 break; 313 case 1: 314 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 315 acolidx += 8; aval += 8; 316 j += 1; 317 break; 318 } 319 #pragma novector 320 for (; j<(a->sliidx[i+1]>>3); j+=4) { 321 /* 322 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 323 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 324 */ 325 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 326 acolidx += 8; aval += 8; 327 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 328 acolidx += 8; aval += 8; 329 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 330 acolidx += 8; aval += 8; 331 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 332 acolidx += 8; aval += 8; 333 } 334 335 vec_y = _mm512_add_pd(vec_y,vec_y2); 336 vec_y = _mm512_add_pd(vec_y,vec_y3); 337 vec_y = _mm512_add_pd(vec_y,vec_y4); 338 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 339 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 340 _mm512_mask_store_pd(&y[8*i],mask,vec_y); 341 } else { 342 _mm512_store_pd(&y[8*i],vec_y); 343 } 344 } 345 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 346 for (i=0; i<totalslices; i++) { /* loop over full slices */ 347 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 348 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 349 350 vec_y = _mm256_setzero_pd(); 351 vec_y2 = _mm256_setzero_pd(); 352 353 /* last slice may have padding rows. Don't use vectorization. */ 354 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 355 rows_left = A->rmap->n - 8*i; 356 for (r=0; r<rows_left; ++r) { 357 yval = (MatScalar)0; 358 row = 8*i + r; 359 nnz_in_row = a->rlen[row]; 360 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 361 y[row] = yval; 362 } 363 break; 364 } 365 366 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 367 for (; j<a->sliidx[i+1]; j+=8) { 368 vec_vals = _mm256_loadu_pd(aval); 369 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 370 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 371 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 372 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 373 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 374 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 375 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 376 aval += 4; 377 378 vec_vals = _mm256_loadu_pd(aval); 379 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 380 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 381 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 382 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 383 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 384 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 385 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 386 aval += 4; 387 } 388 389 _mm256_storeu_pd(y + i*8, vec_y); 390 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 391 } 392 393 #else 394 for (i=0; i<totalslices; i++) { /* loop over slices */ 395 for (j=0; j<8; j++) sum[j] = 0.0; 396 397 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 398 sum[0] += aval[j]*x[acolidx[j]]; 399 sum[1] += aval[j+1]*x[acolidx[j+1]]; 400 sum[2] += aval[j+2]*x[acolidx[j+2]]; 401 sum[3] += aval[j+3]*x[acolidx[j+3]]; 402 sum[4] += aval[j+4]*x[acolidx[j+4]]; 403 sum[5] += aval[j+5]*x[acolidx[j+5]]; 404 sum[6] += aval[j+6]*x[acolidx[j+6]]; 405 sum[7] += aval[j+7]*x[acolidx[j+7]]; 406 } 407 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 408 for(j=0;j<(A->rmap->n & 0x07);j++) y[8*i+j] = sum[j]; 409 } else { 410 y[8*i] = sum[0]; 411 y[8*i+1] = sum[1]; 412 y[8*i+2] = sum[2]; 413 y[8*i+3] = sum[3]; 414 y[8*i+4] = sum[4]; 415 y[8*i+5] = sum[5]; 416 y[8*i+6] = sum[6]; 417 y[8*i+7] = sum[7]; 418 } 419 } 420 #endif 421 422 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 423 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 424 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 429 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 430 { 431 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 432 PetscScalar *y,*z; 433 const PetscScalar *x; 434 const MatScalar *aval=a->val; 435 PetscInt totalslices=a->totalslices; 436 const PetscInt *acolidx=a->colidx; 437 PetscInt i,j; 438 PetscErrorCode ierr; 439 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 440 __m512d vec_x,vec_y,vec_z,vec_vals; 441 __m256i vec_idx; 442 __mmask8 mask; 443 #else 444 PetscScalar sum[8]; 445 #endif 446 447 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 448 #pragma disjoint(*x,*y,*aval) 449 #endif 450 451 PetscFunctionBegin; 452 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 453 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 454 for (i=0; i<totalslices; i++) { /* loop over slices */ 455 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 456 vec_y = _mm512_load_pd(&y[8*i]); 457 #pragma novector 458 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 459 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 460 acolidx += 8; aval += 8; 461 } 462 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 463 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 464 _mm512_mask_store_pd(&z[8*i],mask,vec_y); 465 } else { 466 _mm512_store_pd(&z[8*i],vec_y); 467 } 468 #else 469 sum[0] = y[8*i]; 470 sum[1] = y[8*i+1]; 471 sum[2] = y[8*i+2]; 472 sum[3] = y[8*i+3]; 473 sum[4] = y[8*i+4]; 474 sum[5] = y[8*i+5]; 475 sum[6] = y[8*i+6]; 476 sum[7] = y[8*i+7]; 477 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 478 sum[0] += aval[j]*x[acolidx[j]]; 479 sum[1] += aval[j+1]*x[acolidx[j+1]]; 480 sum[2] += aval[j+2]*x[acolidx[j+2]]; 481 sum[3] += aval[j+3]*x[acolidx[j+3]]; 482 sum[4] += aval[j+4]*x[acolidx[j+4]]; 483 sum[5] += aval[j+5]*x[acolidx[j+5]]; 484 sum[6] += aval[j+6]*x[acolidx[j+6]]; 485 sum[7] += aval[j+7]*x[acolidx[j+7]]; 486 } 487 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 488 for(j=0;j<(A->rmap->n & 0x07);j++) z[8*i+j] = sum[j]; 489 } else { 490 z[8*i] = sum[0]; 491 z[8*i+1] = sum[1]; 492 z[8*i+2] = sum[2]; 493 z[8*i+3] = sum[3]; 494 z[8*i+4] = sum[4]; 495 z[8*i+5] = sum[5]; 496 z[8*i+6] = sum[6]; 497 z[8*i+7] = sum[7]; 498 } 499 #endif 500 } 501 502 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 504 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 508 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 509 { 510 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 511 PetscScalar *y; 512 const PetscScalar *x; 513 const MatScalar *aval=a->val; 514 const PetscInt *acolidx=a->colidx; 515 PetscInt i,j,row; 516 PetscBool isnonzero; 517 PetscErrorCode ierr; 518 519 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 520 #pragma disjoint(*x,*y,*aval) 521 #endif 522 523 PetscFunctionBegin; 524 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 525 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 526 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 527 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 528 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 529 isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]); 530 if (isnonzero) y[acolidx[j]] += aval[j]*x[8*i+row]; 531 } 532 } 533 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 534 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 535 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 540 { 541 PetscErrorCode ierr; 542 543 PetscFunctionBegin; 544 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 545 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 546 PetscFunctionReturn(0); 547 } 548 549 /* 550 Checks for missing diagonals 551 */ 552 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 553 { 554 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 555 PetscInt *diag,i; 556 557 PetscFunctionBegin; 558 *missing = PETSC_FALSE; 559 if (A->rmap->n > 0 && !(a->colidx)) { 560 *missing = PETSC_TRUE; 561 if (d) *d = 0; 562 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 563 } else { 564 diag = a->diag; 565 for (i=0; i<A->rmap->n; i++) { 566 if (diag[i] == -1) { 567 *missing = PETSC_TRUE; 568 if (d) *d = i; 569 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 570 break; 571 } 572 } 573 } 574 PetscFunctionReturn(0); 575 } 576 577 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 578 { 579 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 580 PetscInt i,j,m=A->rmap->n,shift; 581 PetscErrorCode ierr; 582 583 PetscFunctionBegin; 584 if (!a->diag) { 585 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 586 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 587 a->free_diag = PETSC_TRUE; 588 } 589 for (i=0; i<m; i++) { /* loop over rows */ 590 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 591 a->diag[i] = -1; 592 for (j=0; j<a->rlen[i]; j++) { 593 if (a->colidx[shift+j*8] == i) { 594 a->diag[i] = shift+j*8; 595 break; 596 } 597 } 598 } 599 PetscFunctionReturn(0); 600 } 601 602 /* 603 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 604 */ 605 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 606 { 607 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 608 PetscInt i,*diag,m = A->rmap->n; 609 MatScalar *val = a->val; 610 PetscScalar *idiag,*mdiag; 611 PetscErrorCode ierr; 612 613 PetscFunctionBegin; 614 if (a->idiagvalid) PetscFunctionReturn(0); 615 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 616 diag = a->diag; 617 if (!a->idiag) { 618 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 619 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 620 val = a->val; 621 } 622 mdiag = a->mdiag; 623 idiag = a->idiag; 624 625 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 626 for (i=0; i<m; i++) { 627 mdiag[i] = val[diag[i]]; 628 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 629 if (PetscRealPart(fshift)) { 630 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 631 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 632 A->factorerror_zeropivot_value = 0.0; 633 A->factorerror_zeropivot_row = i; 634 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 635 } 636 idiag[i] = 1.0/val[diag[i]]; 637 } 638 ierr = PetscLogFlops(m);CHKERRQ(ierr); 639 } else { 640 for (i=0; i<m; i++) { 641 mdiag[i] = val[diag[i]]; 642 idiag[i] = omega/(fshift + val[diag[i]]); 643 } 644 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 645 } 646 a->idiagvalid = PETSC_TRUE; 647 PetscFunctionReturn(0); 648 } 649 650 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 651 { 652 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 653 PetscErrorCode ierr; 654 655 PetscFunctionBegin; 656 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 657 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 PetscErrorCode MatDestroy_SeqSELL(Mat A) 662 { 663 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 664 PetscErrorCode ierr; 665 666 PetscFunctionBegin; 667 #if defined(PETSC_USE_LOG) 668 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 669 #endif 670 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 671 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 672 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 673 ierr = PetscFree(a->diag);CHKERRQ(ierr); 674 ierr = PetscFree(a->ibdiag);CHKERRQ(ierr); 675 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 676 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 677 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 678 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 679 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 680 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 681 682 ierr = PetscFree(A->data);CHKERRQ(ierr); 683 684 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 685 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 686 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 687 #if defined(PETSC_HAVE_ELEMENTAL) 688 #endif 689 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } 692 693 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 694 { 695 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 switch (op) { 700 case MAT_ROW_ORIENTED: 701 a->roworiented = flg; 702 break; 703 case MAT_KEEP_NONZERO_PATTERN: 704 a->keepnonzeropattern = flg; 705 break; 706 case MAT_NEW_NONZERO_LOCATIONS: 707 a->nonew = (flg ? 0 : 1); 708 break; 709 case MAT_NEW_NONZERO_LOCATION_ERR: 710 a->nonew = (flg ? -1 : 0); 711 break; 712 case MAT_NEW_NONZERO_ALLOCATION_ERR: 713 a->nonew = (flg ? -2 : 0); 714 break; 715 case MAT_UNUSED_NONZERO_LOCATION_ERR: 716 a->nounused = (flg ? -1 : 0); 717 break; 718 case MAT_NEW_DIAGONALS: 719 case MAT_IGNORE_OFF_PROC_ENTRIES: 720 case MAT_USE_HASH_TABLE: 721 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 722 break; 723 case MAT_SPD: 724 case MAT_SYMMETRIC: 725 case MAT_STRUCTURALLY_SYMMETRIC: 726 case MAT_HERMITIAN: 727 case MAT_SYMMETRY_ETERNAL: 728 /* These options are handled directly by MatSetOption() */ 729 break; 730 default: 731 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 732 } 733 PetscFunctionReturn(0); 734 } 735 736 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 737 { 738 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 739 PetscInt i,j,n,shift; 740 PetscScalar *x,zero=0.0; 741 PetscErrorCode ierr; 742 743 PetscFunctionBegin; 744 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 745 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 746 747 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 748 PetscInt *diag=a->diag; 749 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 750 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 751 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 752 PetscFunctionReturn(0); 753 } 754 755 ierr = VecSet(v,zero);CHKERRQ(ierr); 756 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 757 for (i=0; i<n; i++) { /* loop over rows */ 758 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 759 x[i] = 0; 760 for (j=0; j<a->rlen[i]; j++) { 761 if (a->colidx[shift+j*8] == i) { 762 x[i] = a->val[shift+j*8]; 763 break; 764 } 765 } 766 } 767 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 772 { 773 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 774 const PetscScalar *l,*r; 775 PetscInt i,j,m,n,row; 776 PetscErrorCode ierr; 777 778 PetscFunctionBegin; 779 if (ll) { 780 /* The local size is used so that VecMPI can be passed to this routine 781 by MatDiagonalScale_MPISELL */ 782 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 783 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 784 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 785 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 786 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 787 a->val[j] *= l[8*i+row]; 788 } 789 } 790 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 791 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 792 } 793 if (rr) { 794 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 795 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 796 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 797 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 798 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 799 a->val[j] *= r[a->colidx[j]]; 800 } 801 } 802 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 803 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 804 } 805 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 806 PetscFunctionReturn(0); 807 } 808 809 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 810 811 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 812 { 813 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 814 PetscInt *cp,i,k,low,high,t,row,col,l; 815 PetscInt shift; 816 MatScalar *vp; 817 818 PetscFunctionBegin; 819 for (k=0; k<m; k++) { /* loop over added rows */ 820 row = im[k]; 821 if (row<0) continue; 822 #if defined(PETSC_USE_DEBUG) 823 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 824 #endif 825 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 826 cp = a->colidx+shift; /* pointer to the row */ 827 vp = a->val+shift; /* pointer to the row */ 828 for (l=0; l<n; l++) { /* loop over added rows */ 829 col = in[l]; 830 if (col<0) continue; 831 #if defined(PETSC_USE_DEBUG) 832 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 833 #endif 834 high = a->rlen[row]; low = 0; /* assume unsorted */ 835 while (high-low > 5) { 836 t = (low+high)/2; 837 if (*(cp+t*8) > col) high = t; 838 else low = t; 839 } 840 for (i=low; i<high; i++) { 841 if (*(cp+8*i) > col) break; 842 if (*(cp+8*i) == col) { 843 *v++ = *(vp+8*i); 844 goto finished; 845 } 846 } 847 *v++ = 0.0; 848 finished:; 849 } 850 } 851 PetscFunctionReturn(0); 852 } 853 854 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 855 { 856 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 857 PetscInt i,j,m=A->rmap->n,shift; 858 const char *name; 859 PetscViewerFormat format; 860 PetscErrorCode ierr; 861 862 PetscFunctionBegin; 863 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 864 if (format == PETSC_VIEWER_ASCII_MATLAB) { 865 PetscInt nofinalvalue = 0; 866 /* 867 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 868 nofinalvalue = 1; 869 } 870 */ 871 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 872 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 873 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 874 #if defined(PETSC_USE_COMPLEX) 875 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 876 #else 877 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 878 #endif 879 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 880 881 for (i=0; i<m; i++) { 882 shift = a->sliidx[i>>3]+(i&0x07); 883 for (j=0; j<a->rlen[i]; j++) { 884 #if defined(PETSC_USE_COMPLEX) 885 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 886 #else 887 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 888 #endif 889 } 890 } 891 /* 892 if (nofinalvalue) { 893 #if defined(PETSC_USE_COMPLEX) 894 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 895 #else 896 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 897 #endif 898 } 899 */ 900 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 901 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 902 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 903 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 906 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 907 for (i=0; i<m; i++) { 908 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 909 shift = a->sliidx[i>>3]+(i&0x07); 910 for (j=0; j<a->rlen[i]; j++) { 911 #if defined(PETSC_USE_COMPLEX) 912 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 913 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 914 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 915 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 916 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 917 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 918 } 919 #else 920 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 921 #endif 922 } 923 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 924 } 925 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 926 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 927 PetscInt cnt=0,jcnt; 928 PetscScalar value; 929 #if defined(PETSC_USE_COMPLEX) 930 PetscBool realonly=PETSC_TRUE; 931 for (i=0; i<a->sliidx[a->totalslices]; i++) { 932 if (PetscImaginaryPart(a->val[i]) != 0.0) { 933 realonly = PETSC_FALSE; 934 break; 935 } 936 } 937 #endif 938 939 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 940 for (i=0; i<m; i++) { 941 jcnt = 0; 942 shift = a->sliidx[i>>3]+(i&0x07); 943 for (j=0; j<A->cmap->n; j++) { 944 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 945 value = a->val[cnt++]; 946 jcnt++; 947 } else { 948 value = 0.0; 949 } 950 #if defined(PETSC_USE_COMPLEX) 951 if (realonly) { 952 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 953 } else { 954 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 955 } 956 #else 957 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 958 #endif 959 } 960 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 961 } 962 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 963 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 964 PetscInt fshift=1; 965 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 966 #if defined(PETSC_USE_COMPLEX) 967 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 968 #else 969 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 970 #endif 971 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 972 for (i=0; i<m; i++) { 973 shift = a->sliidx[i>>3]+(i&0x07); 974 for (j=0; j<a->rlen[i]; j++) { 975 #if defined(PETSC_USE_COMPLEX) 976 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 977 #else 978 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 979 #endif 980 } 981 } 982 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 983 } else { 984 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 985 if (A->factortype) { 986 for (i=0; i<m; i++) { 987 shift = a->sliidx[i>>3]+(i&0x07); 988 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 989 /* L part */ 990 for (j=shift; j<a->diag[i]; j+=8) { 991 #if defined(PETSC_USE_COMPLEX) 992 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 993 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 994 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 995 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 996 } else { 997 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 998 } 999 #else 1000 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1001 #endif 1002 } 1003 /* diagonal */ 1004 j = a->diag[i]; 1005 #if defined(PETSC_USE_COMPLEX) 1006 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1007 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1008 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1009 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1010 } else { 1011 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1012 } 1013 #else 1014 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1015 #endif 1016 1017 /* U part */ 1018 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1019 #if defined(PETSC_USE_COMPLEX) 1020 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1021 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1022 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1023 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1024 } else { 1025 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1026 } 1027 #else 1028 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1029 #endif 1030 } 1031 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1032 } 1033 } else { 1034 for (i=0; i<m; i++) { 1035 shift = a->sliidx[i>>3]+(i&0x07); 1036 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1037 for (j=0; j<a->rlen[i]; j++) { 1038 #if defined(PETSC_USE_COMPLEX) 1039 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1040 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1041 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1042 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1043 } else { 1044 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1045 } 1046 #else 1047 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1048 #endif 1049 } 1050 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1051 } 1052 } 1053 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1054 } 1055 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 #include <petscdraw.h> 1060 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1061 { 1062 Mat A=(Mat)Aa; 1063 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1064 PetscInt i,j,m=A->rmap->n,shift; 1065 int color; 1066 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1067 PetscViewer viewer; 1068 PetscViewerFormat format; 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1074 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1075 1076 /* loop over matrix elements drawing boxes */ 1077 1078 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1079 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1080 /* Blue for negative, Cyan for zero and Red for positive */ 1081 color = PETSC_DRAW_BLUE; 1082 for (i=0; i<m; i++) { 1083 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1084 y_l = m - i - 1.0; y_r = y_l + 1.0; 1085 for (j=0; j<a->rlen[i]; j++) { 1086 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1087 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1088 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1089 } 1090 } 1091 color = PETSC_DRAW_CYAN; 1092 for (i=0; i<m; i++) { 1093 shift = a->sliidx[i>>3]+(i&0x07); 1094 y_l = m - i - 1.0; y_r = y_l + 1.0; 1095 for (j=0; j<a->rlen[i]; j++) { 1096 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1097 if (a->val[shift+8*j] != 0.) continue; 1098 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1099 } 1100 } 1101 color = PETSC_DRAW_RED; 1102 for (i=0; i<m; i++) { 1103 shift = a->sliidx[i>>3]+(i&0x07); 1104 y_l = m - i - 1.0; y_r = y_l + 1.0; 1105 for (j=0; j<a->rlen[i]; j++) { 1106 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1107 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1108 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1109 } 1110 } 1111 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1112 } else { 1113 /* use contour shading to indicate magnitude of values */ 1114 /* first determine max of all nonzero values */ 1115 PetscReal minv=0.0,maxv=0.0; 1116 PetscInt count=0; 1117 PetscDraw popup; 1118 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1119 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1120 } 1121 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1122 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1123 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1124 1125 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1126 for (i=0; i<m; i++) { 1127 shift = a->sliidx[i>>3]+(i&0x07); 1128 y_l = m - i - 1.0; 1129 y_r = y_l + 1.0; 1130 for (j=0; j<a->rlen[i]; j++) { 1131 x_l = a->colidx[shift+j*8]; 1132 x_r = x_l + 1.0; 1133 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1134 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1135 count++; 1136 } 1137 } 1138 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1139 } 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #include <petscdraw.h> 1144 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1145 { 1146 PetscDraw draw; 1147 PetscReal xr,yr,xl,yl,h,w; 1148 PetscBool isnull; 1149 PetscErrorCode ierr; 1150 1151 PetscFunctionBegin; 1152 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1153 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1154 if (isnull) PetscFunctionReturn(0); 1155 1156 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1157 xr += w; yr += h; xl = -w; yl = -h; 1158 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1159 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1160 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1161 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1162 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1167 { 1168 PetscBool iascii,isbinary,isdraw; 1169 PetscErrorCode ierr; 1170 1171 PetscFunctionBegin; 1172 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1173 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1174 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1175 if (iascii) { 1176 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1177 } else if (isbinary) { 1178 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1179 } else if (isdraw) { 1180 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1186 { 1187 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1188 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1189 MatScalar *vp; 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1194 /* To do: compress out the unused elements */ 1195 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1196 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1197 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1198 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1199 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1200 for (i=0; i<a->totalslices; ++i) { 1201 shift = a->sliidx[i]; /* starting index of the slice */ 1202 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1203 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1204 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1205 row = 8*i + row_in_slice; 1206 nrow = a->rlen[row]; /* number of nonzeros in row */ 1207 /* 1208 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1209 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1210 */ 1211 lastcol = 0; 1212 if (nrow>0) { /* nonempty row */ 1213 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1214 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1215 for (j=1;j<8;j++) { 1216 if (a->rlen[8*i+j]) { 1217 lastcol = cp[j]; 1218 break; 1219 } 1220 } 1221 } else { 1222 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1223 } 1224 1225 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1226 cp[8*k+row_in_slice] = lastcol; 1227 vp[8*k+row_in_slice] = (MatScalar)0; 1228 } 1229 } 1230 } 1231 1232 A->info.mallocs += a->reallocs; 1233 a->reallocs = 0; 1234 1235 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1240 { 1241 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1242 1243 PetscFunctionBegin; 1244 info->block_size = 1.0; 1245 info->nz_allocated = (double)a->maxallocmat; 1246 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1247 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1248 info->assemblies = (double)A->num_ass; 1249 info->mallocs = (double)A->info.mallocs; 1250 info->memory = ((PetscObject)A)->mem; 1251 if (A->factortype) { 1252 info->fill_ratio_given = A->info.fill_ratio_given; 1253 info->fill_ratio_needed = A->info.fill_ratio_needed; 1254 info->factor_mallocs = A->info.factor_mallocs; 1255 } else { 1256 info->fill_ratio_given = 0; 1257 info->fill_ratio_needed = 0; 1258 info->factor_mallocs = 0; 1259 } 1260 PetscFunctionReturn(0); 1261 } 1262 1263 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1264 { 1265 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1266 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1267 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1268 MatScalar *vp,value; 1269 PetscErrorCode ierr; 1270 1271 PetscFunctionBegin; 1272 for (k=0; k<m; k++) { /* loop over added rows */ 1273 row = im[k]; 1274 if (row < 0) continue; 1275 #if defined(PETSC_USE_DEBUG) 1276 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1277 #endif 1278 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1279 cp = a->colidx+shift; /* pointer to the row */ 1280 vp = a->val+shift; /* pointer to the row */ 1281 nrow = a->rlen[row]; 1282 low = 0; 1283 high = nrow; 1284 1285 for (l=0; l<n; l++) { /* loop over added columns */ 1286 col = in[l]; 1287 if (col<0) continue; 1288 #if defined(PETSC_USE_DEBUG) 1289 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1290 #endif 1291 if (a->roworiented) { 1292 value = v[l+k*n]; 1293 } else { 1294 value = v[k+l*m]; 1295 } 1296 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1297 1298 /* search in this row for the specified colmun, i indicates the column to be set */ 1299 if (col <= lastcol) low = 0; 1300 else high = nrow; 1301 lastcol = col; 1302 while (high-low > 5) { 1303 t = (low+high)/2; 1304 if (*(cp+t*8) > col) high = t; 1305 else low = t; 1306 } 1307 for (i=low; i<high; i++) { 1308 if (*(cp+i*8) > col) break; 1309 if (*(cp+i*8) == col) { 1310 if (is == ADD_VALUES) *(vp+i*8) += value; 1311 else *(vp+i*8) = value; 1312 low = i + 1; 1313 goto noinsert; 1314 } 1315 } 1316 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1317 if (nonew == 1) goto noinsert; 1318 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1319 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1320 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1321 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1322 for (ii=nrow-1; ii>=i; ii--) { 1323 *(cp+(ii+1)*8) = *(cp+ii*8); 1324 *(vp+(ii+1)*8) = *(vp+ii*8); 1325 } 1326 a->rlen[row]++; 1327 *(cp+i*8) = col; 1328 *(vp+i*8) = value; 1329 a->nz++; 1330 A->nonzerostate++; 1331 low = i+1; high++; nrow++; 1332 noinsert:; 1333 } 1334 a->rlen[row] = nrow; 1335 } 1336 PetscFunctionReturn(0); 1337 } 1338 1339 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1340 { 1341 PetscErrorCode ierr; 1342 1343 PetscFunctionBegin; 1344 /* If the two matrices have the same copy implementation, use fast copy. */ 1345 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1346 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1347 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1348 1349 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1350 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1351 } else { 1352 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1358 { 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1367 { 1368 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1369 1370 PetscFunctionBegin; 1371 *array = a->val; 1372 PetscFunctionReturn(0); 1373 } 1374 1375 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1376 { 1377 PetscFunctionBegin; 1378 PetscFunctionReturn(0); 1379 } 1380 1381 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1382 { 1383 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1384 PetscInt i; 1385 MatScalar *aval=a->val; 1386 1387 PetscFunctionBegin; 1388 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1393 { 1394 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1395 PetscInt i; 1396 MatScalar *aval=a->val; 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1401 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 1405 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1406 { 1407 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1408 MatScalar *aval=a->val; 1409 PetscScalar oalpha=alpha; 1410 PetscBLASInt one=1,size; 1411 PetscErrorCode ierr; 1412 1413 PetscFunctionBegin; 1414 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1415 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1416 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1417 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1422 { 1423 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1424 PetscErrorCode ierr; 1425 1426 PetscFunctionBegin; 1427 if (!Y->preallocated || !y->nz) { 1428 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1429 } 1430 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1435 { 1436 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1437 PetscScalar *x,sum,*t; 1438 const MatScalar *idiag=0,*mdiag; 1439 const PetscScalar *b,*xb; 1440 PetscInt n,m=A->rmap->n,i,j,shift; 1441 const PetscInt *diag; 1442 PetscErrorCode ierr; 1443 1444 PetscFunctionBegin; 1445 its = its*lits; 1446 1447 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1448 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1449 a->fshift = fshift; 1450 a->omega = omega; 1451 1452 diag = a->diag; 1453 t = a->ssor_work; 1454 idiag = a->idiag; 1455 mdiag = a->mdiag; 1456 1457 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1458 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1459 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1460 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1461 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1462 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1463 1464 if (flag & SOR_ZERO_INITIAL_GUESS) { 1465 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1466 for (i=0; i<m; i++) { 1467 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1468 sum = b[i]; 1469 n = (diag[i]-shift)/8; 1470 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1471 t[i] = sum; 1472 x[i] = sum*idiag[i]; 1473 } 1474 xb = t; 1475 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1476 } else xb = b; 1477 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1478 for (i=m-1; i>=0; i--) { 1479 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1480 sum = xb[i]; 1481 n = a->rlen[i]-(diag[i]-shift)/8-1; 1482 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1483 if (xb == b) { 1484 x[i] = sum*idiag[i]; 1485 } else { 1486 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1487 } 1488 } 1489 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1490 } 1491 its--; 1492 } 1493 while (its--) { 1494 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1495 for (i=0; i<m; i++) { 1496 /* lower */ 1497 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1498 sum = b[i]; 1499 n = (diag[i]-shift)/8; 1500 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1501 t[i] = sum; /* save application of the lower-triangular part */ 1502 /* upper */ 1503 n = a->rlen[i]-(diag[i]-shift)/8-1; 1504 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1505 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1506 } 1507 xb = t; 1508 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1509 } else xb = b; 1510 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1511 for (i=m-1; i>=0; i--) { 1512 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1513 sum = xb[i]; 1514 if (xb == b) { 1515 /* whole matrix (no checkpointing available) */ 1516 n = a->rlen[i]; 1517 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1518 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1519 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1520 n = a->rlen[i]-(diag[i]-shift)/8-1; 1521 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1522 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1523 } 1524 } 1525 if (xb == b) { 1526 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1527 } else { 1528 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1529 } 1530 } 1531 } 1532 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1533 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1534 PetscFunctionReturn(0); 1535 } 1536 1537 /* -------------------------------------------------------------------*/ 1538 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1539 0, 1540 0, 1541 MatMult_SeqSELL, 1542 /* 4*/ MatMultAdd_SeqSELL, 1543 MatMultTranspose_SeqSELL, 1544 MatMultTransposeAdd_SeqSELL, 1545 0, 1546 0, 1547 0, 1548 /* 10*/ 0, 1549 0, 1550 0, 1551 MatSOR_SeqSELL, 1552 0, 1553 /* 15*/ MatGetInfo_SeqSELL, 1554 MatEqual_SeqSELL, 1555 MatGetDiagonal_SeqSELL, 1556 MatDiagonalScale_SeqSELL, 1557 0, 1558 /* 20*/ 0, 1559 MatAssemblyEnd_SeqSELL, 1560 MatSetOption_SeqSELL, 1561 MatZeroEntries_SeqSELL, 1562 /* 24*/ 0, 1563 0, 1564 0, 1565 0, 1566 0, 1567 /* 29*/ MatSetUp_SeqSELL, 1568 0, 1569 0, 1570 0, 1571 0, 1572 /* 34*/ MatDuplicate_SeqSELL, 1573 0, 1574 0, 1575 0, 1576 0, 1577 /* 39*/ 0, 1578 0, 1579 0, 1580 MatGetValues_SeqSELL, 1581 MatCopy_SeqSELL, 1582 /* 44*/ 0, 1583 MatScale_SeqSELL, 1584 MatShift_SeqSELL, 1585 0, 1586 0, 1587 /* 49*/ 0, 1588 0, 1589 0, 1590 0, 1591 0, 1592 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1593 0, 1594 0, 1595 0, 1596 0, 1597 /* 59*/ 0, 1598 MatDestroy_SeqSELL, 1599 MatView_SeqSELL, 1600 0, 1601 0, 1602 /* 64*/ 0, 1603 0, 1604 0, 1605 0, 1606 0, 1607 /* 69*/ 0, 1608 0, 1609 0, 1610 0, 1611 0, 1612 /* 74*/ 0, 1613 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1614 0, 1615 0, 1616 0, 1617 /* 79*/ 0, 1618 0, 1619 0, 1620 0, 1621 0, 1622 /* 84*/ 0, 1623 0, 1624 0, 1625 0, 1626 0, 1627 /* 89*/ 0, 1628 0, 1629 0, 1630 0, 1631 0, 1632 /* 94*/ 0, 1633 0, 1634 0, 1635 0, 1636 0, 1637 /* 99*/ 0, 1638 0, 1639 0, 1640 MatConjugate_SeqSELL, 1641 0, 1642 /*104*/ 0, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*109*/ 0, 1648 0, 1649 0, 1650 0, 1651 MatMissingDiagonal_SeqSELL, 1652 /*114*/ 0, 1653 0, 1654 0, 1655 0, 1656 0, 1657 /*119*/ 0, 1658 0, 1659 0, 1660 0, 1661 0, 1662 /*124*/ 0, 1663 0, 1664 0, 1665 0, 1666 0, 1667 /*129*/ 0, 1668 0, 1669 0, 1670 0, 1671 0, 1672 /*134*/ 0, 1673 0, 1674 0, 1675 0, 1676 0, 1677 /*139*/ 0, 1678 0, 1679 0, 1680 MatFDColoringSetUp_SeqXAIJ, 1681 0, 1682 /*144*/0 1683 }; 1684 1685 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1686 { 1687 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1688 PetscErrorCode ierr; 1689 1690 PetscFunctionBegin; 1691 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1692 1693 /* allocate space for values if not already there */ 1694 if (!a->saved_values) { 1695 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1696 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1697 } 1698 1699 /* copy values over */ 1700 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1705 { 1706 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1711 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1712 /* copy values over */ 1713 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 1717 /*@C 1718 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1719 1720 Not Collective 1721 1722 Input Parameters: 1723 . mat - a MATSEQSELL matrix 1724 . array - pointer to the data 1725 1726 Level: intermediate 1727 1728 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1729 @*/ 1730 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1731 { 1732 PetscErrorCode ierr; 1733 1734 PetscFunctionBegin; 1735 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1740 { 1741 Mat_SeqSELL *b; 1742 PetscMPIInt size; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1747 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1748 1749 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1750 1751 B->data = (void*)b; 1752 1753 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1754 1755 b->row = 0; 1756 b->col = 0; 1757 b->icol = 0; 1758 b->reallocs = 0; 1759 b->ignorezeroentries = PETSC_FALSE; 1760 b->roworiented = PETSC_TRUE; 1761 b->nonew = 0; 1762 b->diag = 0; 1763 b->solve_work = 0; 1764 B->spptr = 0; 1765 b->saved_values = 0; 1766 b->idiag = 0; 1767 b->mdiag = 0; 1768 b->ssor_work = 0; 1769 b->omega = 1.0; 1770 b->fshift = 0.0; 1771 b->idiagvalid = PETSC_FALSE; 1772 b->ibdiagvalid = PETSC_FALSE; 1773 b->keepnonzeropattern = PETSC_FALSE; 1774 1775 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1776 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1777 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1779 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1780 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1781 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1782 PetscFunctionReturn(0); 1783 } 1784 1785 /* 1786 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1787 */ 1788 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1789 { 1790 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1791 PetscInt i,m=A->rmap->n; 1792 PetscInt totalslices=a->totalslices; 1793 PetscErrorCode ierr; 1794 1795 PetscFunctionBegin; 1796 c = (Mat_SeqSELL*)C->data; 1797 1798 C->factortype = A->factortype; 1799 c->row = 0; 1800 c->col = 0; 1801 c->icol = 0; 1802 c->reallocs = 0; 1803 1804 C->assembled = PETSC_TRUE; 1805 1806 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 1807 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 1808 1809 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 1810 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1811 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 1812 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 1813 1814 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 1815 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 1816 1817 /* allocate the matrix space */ 1818 if (mallocmatspace) { 1819 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 1820 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1821 1822 c->singlemalloc = PETSC_TRUE; 1823 1824 if (m > 0) { 1825 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 1826 if (cpvalues == MAT_COPY_VALUES) { 1827 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1828 } else { 1829 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1830 } 1831 } 1832 } 1833 1834 c->ignorezeroentries = a->ignorezeroentries; 1835 c->roworiented = a->roworiented; 1836 c->nonew = a->nonew; 1837 if (a->diag) { 1838 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 1839 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1840 for (i=0; i<m; i++) { 1841 c->diag[i] = a->diag[i]; 1842 } 1843 } else c->diag = 0; 1844 1845 c->solve_work = 0; 1846 c->saved_values = 0; 1847 c->idiag = 0; 1848 c->ssor_work = 0; 1849 c->keepnonzeropattern = a->keepnonzeropattern; 1850 c->free_val = PETSC_TRUE; 1851 c->free_colidx = PETSC_TRUE; 1852 1853 c->maxallocmat = a->maxallocmat; 1854 c->maxallocrow = a->maxallocrow; 1855 c->rlenmax = a->rlenmax; 1856 c->nz = a->nz; 1857 C->preallocated = PETSC_TRUE; 1858 1859 c->nonzerorowcnt = a->nonzerorowcnt; 1860 C->nonzerostate = A->nonzerostate; 1861 1862 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 1866 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 1867 { 1868 PetscErrorCode ierr; 1869 1870 PetscFunctionBegin; 1871 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 1872 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 1873 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 1874 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 1875 } 1876 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1877 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 /*@C 1882 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 1883 1884 Collective on MPI_Comm 1885 1886 Input Parameters: 1887 + comm - MPI communicator, set to PETSC_COMM_SELF 1888 . m - number of rows 1889 . n - number of columns 1890 . rlenmax - maximum number of nonzeros in a row 1891 - rlen - array containing the number of nonzeros in the various rows 1892 (possibly different for each row) or NULL 1893 1894 Output Parameter: 1895 . A - the matrix 1896 1897 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1898 MatXXXXSetPreallocation() paradgm instead of this routine directly. 1899 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 1900 1901 Notes: 1902 If nnz is given then nz is ignored 1903 1904 Specify the preallocated storage with either rlenmax or rlen (not both). 1905 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 1906 allocation. For large problems you MUST preallocate memory or you 1907 will get TERRIBLE performance, see the users' manual chapter on matrices. 1908 1909 Level: intermediate 1910 1911 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 1912 1913 @*/ 1914 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 1915 { 1916 PetscErrorCode ierr; 1917 1918 PetscFunctionBegin; 1919 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1920 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1921 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 1922 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 1923 PetscFunctionReturn(0); 1924 } 1925 1926 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 1927 { 1928 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 1929 PetscInt totalslices=a->totalslices; 1930 PetscErrorCode ierr; 1931 1932 PetscFunctionBegin; 1933 /* If the matrix dimensions are not equal,or no of nonzeros */ 1934 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 1935 *flg = PETSC_FALSE; 1936 PetscFunctionReturn(0); 1937 } 1938 /* if the a->colidx are the same */ 1939 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 1940 if (!*flg) PetscFunctionReturn(0); 1941 /* if a->val are the same */ 1942 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 1947 { 1948 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1949 1950 PetscFunctionBegin; 1951 a->idiagvalid = PETSC_FALSE; 1952 a->ibdiagvalid = PETSC_FALSE; 1953 PetscFunctionReturn(0); 1954 } 1955 1956 PetscErrorCode MatConjugate_SeqSELL(Mat A) 1957 { 1958 #if defined(PETSC_USE_COMPLEX) 1959 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1960 PetscInt i; 1961 PetscScalar *val = a->val; 1962 1963 PetscFunctionBegin; 1964 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1965 val[i] = PetscConj(val[i]); 1966 } 1967 #else 1968 PetscFunctionBegin; 1969 #endif 1970 PetscFunctionReturn(0); 1971 } 1972