1*d4002b98SHong Zhang 2*d4002b98SHong Zhang /* 3*d4002b98SHong Zhang Defines the basic matrix operations for the SELL matrix storage format. 4*d4002b98SHong Zhang */ 5*d4002b98SHong Zhang #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6*d4002b98SHong Zhang #include <petscblaslapack.h> 7*d4002b98SHong Zhang #include <petsc/private/kernels/blocktranspose.h> 8*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) 9*d4002b98SHong Zhang #include <immintrin.h> 10*d4002b98SHong Zhang 11*d4002b98SHong Zhang #if !defined(_MM_SCALE_8) 12*d4002b98SHong Zhang #define _MM_SCALE_8 8 13*d4002b98SHong Zhang #endif 14*d4002b98SHong Zhang 15*d4002b98SHong Zhang #if defined(__AVX512F__) 16*d4002b98SHong Zhang /* these do not work 17*d4002b98SHong Zhang vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 18*d4002b98SHong Zhang vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 19*d4002b98SHong Zhang */ 20*d4002b98SHong Zhang #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 21*d4002b98SHong Zhang /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 22*d4002b98SHong Zhang vec_idx = _mm256_load_si256((__m256i const*)acolidx); \ 23*d4002b98SHong Zhang vec_vals = _mm512_load_pd(aval); \ 24*d4002b98SHong Zhang vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25*d4002b98SHong Zhang vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y); 26*d4002b98SHong Zhang 27*d4002b98SHong Zhang #endif 28*d4002b98SHong Zhang #endif /* PETSC_HAVE_IMMINTRIN_H */ 29*d4002b98SHong Zhang 30*d4002b98SHong Zhang /*@C 31*d4002b98SHong Zhang MatSeqSELLSetPreallocation - For good matrix assembly performance 32*d4002b98SHong Zhang the user should preallocate the matrix storage by setting the parameter nz 33*d4002b98SHong Zhang (or the array nnz). By setting these parameters accurately, performance 34*d4002b98SHong Zhang during matrix assembly can be increased significantly. 35*d4002b98SHong Zhang 36*d4002b98SHong Zhang Collective on MPI_Comm 37*d4002b98SHong Zhang 38*d4002b98SHong Zhang Input Parameters: 39*d4002b98SHong Zhang + B - The matrix 40*d4002b98SHong Zhang . nz - number of nonzeros per row (same for all rows) 41*d4002b98SHong Zhang - nnz - array containing the number of nonzeros in the various rows 42*d4002b98SHong Zhang (possibly different for each row) or NULL 43*d4002b98SHong Zhang 44*d4002b98SHong Zhang Notes: 45*d4002b98SHong Zhang If nnz is given then nz is ignored. 46*d4002b98SHong Zhang 47*d4002b98SHong Zhang Specify the preallocated storage with either nz or nnz (not both). 48*d4002b98SHong Zhang Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 49*d4002b98SHong Zhang allocation. For large problems you MUST preallocate memory or you 50*d4002b98SHong Zhang will get TERRIBLE performance, see the users' manual chapter on matrices. 51*d4002b98SHong Zhang 52*d4002b98SHong Zhang You can call MatGetInfo() to get information on how effective the preallocation was; 53*d4002b98SHong Zhang for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 54*d4002b98SHong Zhang You can also run with the option -info and look for messages with the string 55*d4002b98SHong Zhang malloc in them to see if additional memory allocation was needed. 56*d4002b98SHong Zhang 57*d4002b98SHong Zhang Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 58*d4002b98SHong Zhang entries or columns indices. 59*d4002b98SHong Zhang 60*d4002b98SHong Zhang The maximum number of nonzeos in any row should be as accuate as possible. 61*d4002b98SHong Zhang If it is underesitmated, you will get bad performance due to reallocation 62*d4002b98SHong Zhang (MatSeqXSELLReallocateSELL). 63*d4002b98SHong Zhang 64*d4002b98SHong Zhang Level: intermediate 65*d4002b98SHong Zhang 66*d4002b98SHong Zhang .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 67*d4002b98SHong Zhang 68*d4002b98SHong Zhang @*/ 69*d4002b98SHong Zhang PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 70*d4002b98SHong Zhang { 71*d4002b98SHong Zhang PetscErrorCode ierr; 72*d4002b98SHong Zhang 73*d4002b98SHong Zhang PetscFunctionBegin; 74*d4002b98SHong Zhang PetscValidHeaderSpecific(B,MAT_CLASSID,1); 75*d4002b98SHong Zhang PetscValidType(B,1); 76*d4002b98SHong Zhang ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 77*d4002b98SHong Zhang PetscFunctionReturn(0); 78*d4002b98SHong Zhang } 79*d4002b98SHong Zhang 80*d4002b98SHong Zhang PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 81*d4002b98SHong Zhang { 82*d4002b98SHong Zhang Mat_SeqSELL *b; 83*d4002b98SHong Zhang PetscInt i,j,totalslices; 84*d4002b98SHong Zhang PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 85*d4002b98SHong Zhang PetscErrorCode ierr; 86*d4002b98SHong Zhang 87*d4002b98SHong Zhang PetscFunctionBegin; 88*d4002b98SHong Zhang if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 89*d4002b98SHong Zhang if (maxallocrow == MAT_SKIP_ALLOCATION) { 90*d4002b98SHong Zhang skipallocation = PETSC_TRUE; 91*d4002b98SHong Zhang maxallocrow = 0; 92*d4002b98SHong Zhang } 93*d4002b98SHong Zhang 94*d4002b98SHong Zhang ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 95*d4002b98SHong Zhang ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 96*d4002b98SHong Zhang 97*d4002b98SHong Zhang /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 98*d4002b98SHong Zhang if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 99*d4002b98SHong Zhang if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 100*d4002b98SHong Zhang if (rlen) { 101*d4002b98SHong Zhang for (i=0; i<B->rmap->n; i++) { 102*d4002b98SHong Zhang if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 103*d4002b98SHong Zhang if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 104*d4002b98SHong Zhang } 105*d4002b98SHong Zhang } 106*d4002b98SHong Zhang 107*d4002b98SHong Zhang B->preallocated = PETSC_TRUE; 108*d4002b98SHong Zhang 109*d4002b98SHong Zhang b = (Mat_SeqSELL*)B->data; 110*d4002b98SHong Zhang 111*d4002b98SHong Zhang totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 112*d4002b98SHong Zhang b->totalslices = totalslices; 113*d4002b98SHong Zhang if (!skipallocation) { 114*d4002b98SHong Zhang if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 115*d4002b98SHong Zhang 116*d4002b98SHong Zhang if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 117*d4002b98SHong Zhang ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 118*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 119*d4002b98SHong Zhang } 120*d4002b98SHong Zhang if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 121*d4002b98SHong Zhang if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 122*d4002b98SHong Zhang else if (maxallocrow < 0) maxallocrow = 1; 123*d4002b98SHong Zhang for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 124*d4002b98SHong Zhang } else { 125*d4002b98SHong Zhang maxallocrow = 0; 126*d4002b98SHong Zhang b->sliidx[0] = 0; 127*d4002b98SHong Zhang for (i=1; i<totalslices; i++) { 128*d4002b98SHong Zhang b->sliidx[i] = 0; 129*d4002b98SHong Zhang for (j=0;j<8;j++) { 130*d4002b98SHong Zhang b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 131*d4002b98SHong Zhang } 132*d4002b98SHong Zhang maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 133*d4002b98SHong Zhang b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 134*d4002b98SHong Zhang } 135*d4002b98SHong Zhang /* last slice */ 136*d4002b98SHong Zhang b->sliidx[totalslices] = 0; 137*d4002b98SHong Zhang for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 138*d4002b98SHong Zhang maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 139*d4002b98SHong Zhang b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 140*d4002b98SHong Zhang } 141*d4002b98SHong Zhang 142*d4002b98SHong Zhang /* allocate space for val, colidx, rlen */ 143*d4002b98SHong Zhang /* FIXME: should B's old memory be unlogged? */ 144*d4002b98SHong Zhang ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 145*d4002b98SHong Zhang /* FIXME: assuming an element of the bit array takes 8 bits */ 146*d4002b98SHong Zhang ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 147*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 148*d4002b98SHong Zhang /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 149*d4002b98SHong Zhang ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 150*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 151*d4002b98SHong Zhang 152*d4002b98SHong Zhang b->singlemalloc = PETSC_TRUE; 153*d4002b98SHong Zhang b->free_val = PETSC_TRUE; 154*d4002b98SHong Zhang b->free_colidx = PETSC_TRUE; 155*d4002b98SHong Zhang } else { 156*d4002b98SHong Zhang b->free_val = PETSC_FALSE; 157*d4002b98SHong Zhang b->free_colidx = PETSC_FALSE; 158*d4002b98SHong Zhang } 159*d4002b98SHong Zhang 160*d4002b98SHong Zhang b->nz = 0; 161*d4002b98SHong Zhang b->maxallocrow = maxallocrow; 162*d4002b98SHong Zhang b->rlenmax = maxallocrow; 163*d4002b98SHong Zhang b->maxallocmat = b->sliidx[totalslices]; 164*d4002b98SHong Zhang B->info.nz_unneeded = (double)b->maxallocmat; 165*d4002b98SHong Zhang if (realalloc) { 166*d4002b98SHong Zhang ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 167*d4002b98SHong Zhang } 168*d4002b98SHong Zhang PetscFunctionReturn(0); 169*d4002b98SHong Zhang } 170*d4002b98SHong Zhang 171*d4002b98SHong Zhang PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 172*d4002b98SHong Zhang { 173*d4002b98SHong Zhang Mat B; 174*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 175*d4002b98SHong Zhang PetscInt i,j,row; 176*d4002b98SHong Zhang PetscBool isnonzero; 177*d4002b98SHong Zhang PetscErrorCode ierr; 178*d4002b98SHong Zhang 179*d4002b98SHong Zhang PetscFunctionBegin; 180*d4002b98SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 181*d4002b98SHong Zhang ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 182*d4002b98SHong Zhang ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 183*d4002b98SHong Zhang ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 184*d4002b98SHong Zhang ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 185*d4002b98SHong Zhang 186*d4002b98SHong Zhang for (i=0; i<a->totalslices; i++) { /* loop over slices */ 187*d4002b98SHong Zhang for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 188*d4002b98SHong Zhang isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]); 189*d4002b98SHong Zhang if (isnonzero) { 190*d4002b98SHong Zhang ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr); 191*d4002b98SHong Zhang } 192*d4002b98SHong Zhang } 193*d4002b98SHong Zhang } 194*d4002b98SHong Zhang ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 195*d4002b98SHong Zhang ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 196*d4002b98SHong Zhang B->rmap->bs = A->rmap->bs; 197*d4002b98SHong Zhang 198*d4002b98SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 199*d4002b98SHong Zhang ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 200*d4002b98SHong Zhang } else { 201*d4002b98SHong Zhang *newmat = B; 202*d4002b98SHong Zhang } 203*d4002b98SHong Zhang PetscFunctionReturn(0); 204*d4002b98SHong Zhang } 205*d4002b98SHong Zhang 206*d4002b98SHong Zhang #include <../src/mat/impls/aij/seq/aij.h> 207*d4002b98SHong Zhang 208*d4002b98SHong Zhang PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 209*d4002b98SHong Zhang { 210*d4002b98SHong Zhang Mat B; 211*d4002b98SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 212*d4002b98SHong Zhang PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 213*d4002b98SHong Zhang const PetscInt *cols; 214*d4002b98SHong Zhang const PetscScalar *vals; 215*d4002b98SHong Zhang PetscErrorCode ierr; 216*d4002b98SHong Zhang 217*d4002b98SHong Zhang PetscFunctionBegin; 218*d4002b98SHong Zhang if (A->rmap->bs > 1) { 219*d4002b98SHong Zhang ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 220*d4002b98SHong Zhang PetscFunctionReturn(0); 221*d4002b98SHong Zhang } 222*d4002b98SHong Zhang /* Can we just use ilen? */ 223*d4002b98SHong Zhang ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 224*d4002b98SHong Zhang for (i=0; i<m; i++) { 225*d4002b98SHong Zhang rowlengths[i] = ai[i+1] - ai[i]; 226*d4002b98SHong Zhang } 227*d4002b98SHong Zhang 228*d4002b98SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 229*d4002b98SHong Zhang ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 230*d4002b98SHong Zhang ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 231*d4002b98SHong Zhang ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 232*d4002b98SHong Zhang ierr = PetscFree(rowlengths);CHKERRQ(ierr); 233*d4002b98SHong Zhang 234*d4002b98SHong Zhang ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 235*d4002b98SHong Zhang 236*d4002b98SHong Zhang for (row=0; row<m; row++) { 237*d4002b98SHong Zhang ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 238*d4002b98SHong Zhang ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 239*d4002b98SHong Zhang ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 240*d4002b98SHong Zhang } 241*d4002b98SHong Zhang ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 242*d4002b98SHong Zhang ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 243*d4002b98SHong Zhang B->rmap->bs = A->rmap->bs; 244*d4002b98SHong Zhang 245*d4002b98SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 246*d4002b98SHong Zhang ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 247*d4002b98SHong Zhang } else { 248*d4002b98SHong Zhang *newmat = B; 249*d4002b98SHong Zhang } 250*d4002b98SHong Zhang PetscFunctionReturn(0); 251*d4002b98SHong Zhang } 252*d4002b98SHong Zhang 253*d4002b98SHong Zhang PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 254*d4002b98SHong Zhang { 255*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 256*d4002b98SHong Zhang PetscScalar *y; 257*d4002b98SHong Zhang const PetscScalar *x; 258*d4002b98SHong Zhang const MatScalar *aval=a->val; 259*d4002b98SHong Zhang PetscInt totalslices=a->totalslices; 260*d4002b98SHong Zhang const PetscInt *acolidx=a->colidx; 261*d4002b98SHong Zhang PetscInt i,j=0; 262*d4002b98SHong Zhang PetscErrorCode ierr; 263*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 264*d4002b98SHong Zhang __m512d vec_x,vec_y,vec_vals; 265*d4002b98SHong Zhang __m256i vec_idx; 266*d4002b98SHong Zhang __mmask8 mask; 267*d4002b98SHong Zhang __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 268*d4002b98SHong Zhang __m256i vec_idx2,vec_idx3,vec_idx4; 269*d4002b98SHong Zhang #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 270*d4002b98SHong Zhang __m128d vec_x_tmp; 271*d4002b98SHong Zhang __m256d vec_x,vec_y,vec_y2,vec_vals; 272*d4002b98SHong Zhang MatScalar yval; 273*d4002b98SHong Zhang PetscInt r,rows_left,row,nnz_in_row; 274*d4002b98SHong Zhang #else 275*d4002b98SHong Zhang PetscScalar sum[8]; 276*d4002b98SHong Zhang #endif 277*d4002b98SHong Zhang 278*d4002b98SHong Zhang #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 279*d4002b98SHong Zhang #pragma disjoint(*x,*y,*aval) 280*d4002b98SHong Zhang #endif 281*d4002b98SHong Zhang 282*d4002b98SHong Zhang PetscFunctionBegin; 283*d4002b98SHong Zhang ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 284*d4002b98SHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 285*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 286*d4002b98SHong Zhang for (i=0; i<totalslices; i++) { /* loop over slices */ 287*d4002b98SHong Zhang PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 288*d4002b98SHong Zhang PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 289*d4002b98SHong Zhang 290*d4002b98SHong Zhang vec_y = _mm512_setzero_pd(); 291*d4002b98SHong Zhang vec_y2 = _mm512_setzero_pd(); 292*d4002b98SHong Zhang vec_y3 = _mm512_setzero_pd(); 293*d4002b98SHong Zhang vec_y4 = _mm512_setzero_pd(); 294*d4002b98SHong Zhang 295*d4002b98SHong Zhang j = a->sliidx[i]>>3; /* index of the bit array; 8 bits are read at each time, corresponding to a slice columnn */ 296*d4002b98SHong Zhang switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 297*d4002b98SHong Zhang case 3: 298*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 299*d4002b98SHong Zhang acolidx += 8; aval += 8; 300*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 301*d4002b98SHong Zhang acolidx += 8; aval += 8; 302*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 303*d4002b98SHong Zhang acolidx += 8; aval += 8; 304*d4002b98SHong Zhang j += 3; 305*d4002b98SHong Zhang break; 306*d4002b98SHong Zhang case 2: 307*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 308*d4002b98SHong Zhang acolidx += 8; aval += 8; 309*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 310*d4002b98SHong Zhang acolidx += 8; aval += 8; 311*d4002b98SHong Zhang j += 2; 312*d4002b98SHong Zhang break; 313*d4002b98SHong Zhang case 1: 314*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 315*d4002b98SHong Zhang acolidx += 8; aval += 8; 316*d4002b98SHong Zhang j += 1; 317*d4002b98SHong Zhang break; 318*d4002b98SHong Zhang } 319*d4002b98SHong Zhang #pragma novector 320*d4002b98SHong Zhang for (; j<(a->sliidx[i+1]>>3); j+=4) { 321*d4002b98SHong Zhang /* 322*d4002b98SHong Zhang vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 323*d4002b98SHong Zhang vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 324*d4002b98SHong Zhang */ 325*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 326*d4002b98SHong Zhang acolidx += 8; aval += 8; 327*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 328*d4002b98SHong Zhang acolidx += 8; aval += 8; 329*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 330*d4002b98SHong Zhang acolidx += 8; aval += 8; 331*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 332*d4002b98SHong Zhang acolidx += 8; aval += 8; 333*d4002b98SHong Zhang } 334*d4002b98SHong Zhang 335*d4002b98SHong Zhang vec_y = _mm512_add_pd(vec_y,vec_y2); 336*d4002b98SHong Zhang vec_y = _mm512_add_pd(vec_y,vec_y3); 337*d4002b98SHong Zhang vec_y = _mm512_add_pd(vec_y,vec_y4); 338*d4002b98SHong Zhang if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 339*d4002b98SHong Zhang mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 340*d4002b98SHong Zhang _mm512_mask_store_pd(&y[8*i],mask,vec_y); 341*d4002b98SHong Zhang } else { 342*d4002b98SHong Zhang _mm512_store_pd(&y[8*i],vec_y); 343*d4002b98SHong Zhang } 344*d4002b98SHong Zhang } 345*d4002b98SHong Zhang #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 346*d4002b98SHong Zhang for (i=0; i<totalslices; i++) { /* loop over full slices */ 347*d4002b98SHong Zhang PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 348*d4002b98SHong Zhang PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 349*d4002b98SHong Zhang 350*d4002b98SHong Zhang vec_y = _mm256_setzero_pd(); 351*d4002b98SHong Zhang vec_y2 = _mm256_setzero_pd(); 352*d4002b98SHong Zhang 353*d4002b98SHong Zhang /* last slice may have padding rows. Don't use vectorization. */ 354*d4002b98SHong Zhang if (i == totalslices-1 && (A->rmap->n & 0x07)) { 355*d4002b98SHong Zhang rows_left = A->rmap->n - 8*i; 356*d4002b98SHong Zhang for (r=0; r<rows_left; ++r) { 357*d4002b98SHong Zhang yval = (MatScalar)0; 358*d4002b98SHong Zhang row = 8*i + r; 359*d4002b98SHong Zhang nnz_in_row = a->rlen[row]; 360*d4002b98SHong Zhang for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 361*d4002b98SHong Zhang y[row] = yval; 362*d4002b98SHong Zhang } 363*d4002b98SHong Zhang break; 364*d4002b98SHong Zhang } 365*d4002b98SHong Zhang 366*d4002b98SHong Zhang /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 367*d4002b98SHong Zhang for (; j<a->sliidx[i+1]; j+=8) { 368*d4002b98SHong Zhang vec_vals = _mm256_loadu_pd(aval); 369*d4002b98SHong Zhang vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 370*d4002b98SHong Zhang vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 371*d4002b98SHong Zhang vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 372*d4002b98SHong Zhang vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 373*d4002b98SHong Zhang vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 374*d4002b98SHong Zhang vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 375*d4002b98SHong Zhang vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 376*d4002b98SHong Zhang aval += 4; 377*d4002b98SHong Zhang 378*d4002b98SHong Zhang vec_vals = _mm256_loadu_pd(aval); 379*d4002b98SHong Zhang vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 380*d4002b98SHong Zhang vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 381*d4002b98SHong Zhang vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 382*d4002b98SHong Zhang vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 383*d4002b98SHong Zhang vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 384*d4002b98SHong Zhang vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 385*d4002b98SHong Zhang vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 386*d4002b98SHong Zhang aval += 4; 387*d4002b98SHong Zhang } 388*d4002b98SHong Zhang 389*d4002b98SHong Zhang _mm256_storeu_pd(y + i*8, vec_y); 390*d4002b98SHong Zhang _mm256_storeu_pd(y + i*8 + 4, vec_y2); 391*d4002b98SHong Zhang } 392*d4002b98SHong Zhang 393*d4002b98SHong Zhang #else 394*d4002b98SHong Zhang for (i=0; i<totalslices; i++) { /* loop over slices */ 395*d4002b98SHong Zhang for (j=0; j<8; j++) sum[j] = 0.0; 396*d4002b98SHong Zhang 397*d4002b98SHong Zhang for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 398*d4002b98SHong Zhang sum[0] += aval[j]*x[acolidx[j]]; 399*d4002b98SHong Zhang sum[1] += aval[j+1]*x[acolidx[j+1]]; 400*d4002b98SHong Zhang sum[2] += aval[j+2]*x[acolidx[j+2]]; 401*d4002b98SHong Zhang sum[3] += aval[j+3]*x[acolidx[j+3]]; 402*d4002b98SHong Zhang sum[4] += aval[j+4]*x[acolidx[j+4]]; 403*d4002b98SHong Zhang sum[5] += aval[j+5]*x[acolidx[j+5]]; 404*d4002b98SHong Zhang sum[6] += aval[j+6]*x[acolidx[j+6]]; 405*d4002b98SHong Zhang sum[7] += aval[j+7]*x[acolidx[j+7]]; 406*d4002b98SHong Zhang } 407*d4002b98SHong Zhang if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 408*d4002b98SHong Zhang for(j=0;j<(A->rmap->n & 0x07);j++) y[8*i+j] = sum[j]; 409*d4002b98SHong Zhang } else { 410*d4002b98SHong Zhang y[8*i] = sum[0]; 411*d4002b98SHong Zhang y[8*i+1] = sum[1]; 412*d4002b98SHong Zhang y[8*i+2] = sum[2]; 413*d4002b98SHong Zhang y[8*i+3] = sum[3]; 414*d4002b98SHong Zhang y[8*i+4] = sum[4]; 415*d4002b98SHong Zhang y[8*i+5] = sum[5]; 416*d4002b98SHong Zhang y[8*i+6] = sum[6]; 417*d4002b98SHong Zhang y[8*i+7] = sum[7]; 418*d4002b98SHong Zhang } 419*d4002b98SHong Zhang } 420*d4002b98SHong Zhang #endif 421*d4002b98SHong Zhang 422*d4002b98SHong Zhang ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 423*d4002b98SHong Zhang ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 424*d4002b98SHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 425*d4002b98SHong Zhang PetscFunctionReturn(0); 426*d4002b98SHong Zhang } 427*d4002b98SHong Zhang 428*d4002b98SHong Zhang #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 429*d4002b98SHong Zhang PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 430*d4002b98SHong Zhang { 431*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 432*d4002b98SHong Zhang PetscScalar *y,*z; 433*d4002b98SHong Zhang const PetscScalar *x; 434*d4002b98SHong Zhang const MatScalar *aval=a->val; 435*d4002b98SHong Zhang PetscInt totalslices=a->totalslices; 436*d4002b98SHong Zhang const PetscInt *acolidx=a->colidx; 437*d4002b98SHong Zhang PetscInt i,j; 438*d4002b98SHong Zhang PetscErrorCode ierr; 439*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 440*d4002b98SHong Zhang __m512d vec_x,vec_y,vec_z,vec_vals; 441*d4002b98SHong Zhang __m256i vec_idx; 442*d4002b98SHong Zhang __mmask8 mask; 443*d4002b98SHong Zhang #else 444*d4002b98SHong Zhang PetscScalar sum[8]; 445*d4002b98SHong Zhang #endif 446*d4002b98SHong Zhang 447*d4002b98SHong Zhang #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 448*d4002b98SHong Zhang #pragma disjoint(*x,*y,*aval) 449*d4002b98SHong Zhang #endif 450*d4002b98SHong Zhang 451*d4002b98SHong Zhang PetscFunctionBegin; 452*d4002b98SHong Zhang ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 453*d4002b98SHong Zhang ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 454*d4002b98SHong Zhang for (i=0; i<totalslices; i++) { /* loop over slices */ 455*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 456*d4002b98SHong Zhang vec_y = _mm512_load_pd(&y[8*i]); 457*d4002b98SHong Zhang #pragma novector 458*d4002b98SHong Zhang for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 459*d4002b98SHong Zhang AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 460*d4002b98SHong Zhang acolidx += 8; aval += 8; 461*d4002b98SHong Zhang } 462*d4002b98SHong Zhang if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 463*d4002b98SHong Zhang mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 464*d4002b98SHong Zhang _mm512_mask_store_pd(&z[8*i],mask,vec_y); 465*d4002b98SHong Zhang } else { 466*d4002b98SHong Zhang _mm512_store_pd(&z[8*i],vec_y); 467*d4002b98SHong Zhang } 468*d4002b98SHong Zhang #else 469*d4002b98SHong Zhang sum[0] = y[8*i]; 470*d4002b98SHong Zhang sum[1] = y[8*i+1]; 471*d4002b98SHong Zhang sum[2] = y[8*i+2]; 472*d4002b98SHong Zhang sum[3] = y[8*i+3]; 473*d4002b98SHong Zhang sum[4] = y[8*i+4]; 474*d4002b98SHong Zhang sum[5] = y[8*i+5]; 475*d4002b98SHong Zhang sum[6] = y[8*i+6]; 476*d4002b98SHong Zhang sum[7] = y[8*i+7]; 477*d4002b98SHong Zhang for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 478*d4002b98SHong Zhang sum[0] += aval[j]*x[acolidx[j]]; 479*d4002b98SHong Zhang sum[1] += aval[j+1]*x[acolidx[j+1]]; 480*d4002b98SHong Zhang sum[2] += aval[j+2]*x[acolidx[j+2]]; 481*d4002b98SHong Zhang sum[3] += aval[j+3]*x[acolidx[j+3]]; 482*d4002b98SHong Zhang sum[4] += aval[j+4]*x[acolidx[j+4]]; 483*d4002b98SHong Zhang sum[5] += aval[j+5]*x[acolidx[j+5]]; 484*d4002b98SHong Zhang sum[6] += aval[j+6]*x[acolidx[j+6]]; 485*d4002b98SHong Zhang sum[7] += aval[j+7]*x[acolidx[j+7]]; 486*d4002b98SHong Zhang } 487*d4002b98SHong Zhang if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 488*d4002b98SHong Zhang for(j=0;j<(A->rmap->n & 0x07);j++) z[8*i+j] = sum[j]; 489*d4002b98SHong Zhang } else { 490*d4002b98SHong Zhang z[8*i] = sum[0]; 491*d4002b98SHong Zhang z[8*i+1] = sum[1]; 492*d4002b98SHong Zhang z[8*i+2] = sum[2]; 493*d4002b98SHong Zhang z[8*i+3] = sum[3]; 494*d4002b98SHong Zhang z[8*i+4] = sum[4]; 495*d4002b98SHong Zhang z[8*i+5] = sum[5]; 496*d4002b98SHong Zhang z[8*i+6] = sum[6]; 497*d4002b98SHong Zhang z[8*i+7] = sum[7]; 498*d4002b98SHong Zhang } 499*d4002b98SHong Zhang #endif 500*d4002b98SHong Zhang } 501*d4002b98SHong Zhang 502*d4002b98SHong Zhang ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 503*d4002b98SHong Zhang ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 504*d4002b98SHong Zhang ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 505*d4002b98SHong Zhang PetscFunctionReturn(0); 506*d4002b98SHong Zhang } 507*d4002b98SHong Zhang 508*d4002b98SHong Zhang PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 509*d4002b98SHong Zhang { 510*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 511*d4002b98SHong Zhang PetscScalar *y; 512*d4002b98SHong Zhang const PetscScalar *x; 513*d4002b98SHong Zhang const MatScalar *aval=a->val; 514*d4002b98SHong Zhang const PetscInt *acolidx=a->colidx; 515*d4002b98SHong Zhang PetscInt i,j,row; 516*d4002b98SHong Zhang PetscBool isnonzero; 517*d4002b98SHong Zhang PetscErrorCode ierr; 518*d4002b98SHong Zhang 519*d4002b98SHong Zhang #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 520*d4002b98SHong Zhang #pragma disjoint(*x,*y,*aval) 521*d4002b98SHong Zhang #endif 522*d4002b98SHong Zhang 523*d4002b98SHong Zhang PetscFunctionBegin; 524*d4002b98SHong Zhang if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 525*d4002b98SHong Zhang ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 526*d4002b98SHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 527*d4002b98SHong Zhang for (i=0; i<a->totalslices; i++) { /* loop over slices */ 528*d4002b98SHong Zhang for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 529*d4002b98SHong Zhang isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]); 530*d4002b98SHong Zhang if (isnonzero) y[acolidx[j]] += aval[j]*x[8*i+row]; 531*d4002b98SHong Zhang } 532*d4002b98SHong Zhang } 533*d4002b98SHong Zhang ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 534*d4002b98SHong Zhang ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 535*d4002b98SHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 536*d4002b98SHong Zhang PetscFunctionReturn(0); 537*d4002b98SHong Zhang } 538*d4002b98SHong Zhang 539*d4002b98SHong Zhang PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 540*d4002b98SHong Zhang { 541*d4002b98SHong Zhang PetscErrorCode ierr; 542*d4002b98SHong Zhang 543*d4002b98SHong Zhang PetscFunctionBegin; 544*d4002b98SHong Zhang ierr = VecSet(yy,0.0);CHKERRQ(ierr); 545*d4002b98SHong Zhang ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 546*d4002b98SHong Zhang PetscFunctionReturn(0); 547*d4002b98SHong Zhang } 548*d4002b98SHong Zhang 549*d4002b98SHong Zhang /* 550*d4002b98SHong Zhang Checks for missing diagonals 551*d4002b98SHong Zhang */ 552*d4002b98SHong Zhang PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 553*d4002b98SHong Zhang { 554*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 555*d4002b98SHong Zhang PetscInt *diag,i; 556*d4002b98SHong Zhang 557*d4002b98SHong Zhang PetscFunctionBegin; 558*d4002b98SHong Zhang *missing = PETSC_FALSE; 559*d4002b98SHong Zhang if (A->rmap->n > 0 && !(a->colidx)) { 560*d4002b98SHong Zhang *missing = PETSC_TRUE; 561*d4002b98SHong Zhang if (d) *d = 0; 562*d4002b98SHong Zhang PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 563*d4002b98SHong Zhang } else { 564*d4002b98SHong Zhang diag = a->diag; 565*d4002b98SHong Zhang for (i=0; i<A->rmap->n; i++) { 566*d4002b98SHong Zhang if (diag[i] == -1) { 567*d4002b98SHong Zhang *missing = PETSC_TRUE; 568*d4002b98SHong Zhang if (d) *d = i; 569*d4002b98SHong Zhang PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 570*d4002b98SHong Zhang break; 571*d4002b98SHong Zhang } 572*d4002b98SHong Zhang } 573*d4002b98SHong Zhang } 574*d4002b98SHong Zhang PetscFunctionReturn(0); 575*d4002b98SHong Zhang } 576*d4002b98SHong Zhang 577*d4002b98SHong Zhang PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 578*d4002b98SHong Zhang { 579*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 580*d4002b98SHong Zhang PetscInt i,j,m=A->rmap->n,shift; 581*d4002b98SHong Zhang PetscErrorCode ierr; 582*d4002b98SHong Zhang 583*d4002b98SHong Zhang PetscFunctionBegin; 584*d4002b98SHong Zhang if (!a->diag) { 585*d4002b98SHong Zhang ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 586*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 587*d4002b98SHong Zhang a->free_diag = PETSC_TRUE; 588*d4002b98SHong Zhang } 589*d4002b98SHong Zhang for (i=0; i<m; i++) { /* loop over rows */ 590*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 591*d4002b98SHong Zhang a->diag[i] = -1; 592*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 593*d4002b98SHong Zhang if (a->colidx[shift+j*8] == i) { 594*d4002b98SHong Zhang a->diag[i] = shift+j*8; 595*d4002b98SHong Zhang break; 596*d4002b98SHong Zhang } 597*d4002b98SHong Zhang } 598*d4002b98SHong Zhang } 599*d4002b98SHong Zhang PetscFunctionReturn(0); 600*d4002b98SHong Zhang } 601*d4002b98SHong Zhang 602*d4002b98SHong Zhang /* 603*d4002b98SHong Zhang Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 604*d4002b98SHong Zhang */ 605*d4002b98SHong Zhang PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 606*d4002b98SHong Zhang { 607*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 608*d4002b98SHong Zhang PetscInt i,*diag,m = A->rmap->n; 609*d4002b98SHong Zhang MatScalar *val = a->val; 610*d4002b98SHong Zhang PetscScalar *idiag,*mdiag; 611*d4002b98SHong Zhang PetscErrorCode ierr; 612*d4002b98SHong Zhang 613*d4002b98SHong Zhang PetscFunctionBegin; 614*d4002b98SHong Zhang if (a->idiagvalid) PetscFunctionReturn(0); 615*d4002b98SHong Zhang ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 616*d4002b98SHong Zhang diag = a->diag; 617*d4002b98SHong Zhang if (!a->idiag) { 618*d4002b98SHong Zhang ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 619*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 620*d4002b98SHong Zhang val = a->val; 621*d4002b98SHong Zhang } 622*d4002b98SHong Zhang mdiag = a->mdiag; 623*d4002b98SHong Zhang idiag = a->idiag; 624*d4002b98SHong Zhang 625*d4002b98SHong Zhang if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 626*d4002b98SHong Zhang for (i=0; i<m; i++) { 627*d4002b98SHong Zhang mdiag[i] = val[diag[i]]; 628*d4002b98SHong Zhang if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 629*d4002b98SHong Zhang if (PetscRealPart(fshift)) { 630*d4002b98SHong Zhang ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 631*d4002b98SHong Zhang A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 632*d4002b98SHong Zhang A->factorerror_zeropivot_value = 0.0; 633*d4002b98SHong Zhang A->factorerror_zeropivot_row = i; 634*d4002b98SHong Zhang } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 635*d4002b98SHong Zhang } 636*d4002b98SHong Zhang idiag[i] = 1.0/val[diag[i]]; 637*d4002b98SHong Zhang } 638*d4002b98SHong Zhang ierr = PetscLogFlops(m);CHKERRQ(ierr); 639*d4002b98SHong Zhang } else { 640*d4002b98SHong Zhang for (i=0; i<m; i++) { 641*d4002b98SHong Zhang mdiag[i] = val[diag[i]]; 642*d4002b98SHong Zhang idiag[i] = omega/(fshift + val[diag[i]]); 643*d4002b98SHong Zhang } 644*d4002b98SHong Zhang ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 645*d4002b98SHong Zhang } 646*d4002b98SHong Zhang a->idiagvalid = PETSC_TRUE; 647*d4002b98SHong Zhang PetscFunctionReturn(0); 648*d4002b98SHong Zhang } 649*d4002b98SHong Zhang 650*d4002b98SHong Zhang PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 651*d4002b98SHong Zhang { 652*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 653*d4002b98SHong Zhang PetscErrorCode ierr; 654*d4002b98SHong Zhang 655*d4002b98SHong Zhang PetscFunctionBegin; 656*d4002b98SHong Zhang ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 657*d4002b98SHong Zhang ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 658*d4002b98SHong Zhang PetscFunctionReturn(0); 659*d4002b98SHong Zhang } 660*d4002b98SHong Zhang 661*d4002b98SHong Zhang PetscErrorCode MatDestroy_SeqSELL(Mat A) 662*d4002b98SHong Zhang { 663*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 664*d4002b98SHong Zhang PetscErrorCode ierr; 665*d4002b98SHong Zhang 666*d4002b98SHong Zhang PetscFunctionBegin; 667*d4002b98SHong Zhang #if defined(PETSC_USE_LOG) 668*d4002b98SHong Zhang PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 669*d4002b98SHong Zhang #endif 670*d4002b98SHong Zhang ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 671*d4002b98SHong Zhang ierr = ISDestroy(&a->row);CHKERRQ(ierr); 672*d4002b98SHong Zhang ierr = ISDestroy(&a->col);CHKERRQ(ierr); 673*d4002b98SHong Zhang ierr = PetscFree(a->diag);CHKERRQ(ierr); 674*d4002b98SHong Zhang ierr = PetscFree(a->ibdiag);CHKERRQ(ierr); 675*d4002b98SHong Zhang ierr = PetscFree(a->rlen);CHKERRQ(ierr); 676*d4002b98SHong Zhang ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 677*d4002b98SHong Zhang ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 678*d4002b98SHong Zhang ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 679*d4002b98SHong Zhang ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 680*d4002b98SHong Zhang ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 681*d4002b98SHong Zhang 682*d4002b98SHong Zhang ierr = PetscFree(A->data);CHKERRQ(ierr); 683*d4002b98SHong Zhang 684*d4002b98SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 685*d4002b98SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 686*d4002b98SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 687*d4002b98SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 688*d4002b98SHong Zhang #endif 689*d4002b98SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 690*d4002b98SHong Zhang PetscFunctionReturn(0); 691*d4002b98SHong Zhang } 692*d4002b98SHong Zhang 693*d4002b98SHong Zhang PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 694*d4002b98SHong Zhang { 695*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 696*d4002b98SHong Zhang PetscErrorCode ierr; 697*d4002b98SHong Zhang 698*d4002b98SHong Zhang PetscFunctionBegin; 699*d4002b98SHong Zhang switch (op) { 700*d4002b98SHong Zhang case MAT_ROW_ORIENTED: 701*d4002b98SHong Zhang a->roworiented = flg; 702*d4002b98SHong Zhang break; 703*d4002b98SHong Zhang case MAT_KEEP_NONZERO_PATTERN: 704*d4002b98SHong Zhang a->keepnonzeropattern = flg; 705*d4002b98SHong Zhang break; 706*d4002b98SHong Zhang case MAT_NEW_NONZERO_LOCATIONS: 707*d4002b98SHong Zhang a->nonew = (flg ? 0 : 1); 708*d4002b98SHong Zhang break; 709*d4002b98SHong Zhang case MAT_NEW_NONZERO_LOCATION_ERR: 710*d4002b98SHong Zhang a->nonew = (flg ? -1 : 0); 711*d4002b98SHong Zhang break; 712*d4002b98SHong Zhang case MAT_NEW_NONZERO_ALLOCATION_ERR: 713*d4002b98SHong Zhang a->nonew = (flg ? -2 : 0); 714*d4002b98SHong Zhang break; 715*d4002b98SHong Zhang case MAT_UNUSED_NONZERO_LOCATION_ERR: 716*d4002b98SHong Zhang a->nounused = (flg ? -1 : 0); 717*d4002b98SHong Zhang break; 718*d4002b98SHong Zhang case MAT_NEW_DIAGONALS: 719*d4002b98SHong Zhang case MAT_IGNORE_OFF_PROC_ENTRIES: 720*d4002b98SHong Zhang case MAT_USE_HASH_TABLE: 721*d4002b98SHong Zhang ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 722*d4002b98SHong Zhang break; 723*d4002b98SHong Zhang case MAT_SPD: 724*d4002b98SHong Zhang case MAT_SYMMETRIC: 725*d4002b98SHong Zhang case MAT_STRUCTURALLY_SYMMETRIC: 726*d4002b98SHong Zhang case MAT_HERMITIAN: 727*d4002b98SHong Zhang case MAT_SYMMETRY_ETERNAL: 728*d4002b98SHong Zhang /* These options are handled directly by MatSetOption() */ 729*d4002b98SHong Zhang break; 730*d4002b98SHong Zhang default: 731*d4002b98SHong Zhang SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 732*d4002b98SHong Zhang } 733*d4002b98SHong Zhang PetscFunctionReturn(0); 734*d4002b98SHong Zhang } 735*d4002b98SHong Zhang 736*d4002b98SHong Zhang PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 737*d4002b98SHong Zhang { 738*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 739*d4002b98SHong Zhang PetscInt i,j,n,shift; 740*d4002b98SHong Zhang PetscScalar *x,zero=0.0; 741*d4002b98SHong Zhang PetscErrorCode ierr; 742*d4002b98SHong Zhang 743*d4002b98SHong Zhang PetscFunctionBegin; 744*d4002b98SHong Zhang ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 745*d4002b98SHong Zhang if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 746*d4002b98SHong Zhang 747*d4002b98SHong Zhang if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 748*d4002b98SHong Zhang PetscInt *diag=a->diag; 749*d4002b98SHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 750*d4002b98SHong Zhang for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 751*d4002b98SHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 752*d4002b98SHong Zhang PetscFunctionReturn(0); 753*d4002b98SHong Zhang } 754*d4002b98SHong Zhang 755*d4002b98SHong Zhang ierr = VecSet(v,zero);CHKERRQ(ierr); 756*d4002b98SHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 757*d4002b98SHong Zhang for (i=0; i<n; i++) { /* loop over rows */ 758*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 759*d4002b98SHong Zhang x[i] = 0; 760*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 761*d4002b98SHong Zhang if (a->colidx[shift+j*8] == i) { 762*d4002b98SHong Zhang x[i] = a->val[shift+j*8]; 763*d4002b98SHong Zhang break; 764*d4002b98SHong Zhang } 765*d4002b98SHong Zhang } 766*d4002b98SHong Zhang } 767*d4002b98SHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 768*d4002b98SHong Zhang PetscFunctionReturn(0); 769*d4002b98SHong Zhang } 770*d4002b98SHong Zhang 771*d4002b98SHong Zhang PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 772*d4002b98SHong Zhang { 773*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 774*d4002b98SHong Zhang const PetscScalar *l,*r; 775*d4002b98SHong Zhang PetscInt i,j,m,n,row; 776*d4002b98SHong Zhang PetscErrorCode ierr; 777*d4002b98SHong Zhang 778*d4002b98SHong Zhang PetscFunctionBegin; 779*d4002b98SHong Zhang if (ll) { 780*d4002b98SHong Zhang /* The local size is used so that VecMPI can be passed to this routine 781*d4002b98SHong Zhang by MatDiagonalScale_MPISELL */ 782*d4002b98SHong Zhang ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 783*d4002b98SHong Zhang if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 784*d4002b98SHong Zhang ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 785*d4002b98SHong Zhang for (i=0; i<a->totalslices; i++) { /* loop over slices */ 786*d4002b98SHong Zhang for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 787*d4002b98SHong Zhang a->val[j] *= l[8*i+row]; 788*d4002b98SHong Zhang } 789*d4002b98SHong Zhang } 790*d4002b98SHong Zhang ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 791*d4002b98SHong Zhang ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 792*d4002b98SHong Zhang } 793*d4002b98SHong Zhang if (rr) { 794*d4002b98SHong Zhang ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 795*d4002b98SHong Zhang if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 796*d4002b98SHong Zhang ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 797*d4002b98SHong Zhang for (i=0; i<a->totalslices; i++) { /* loop over slices */ 798*d4002b98SHong Zhang for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 799*d4002b98SHong Zhang a->val[j] *= r[a->colidx[j]]; 800*d4002b98SHong Zhang } 801*d4002b98SHong Zhang } 802*d4002b98SHong Zhang ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 803*d4002b98SHong Zhang ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 804*d4002b98SHong Zhang } 805*d4002b98SHong Zhang ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 806*d4002b98SHong Zhang PetscFunctionReturn(0); 807*d4002b98SHong Zhang } 808*d4002b98SHong Zhang 809*d4002b98SHong Zhang extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 810*d4002b98SHong Zhang 811*d4002b98SHong Zhang PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 812*d4002b98SHong Zhang { 813*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 814*d4002b98SHong Zhang PetscInt *cp,i,k,low,high,t,row,col,l; 815*d4002b98SHong Zhang PetscInt shift; 816*d4002b98SHong Zhang MatScalar *vp; 817*d4002b98SHong Zhang 818*d4002b98SHong Zhang PetscFunctionBegin; 819*d4002b98SHong Zhang for (k=0; k<m; k++) { /* loop over added rows */ 820*d4002b98SHong Zhang row = im[k]; 821*d4002b98SHong Zhang if (row<0) continue; 822*d4002b98SHong Zhang #if defined(PETSC_USE_DEBUG) 823*d4002b98SHong Zhang if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 824*d4002b98SHong Zhang #endif 825*d4002b98SHong Zhang shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 826*d4002b98SHong Zhang cp = a->colidx+shift; /* pointer to the row */ 827*d4002b98SHong Zhang vp = a->val+shift; /* pointer to the row */ 828*d4002b98SHong Zhang for (l=0; l<n; l++) { /* loop over added rows */ 829*d4002b98SHong Zhang col = in[l]; 830*d4002b98SHong Zhang if (col<0) continue; 831*d4002b98SHong Zhang #if defined(PETSC_USE_DEBUG) 832*d4002b98SHong Zhang if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 833*d4002b98SHong Zhang #endif 834*d4002b98SHong Zhang high = a->rlen[row]; low = 0; /* assume unsorted */ 835*d4002b98SHong Zhang while (high-low > 5) { 836*d4002b98SHong Zhang t = (low+high)/2; 837*d4002b98SHong Zhang if (*(cp+t*8) > col) high = t; 838*d4002b98SHong Zhang else low = t; 839*d4002b98SHong Zhang } 840*d4002b98SHong Zhang for (i=low; i<high; i++) { 841*d4002b98SHong Zhang if (*(cp+8*i) > col) break; 842*d4002b98SHong Zhang if (*(cp+8*i) == col) { 843*d4002b98SHong Zhang *v++ = *(vp+8*i); 844*d4002b98SHong Zhang goto finished; 845*d4002b98SHong Zhang } 846*d4002b98SHong Zhang } 847*d4002b98SHong Zhang *v++ = 0.0; 848*d4002b98SHong Zhang finished:; 849*d4002b98SHong Zhang } 850*d4002b98SHong Zhang } 851*d4002b98SHong Zhang PetscFunctionReturn(0); 852*d4002b98SHong Zhang } 853*d4002b98SHong Zhang 854*d4002b98SHong Zhang PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 855*d4002b98SHong Zhang { 856*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 857*d4002b98SHong Zhang PetscInt i,j,m=A->rmap->n,shift; 858*d4002b98SHong Zhang const char *name; 859*d4002b98SHong Zhang PetscViewerFormat format; 860*d4002b98SHong Zhang PetscErrorCode ierr; 861*d4002b98SHong Zhang 862*d4002b98SHong Zhang PetscFunctionBegin; 863*d4002b98SHong Zhang ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 864*d4002b98SHong Zhang if (format == PETSC_VIEWER_ASCII_MATLAB) { 865*d4002b98SHong Zhang PetscInt nofinalvalue = 0; 866*d4002b98SHong Zhang /* 867*d4002b98SHong Zhang if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 868*d4002b98SHong Zhang nofinalvalue = 1; 869*d4002b98SHong Zhang } 870*d4002b98SHong Zhang */ 871*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 872*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 873*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 874*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 875*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 876*d4002b98SHong Zhang #else 877*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 878*d4002b98SHong Zhang #endif 879*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 880*d4002b98SHong Zhang 881*d4002b98SHong Zhang for (i=0; i<m; i++) { 882*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); 883*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 884*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 885*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 886*d4002b98SHong Zhang #else 887*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 888*d4002b98SHong Zhang #endif 889*d4002b98SHong Zhang } 890*d4002b98SHong Zhang } 891*d4002b98SHong Zhang /* 892*d4002b98SHong Zhang if (nofinalvalue) { 893*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 894*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 895*d4002b98SHong Zhang #else 896*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 897*d4002b98SHong Zhang #endif 898*d4002b98SHong Zhang } 899*d4002b98SHong Zhang */ 900*d4002b98SHong Zhang ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 901*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 902*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 903*d4002b98SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 904*d4002b98SHong Zhang PetscFunctionReturn(0); 905*d4002b98SHong Zhang } else if (format == PETSC_VIEWER_ASCII_COMMON) { 906*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 907*d4002b98SHong Zhang for (i=0; i<m; i++) { 908*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 909*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); 910*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 911*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 912*d4002b98SHong Zhang if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 913*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 914*d4002b98SHong Zhang } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 915*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 916*d4002b98SHong Zhang } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 917*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 918*d4002b98SHong Zhang } 919*d4002b98SHong Zhang #else 920*d4002b98SHong Zhang if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 921*d4002b98SHong Zhang #endif 922*d4002b98SHong Zhang } 923*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 924*d4002b98SHong Zhang } 925*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 926*d4002b98SHong Zhang } else if (format == PETSC_VIEWER_ASCII_DENSE) { 927*d4002b98SHong Zhang PetscInt cnt=0,jcnt; 928*d4002b98SHong Zhang PetscScalar value; 929*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 930*d4002b98SHong Zhang PetscBool realonly=PETSC_TRUE; 931*d4002b98SHong Zhang for (i=0; i<a->sliidx[a->totalslices]; i++) { 932*d4002b98SHong Zhang if (PetscImaginaryPart(a->val[i]) != 0.0) { 933*d4002b98SHong Zhang realonly = PETSC_FALSE; 934*d4002b98SHong Zhang break; 935*d4002b98SHong Zhang } 936*d4002b98SHong Zhang } 937*d4002b98SHong Zhang #endif 938*d4002b98SHong Zhang 939*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 940*d4002b98SHong Zhang for (i=0; i<m; i++) { 941*d4002b98SHong Zhang jcnt = 0; 942*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); 943*d4002b98SHong Zhang for (j=0; j<A->cmap->n; j++) { 944*d4002b98SHong Zhang if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 945*d4002b98SHong Zhang value = a->val[cnt++]; 946*d4002b98SHong Zhang jcnt++; 947*d4002b98SHong Zhang } else { 948*d4002b98SHong Zhang value = 0.0; 949*d4002b98SHong Zhang } 950*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 951*d4002b98SHong Zhang if (realonly) { 952*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 953*d4002b98SHong Zhang } else { 954*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 955*d4002b98SHong Zhang } 956*d4002b98SHong Zhang #else 957*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 958*d4002b98SHong Zhang #endif 959*d4002b98SHong Zhang } 960*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 961*d4002b98SHong Zhang } 962*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 963*d4002b98SHong Zhang } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 964*d4002b98SHong Zhang PetscInt fshift=1; 965*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 966*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 967*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 968*d4002b98SHong Zhang #else 969*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 970*d4002b98SHong Zhang #endif 971*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 972*d4002b98SHong Zhang for (i=0; i<m; i++) { 973*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); 974*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 975*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 976*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 977*d4002b98SHong Zhang #else 978*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 979*d4002b98SHong Zhang #endif 980*d4002b98SHong Zhang } 981*d4002b98SHong Zhang } 982*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 983*d4002b98SHong Zhang } else { 984*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 985*d4002b98SHong Zhang if (A->factortype) { 986*d4002b98SHong Zhang for (i=0; i<m; i++) { 987*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); 988*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 989*d4002b98SHong Zhang /* L part */ 990*d4002b98SHong Zhang for (j=shift; j<a->diag[i]; j+=8) { 991*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 992*d4002b98SHong Zhang if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 993*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 994*d4002b98SHong Zhang } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 995*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 996*d4002b98SHong Zhang } else { 997*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 998*d4002b98SHong Zhang } 999*d4002b98SHong Zhang #else 1000*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1001*d4002b98SHong Zhang #endif 1002*d4002b98SHong Zhang } 1003*d4002b98SHong Zhang /* diagonal */ 1004*d4002b98SHong Zhang j = a->diag[i]; 1005*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 1006*d4002b98SHong Zhang if (PetscImaginaryPart(a->val[j]) > 0.0) { 1007*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1008*d4002b98SHong Zhang } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1009*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1010*d4002b98SHong Zhang } else { 1011*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1012*d4002b98SHong Zhang } 1013*d4002b98SHong Zhang #else 1014*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1015*d4002b98SHong Zhang #endif 1016*d4002b98SHong Zhang 1017*d4002b98SHong Zhang /* U part */ 1018*d4002b98SHong Zhang for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1019*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 1020*d4002b98SHong Zhang if (PetscImaginaryPart(a->val[j]) > 0.0) { 1021*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1022*d4002b98SHong Zhang } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1023*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1024*d4002b98SHong Zhang } else { 1025*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1026*d4002b98SHong Zhang } 1027*d4002b98SHong Zhang #else 1028*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1029*d4002b98SHong Zhang #endif 1030*d4002b98SHong Zhang } 1031*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1032*d4002b98SHong Zhang } 1033*d4002b98SHong Zhang } else { 1034*d4002b98SHong Zhang for (i=0; i<m; i++) { 1035*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); 1036*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1037*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 1038*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 1039*d4002b98SHong Zhang if (PetscImaginaryPart(a->val[j]) > 0.0) { 1040*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1041*d4002b98SHong Zhang } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1042*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1043*d4002b98SHong Zhang } else { 1044*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1045*d4002b98SHong Zhang } 1046*d4002b98SHong Zhang #else 1047*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1048*d4002b98SHong Zhang #endif 1049*d4002b98SHong Zhang } 1050*d4002b98SHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1051*d4002b98SHong Zhang } 1052*d4002b98SHong Zhang } 1053*d4002b98SHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1054*d4002b98SHong Zhang } 1055*d4002b98SHong Zhang ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1056*d4002b98SHong Zhang PetscFunctionReturn(0); 1057*d4002b98SHong Zhang } 1058*d4002b98SHong Zhang 1059*d4002b98SHong Zhang #include <petscdraw.h> 1060*d4002b98SHong Zhang PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1061*d4002b98SHong Zhang { 1062*d4002b98SHong Zhang Mat A=(Mat)Aa; 1063*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1064*d4002b98SHong Zhang PetscInt i,j,m=A->rmap->n,shift; 1065*d4002b98SHong Zhang int color; 1066*d4002b98SHong Zhang PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1067*d4002b98SHong Zhang PetscViewer viewer; 1068*d4002b98SHong Zhang PetscViewerFormat format; 1069*d4002b98SHong Zhang PetscErrorCode ierr; 1070*d4002b98SHong Zhang 1071*d4002b98SHong Zhang PetscFunctionBegin; 1072*d4002b98SHong Zhang ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1073*d4002b98SHong Zhang ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1074*d4002b98SHong Zhang ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1075*d4002b98SHong Zhang 1076*d4002b98SHong Zhang /* loop over matrix elements drawing boxes */ 1077*d4002b98SHong Zhang 1078*d4002b98SHong Zhang if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1079*d4002b98SHong Zhang ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1080*d4002b98SHong Zhang /* Blue for negative, Cyan for zero and Red for positive */ 1081*d4002b98SHong Zhang color = PETSC_DRAW_BLUE; 1082*d4002b98SHong Zhang for (i=0; i<m; i++) { 1083*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1084*d4002b98SHong Zhang y_l = m - i - 1.0; y_r = y_l + 1.0; 1085*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 1086*d4002b98SHong Zhang x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1087*d4002b98SHong Zhang if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1088*d4002b98SHong Zhang ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1089*d4002b98SHong Zhang } 1090*d4002b98SHong Zhang } 1091*d4002b98SHong Zhang color = PETSC_DRAW_CYAN; 1092*d4002b98SHong Zhang for (i=0; i<m; i++) { 1093*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); 1094*d4002b98SHong Zhang y_l = m - i - 1.0; y_r = y_l + 1.0; 1095*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 1096*d4002b98SHong Zhang x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1097*d4002b98SHong Zhang if (a->val[shift+8*j] != 0.) continue; 1098*d4002b98SHong Zhang ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1099*d4002b98SHong Zhang } 1100*d4002b98SHong Zhang } 1101*d4002b98SHong Zhang color = PETSC_DRAW_RED; 1102*d4002b98SHong Zhang for (i=0; i<m; i++) { 1103*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); 1104*d4002b98SHong Zhang y_l = m - i - 1.0; y_r = y_l + 1.0; 1105*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 1106*d4002b98SHong Zhang x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1107*d4002b98SHong Zhang if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1108*d4002b98SHong Zhang ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1109*d4002b98SHong Zhang } 1110*d4002b98SHong Zhang } 1111*d4002b98SHong Zhang ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1112*d4002b98SHong Zhang } else { 1113*d4002b98SHong Zhang /* use contour shading to indicate magnitude of values */ 1114*d4002b98SHong Zhang /* first determine max of all nonzero values */ 1115*d4002b98SHong Zhang PetscReal minv=0.0,maxv=0.0; 1116*d4002b98SHong Zhang PetscInt count=0; 1117*d4002b98SHong Zhang PetscDraw popup; 1118*d4002b98SHong Zhang for (i=0; i<a->sliidx[a->totalslices]; i++) { 1119*d4002b98SHong Zhang if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1120*d4002b98SHong Zhang } 1121*d4002b98SHong Zhang if (minv >= maxv) maxv = minv + PETSC_SMALL; 1122*d4002b98SHong Zhang ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1123*d4002b98SHong Zhang ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1124*d4002b98SHong Zhang 1125*d4002b98SHong Zhang ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1126*d4002b98SHong Zhang for (i=0; i<m; i++) { 1127*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); 1128*d4002b98SHong Zhang y_l = m - i - 1.0; 1129*d4002b98SHong Zhang y_r = y_l + 1.0; 1130*d4002b98SHong Zhang for (j=0; j<a->rlen[i]; j++) { 1131*d4002b98SHong Zhang x_l = a->colidx[shift+j*8]; 1132*d4002b98SHong Zhang x_r = x_l + 1.0; 1133*d4002b98SHong Zhang color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1134*d4002b98SHong Zhang ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1135*d4002b98SHong Zhang count++; 1136*d4002b98SHong Zhang } 1137*d4002b98SHong Zhang } 1138*d4002b98SHong Zhang ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1139*d4002b98SHong Zhang } 1140*d4002b98SHong Zhang PetscFunctionReturn(0); 1141*d4002b98SHong Zhang } 1142*d4002b98SHong Zhang 1143*d4002b98SHong Zhang #include <petscdraw.h> 1144*d4002b98SHong Zhang PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1145*d4002b98SHong Zhang { 1146*d4002b98SHong Zhang PetscDraw draw; 1147*d4002b98SHong Zhang PetscReal xr,yr,xl,yl,h,w; 1148*d4002b98SHong Zhang PetscBool isnull; 1149*d4002b98SHong Zhang PetscErrorCode ierr; 1150*d4002b98SHong Zhang 1151*d4002b98SHong Zhang PetscFunctionBegin; 1152*d4002b98SHong Zhang ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1153*d4002b98SHong Zhang ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1154*d4002b98SHong Zhang if (isnull) PetscFunctionReturn(0); 1155*d4002b98SHong Zhang 1156*d4002b98SHong Zhang xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1157*d4002b98SHong Zhang xr += w; yr += h; xl = -w; yl = -h; 1158*d4002b98SHong Zhang ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1159*d4002b98SHong Zhang ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1160*d4002b98SHong Zhang ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1161*d4002b98SHong Zhang ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1162*d4002b98SHong Zhang ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1163*d4002b98SHong Zhang PetscFunctionReturn(0); 1164*d4002b98SHong Zhang } 1165*d4002b98SHong Zhang 1166*d4002b98SHong Zhang PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1167*d4002b98SHong Zhang { 1168*d4002b98SHong Zhang PetscBool iascii,isbinary,isdraw; 1169*d4002b98SHong Zhang PetscErrorCode ierr; 1170*d4002b98SHong Zhang 1171*d4002b98SHong Zhang PetscFunctionBegin; 1172*d4002b98SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1173*d4002b98SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1174*d4002b98SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1175*d4002b98SHong Zhang if (iascii) { 1176*d4002b98SHong Zhang ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1177*d4002b98SHong Zhang } else if (isbinary) { 1178*d4002b98SHong Zhang /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1179*d4002b98SHong Zhang } else if (isdraw) { 1180*d4002b98SHong Zhang ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1181*d4002b98SHong Zhang } 1182*d4002b98SHong Zhang PetscFunctionReturn(0); 1183*d4002b98SHong Zhang } 1184*d4002b98SHong Zhang 1185*d4002b98SHong Zhang PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1186*d4002b98SHong Zhang { 1187*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1188*d4002b98SHong Zhang PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1189*d4002b98SHong Zhang MatScalar *vp; 1190*d4002b98SHong Zhang PetscErrorCode ierr; 1191*d4002b98SHong Zhang 1192*d4002b98SHong Zhang PetscFunctionBegin; 1193*d4002b98SHong Zhang if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1194*d4002b98SHong Zhang /* To do: compress out the unused elements */ 1195*d4002b98SHong Zhang ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1196*d4002b98SHong Zhang ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1197*d4002b98SHong Zhang ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1198*d4002b98SHong Zhang ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1199*d4002b98SHong Zhang /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1200*d4002b98SHong Zhang for (i=0; i<a->totalslices; ++i) { 1201*d4002b98SHong Zhang shift = a->sliidx[i]; /* starting index of the slice */ 1202*d4002b98SHong Zhang cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1203*d4002b98SHong Zhang vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1204*d4002b98SHong Zhang for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1205*d4002b98SHong Zhang row = 8*i + row_in_slice; 1206*d4002b98SHong Zhang nrow = a->rlen[row]; /* number of nonzeros in row */ 1207*d4002b98SHong Zhang /* 1208*d4002b98SHong Zhang Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1209*d4002b98SHong Zhang But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1210*d4002b98SHong Zhang */ 1211*d4002b98SHong Zhang lastcol = 0; 1212*d4002b98SHong Zhang if (nrow>0) { /* nonempty row */ 1213*d4002b98SHong Zhang lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1214*d4002b98SHong Zhang } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1215*d4002b98SHong Zhang for (j=1;j<8;j++) { 1216*d4002b98SHong Zhang if (a->rlen[8*i+j]) { 1217*d4002b98SHong Zhang lastcol = cp[j]; 1218*d4002b98SHong Zhang break; 1219*d4002b98SHong Zhang } 1220*d4002b98SHong Zhang } 1221*d4002b98SHong Zhang } else { 1222*d4002b98SHong Zhang if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1223*d4002b98SHong Zhang } 1224*d4002b98SHong Zhang 1225*d4002b98SHong Zhang for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1226*d4002b98SHong Zhang cp[8*k+row_in_slice] = lastcol; 1227*d4002b98SHong Zhang vp[8*k+row_in_slice] = (MatScalar)0; 1228*d4002b98SHong Zhang } 1229*d4002b98SHong Zhang } 1230*d4002b98SHong Zhang } 1231*d4002b98SHong Zhang 1232*d4002b98SHong Zhang A->info.mallocs += a->reallocs; 1233*d4002b98SHong Zhang a->reallocs = 0; 1234*d4002b98SHong Zhang 1235*d4002b98SHong Zhang ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1236*d4002b98SHong Zhang PetscFunctionReturn(0); 1237*d4002b98SHong Zhang } 1238*d4002b98SHong Zhang 1239*d4002b98SHong Zhang PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1240*d4002b98SHong Zhang { 1241*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1242*d4002b98SHong Zhang 1243*d4002b98SHong Zhang PetscFunctionBegin; 1244*d4002b98SHong Zhang info->block_size = 1.0; 1245*d4002b98SHong Zhang info->nz_allocated = (double)a->maxallocmat; 1246*d4002b98SHong Zhang info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1247*d4002b98SHong Zhang info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1248*d4002b98SHong Zhang info->assemblies = (double)A->num_ass; 1249*d4002b98SHong Zhang info->mallocs = (double)A->info.mallocs; 1250*d4002b98SHong Zhang info->memory = ((PetscObject)A)->mem; 1251*d4002b98SHong Zhang if (A->factortype) { 1252*d4002b98SHong Zhang info->fill_ratio_given = A->info.fill_ratio_given; 1253*d4002b98SHong Zhang info->fill_ratio_needed = A->info.fill_ratio_needed; 1254*d4002b98SHong Zhang info->factor_mallocs = A->info.factor_mallocs; 1255*d4002b98SHong Zhang } else { 1256*d4002b98SHong Zhang info->fill_ratio_given = 0; 1257*d4002b98SHong Zhang info->fill_ratio_needed = 0; 1258*d4002b98SHong Zhang info->factor_mallocs = 0; 1259*d4002b98SHong Zhang } 1260*d4002b98SHong Zhang PetscFunctionReturn(0); 1261*d4002b98SHong Zhang } 1262*d4002b98SHong Zhang 1263*d4002b98SHong Zhang PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1264*d4002b98SHong Zhang { 1265*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1266*d4002b98SHong Zhang PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1267*d4002b98SHong Zhang PetscInt *cp,nonew=a->nonew,lastcol=-1; 1268*d4002b98SHong Zhang MatScalar *vp,value; 1269*d4002b98SHong Zhang PetscErrorCode ierr; 1270*d4002b98SHong Zhang 1271*d4002b98SHong Zhang PetscFunctionBegin; 1272*d4002b98SHong Zhang for (k=0; k<m; k++) { /* loop over added rows */ 1273*d4002b98SHong Zhang row = im[k]; 1274*d4002b98SHong Zhang if (row < 0) continue; 1275*d4002b98SHong Zhang #if defined(PETSC_USE_DEBUG) 1276*d4002b98SHong Zhang if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1277*d4002b98SHong Zhang #endif 1278*d4002b98SHong Zhang shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1279*d4002b98SHong Zhang cp = a->colidx+shift; /* pointer to the row */ 1280*d4002b98SHong Zhang vp = a->val+shift; /* pointer to the row */ 1281*d4002b98SHong Zhang nrow = a->rlen[row]; 1282*d4002b98SHong Zhang low = 0; 1283*d4002b98SHong Zhang high = nrow; 1284*d4002b98SHong Zhang 1285*d4002b98SHong Zhang for (l=0; l<n; l++) { /* loop over added columns */ 1286*d4002b98SHong Zhang col = in[l]; 1287*d4002b98SHong Zhang if (col<0) continue; 1288*d4002b98SHong Zhang #if defined(PETSC_USE_DEBUG) 1289*d4002b98SHong Zhang if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1290*d4002b98SHong Zhang #endif 1291*d4002b98SHong Zhang if (a->roworiented) { 1292*d4002b98SHong Zhang value = v[l+k*n]; 1293*d4002b98SHong Zhang } else { 1294*d4002b98SHong Zhang value = v[k+l*m]; 1295*d4002b98SHong Zhang } 1296*d4002b98SHong Zhang if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1297*d4002b98SHong Zhang 1298*d4002b98SHong Zhang /* search in this row for the specified colmun, i indicates the column to be set */ 1299*d4002b98SHong Zhang if (col <= lastcol) low = 0; 1300*d4002b98SHong Zhang else high = nrow; 1301*d4002b98SHong Zhang lastcol = col; 1302*d4002b98SHong Zhang while (high-low > 5) { 1303*d4002b98SHong Zhang t = (low+high)/2; 1304*d4002b98SHong Zhang if (*(cp+t*8) > col) high = t; 1305*d4002b98SHong Zhang else low = t; 1306*d4002b98SHong Zhang } 1307*d4002b98SHong Zhang for (i=low; i<high; i++) { 1308*d4002b98SHong Zhang if (*(cp+i*8) > col) break; 1309*d4002b98SHong Zhang if (*(cp+i*8) == col) { 1310*d4002b98SHong Zhang if (is == ADD_VALUES) *(vp+i*8) += value; 1311*d4002b98SHong Zhang else *(vp+i*8) = value; 1312*d4002b98SHong Zhang low = i + 1; 1313*d4002b98SHong Zhang goto noinsert; 1314*d4002b98SHong Zhang } 1315*d4002b98SHong Zhang } 1316*d4002b98SHong Zhang if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1317*d4002b98SHong Zhang if (nonew == 1) goto noinsert; 1318*d4002b98SHong Zhang if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1319*d4002b98SHong Zhang /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1320*d4002b98SHong Zhang MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1321*d4002b98SHong Zhang /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1322*d4002b98SHong Zhang for (ii=nrow-1; ii>=i; ii--) { 1323*d4002b98SHong Zhang *(cp+(ii+1)*8) = *(cp+ii*8); 1324*d4002b98SHong Zhang *(vp+(ii+1)*8) = *(vp+ii*8); 1325*d4002b98SHong Zhang } 1326*d4002b98SHong Zhang a->rlen[row]++; 1327*d4002b98SHong Zhang *(cp+i*8) = col; 1328*d4002b98SHong Zhang *(vp+i*8) = value; 1329*d4002b98SHong Zhang a->nz++; 1330*d4002b98SHong Zhang A->nonzerostate++; 1331*d4002b98SHong Zhang low = i+1; high++; nrow++; 1332*d4002b98SHong Zhang noinsert:; 1333*d4002b98SHong Zhang } 1334*d4002b98SHong Zhang a->rlen[row] = nrow; 1335*d4002b98SHong Zhang } 1336*d4002b98SHong Zhang PetscFunctionReturn(0); 1337*d4002b98SHong Zhang } 1338*d4002b98SHong Zhang 1339*d4002b98SHong Zhang PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1340*d4002b98SHong Zhang { 1341*d4002b98SHong Zhang PetscErrorCode ierr; 1342*d4002b98SHong Zhang 1343*d4002b98SHong Zhang PetscFunctionBegin; 1344*d4002b98SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. */ 1345*d4002b98SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1346*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1347*d4002b98SHong Zhang Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1348*d4002b98SHong Zhang 1349*d4002b98SHong Zhang if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1350*d4002b98SHong Zhang ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1351*d4002b98SHong Zhang } else { 1352*d4002b98SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1353*d4002b98SHong Zhang } 1354*d4002b98SHong Zhang PetscFunctionReturn(0); 1355*d4002b98SHong Zhang } 1356*d4002b98SHong Zhang 1357*d4002b98SHong Zhang PetscErrorCode MatSetUp_SeqSELL(Mat A) 1358*d4002b98SHong Zhang { 1359*d4002b98SHong Zhang PetscErrorCode ierr; 1360*d4002b98SHong Zhang 1361*d4002b98SHong Zhang PetscFunctionBegin; 1362*d4002b98SHong Zhang ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1363*d4002b98SHong Zhang PetscFunctionReturn(0); 1364*d4002b98SHong Zhang } 1365*d4002b98SHong Zhang 1366*d4002b98SHong Zhang PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1367*d4002b98SHong Zhang { 1368*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1369*d4002b98SHong Zhang 1370*d4002b98SHong Zhang PetscFunctionBegin; 1371*d4002b98SHong Zhang *array = a->val; 1372*d4002b98SHong Zhang PetscFunctionReturn(0); 1373*d4002b98SHong Zhang } 1374*d4002b98SHong Zhang 1375*d4002b98SHong Zhang PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1376*d4002b98SHong Zhang { 1377*d4002b98SHong Zhang PetscFunctionBegin; 1378*d4002b98SHong Zhang PetscFunctionReturn(0); 1379*d4002b98SHong Zhang } 1380*d4002b98SHong Zhang 1381*d4002b98SHong Zhang PetscErrorCode MatRealPart_SeqSELL(Mat A) 1382*d4002b98SHong Zhang { 1383*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1384*d4002b98SHong Zhang PetscInt i; 1385*d4002b98SHong Zhang MatScalar *aval=a->val; 1386*d4002b98SHong Zhang 1387*d4002b98SHong Zhang PetscFunctionBegin; 1388*d4002b98SHong Zhang for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1389*d4002b98SHong Zhang PetscFunctionReturn(0); 1390*d4002b98SHong Zhang } 1391*d4002b98SHong Zhang 1392*d4002b98SHong Zhang PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1393*d4002b98SHong Zhang { 1394*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1395*d4002b98SHong Zhang PetscInt i; 1396*d4002b98SHong Zhang MatScalar *aval=a->val; 1397*d4002b98SHong Zhang PetscErrorCode ierr; 1398*d4002b98SHong Zhang 1399*d4002b98SHong Zhang PetscFunctionBegin; 1400*d4002b98SHong Zhang for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1401*d4002b98SHong Zhang ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1402*d4002b98SHong Zhang PetscFunctionReturn(0); 1403*d4002b98SHong Zhang } 1404*d4002b98SHong Zhang 1405*d4002b98SHong Zhang PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1406*d4002b98SHong Zhang { 1407*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1408*d4002b98SHong Zhang MatScalar *aval=a->val; 1409*d4002b98SHong Zhang PetscScalar oalpha=alpha; 1410*d4002b98SHong Zhang PetscBLASInt one=1,size; 1411*d4002b98SHong Zhang PetscErrorCode ierr; 1412*d4002b98SHong Zhang 1413*d4002b98SHong Zhang PetscFunctionBegin; 1414*d4002b98SHong Zhang ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1415*d4002b98SHong Zhang PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1416*d4002b98SHong Zhang ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1417*d4002b98SHong Zhang ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1418*d4002b98SHong Zhang PetscFunctionReturn(0); 1419*d4002b98SHong Zhang } 1420*d4002b98SHong Zhang 1421*d4002b98SHong Zhang PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1422*d4002b98SHong Zhang { 1423*d4002b98SHong Zhang Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1424*d4002b98SHong Zhang PetscErrorCode ierr; 1425*d4002b98SHong Zhang 1426*d4002b98SHong Zhang PetscFunctionBegin; 1427*d4002b98SHong Zhang if (!Y->preallocated || !y->nz) { 1428*d4002b98SHong Zhang ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1429*d4002b98SHong Zhang } 1430*d4002b98SHong Zhang ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1431*d4002b98SHong Zhang PetscFunctionReturn(0); 1432*d4002b98SHong Zhang } 1433*d4002b98SHong Zhang 1434*d4002b98SHong Zhang PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1435*d4002b98SHong Zhang { 1436*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1437*d4002b98SHong Zhang PetscScalar *x,sum,*t; 1438*d4002b98SHong Zhang const MatScalar *idiag=0,*mdiag; 1439*d4002b98SHong Zhang const PetscScalar *b,*xb; 1440*d4002b98SHong Zhang PetscInt n,m=A->rmap->n,i,j,shift; 1441*d4002b98SHong Zhang const PetscInt *diag; 1442*d4002b98SHong Zhang PetscErrorCode ierr; 1443*d4002b98SHong Zhang 1444*d4002b98SHong Zhang PetscFunctionBegin; 1445*d4002b98SHong Zhang its = its*lits; 1446*d4002b98SHong Zhang 1447*d4002b98SHong Zhang if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1448*d4002b98SHong Zhang if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1449*d4002b98SHong Zhang a->fshift = fshift; 1450*d4002b98SHong Zhang a->omega = omega; 1451*d4002b98SHong Zhang 1452*d4002b98SHong Zhang diag = a->diag; 1453*d4002b98SHong Zhang t = a->ssor_work; 1454*d4002b98SHong Zhang idiag = a->idiag; 1455*d4002b98SHong Zhang mdiag = a->mdiag; 1456*d4002b98SHong Zhang 1457*d4002b98SHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1458*d4002b98SHong Zhang ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1459*d4002b98SHong Zhang /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1460*d4002b98SHong Zhang if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1461*d4002b98SHong Zhang if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1462*d4002b98SHong Zhang if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1463*d4002b98SHong Zhang 1464*d4002b98SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 1465*d4002b98SHong Zhang if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1466*d4002b98SHong Zhang for (i=0; i<m; i++) { 1467*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1468*d4002b98SHong Zhang sum = b[i]; 1469*d4002b98SHong Zhang n = (diag[i]-shift)/8; 1470*d4002b98SHong Zhang for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1471*d4002b98SHong Zhang t[i] = sum; 1472*d4002b98SHong Zhang x[i] = sum*idiag[i]; 1473*d4002b98SHong Zhang } 1474*d4002b98SHong Zhang xb = t; 1475*d4002b98SHong Zhang ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1476*d4002b98SHong Zhang } else xb = b; 1477*d4002b98SHong Zhang if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1478*d4002b98SHong Zhang for (i=m-1; i>=0; i--) { 1479*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1480*d4002b98SHong Zhang sum = xb[i]; 1481*d4002b98SHong Zhang n = a->rlen[i]-(diag[i]-shift)/8-1; 1482*d4002b98SHong Zhang for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1483*d4002b98SHong Zhang if (xb == b) { 1484*d4002b98SHong Zhang x[i] = sum*idiag[i]; 1485*d4002b98SHong Zhang } else { 1486*d4002b98SHong Zhang x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1487*d4002b98SHong Zhang } 1488*d4002b98SHong Zhang } 1489*d4002b98SHong Zhang ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1490*d4002b98SHong Zhang } 1491*d4002b98SHong Zhang its--; 1492*d4002b98SHong Zhang } 1493*d4002b98SHong Zhang while (its--) { 1494*d4002b98SHong Zhang if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1495*d4002b98SHong Zhang for (i=0; i<m; i++) { 1496*d4002b98SHong Zhang /* lower */ 1497*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1498*d4002b98SHong Zhang sum = b[i]; 1499*d4002b98SHong Zhang n = (diag[i]-shift)/8; 1500*d4002b98SHong Zhang for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1501*d4002b98SHong Zhang t[i] = sum; /* save application of the lower-triangular part */ 1502*d4002b98SHong Zhang /* upper */ 1503*d4002b98SHong Zhang n = a->rlen[i]-(diag[i]-shift)/8-1; 1504*d4002b98SHong Zhang for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1505*d4002b98SHong Zhang x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1506*d4002b98SHong Zhang } 1507*d4002b98SHong Zhang xb = t; 1508*d4002b98SHong Zhang ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1509*d4002b98SHong Zhang } else xb = b; 1510*d4002b98SHong Zhang if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1511*d4002b98SHong Zhang for (i=m-1; i>=0; i--) { 1512*d4002b98SHong Zhang shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1513*d4002b98SHong Zhang sum = xb[i]; 1514*d4002b98SHong Zhang if (xb == b) { 1515*d4002b98SHong Zhang /* whole matrix (no checkpointing available) */ 1516*d4002b98SHong Zhang n = a->rlen[i]; 1517*d4002b98SHong Zhang for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1518*d4002b98SHong Zhang x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1519*d4002b98SHong Zhang } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1520*d4002b98SHong Zhang n = a->rlen[i]-(diag[i]-shift)/8-1; 1521*d4002b98SHong Zhang for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1522*d4002b98SHong Zhang x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1523*d4002b98SHong Zhang } 1524*d4002b98SHong Zhang } 1525*d4002b98SHong Zhang if (xb == b) { 1526*d4002b98SHong Zhang ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1527*d4002b98SHong Zhang } else { 1528*d4002b98SHong Zhang ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1529*d4002b98SHong Zhang } 1530*d4002b98SHong Zhang } 1531*d4002b98SHong Zhang } 1532*d4002b98SHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1533*d4002b98SHong Zhang ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1534*d4002b98SHong Zhang PetscFunctionReturn(0); 1535*d4002b98SHong Zhang } 1536*d4002b98SHong Zhang 1537*d4002b98SHong Zhang /* -------------------------------------------------------------------*/ 1538*d4002b98SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1539*d4002b98SHong Zhang 0, 1540*d4002b98SHong Zhang 0, 1541*d4002b98SHong Zhang MatMult_SeqSELL, 1542*d4002b98SHong Zhang /* 4*/ MatMultAdd_SeqSELL, 1543*d4002b98SHong Zhang MatMultTranspose_SeqSELL, 1544*d4002b98SHong Zhang MatMultTransposeAdd_SeqSELL, 1545*d4002b98SHong Zhang 0, 1546*d4002b98SHong Zhang 0, 1547*d4002b98SHong Zhang 0, 1548*d4002b98SHong Zhang /* 10*/ 0, 1549*d4002b98SHong Zhang 0, 1550*d4002b98SHong Zhang 0, 1551*d4002b98SHong Zhang MatSOR_SeqSELL, 1552*d4002b98SHong Zhang 0, 1553*d4002b98SHong Zhang /* 15*/ MatGetInfo_SeqSELL, 1554*d4002b98SHong Zhang MatEqual_SeqSELL, 1555*d4002b98SHong Zhang MatGetDiagonal_SeqSELL, 1556*d4002b98SHong Zhang MatDiagonalScale_SeqSELL, 1557*d4002b98SHong Zhang 0, 1558*d4002b98SHong Zhang /* 20*/ 0, 1559*d4002b98SHong Zhang MatAssemblyEnd_SeqSELL, 1560*d4002b98SHong Zhang MatSetOption_SeqSELL, 1561*d4002b98SHong Zhang MatZeroEntries_SeqSELL, 1562*d4002b98SHong Zhang /* 24*/ 0, 1563*d4002b98SHong Zhang 0, 1564*d4002b98SHong Zhang 0, 1565*d4002b98SHong Zhang 0, 1566*d4002b98SHong Zhang 0, 1567*d4002b98SHong Zhang /* 29*/ MatSetUp_SeqSELL, 1568*d4002b98SHong Zhang 0, 1569*d4002b98SHong Zhang 0, 1570*d4002b98SHong Zhang 0, 1571*d4002b98SHong Zhang 0, 1572*d4002b98SHong Zhang /* 34*/ MatDuplicate_SeqSELL, 1573*d4002b98SHong Zhang 0, 1574*d4002b98SHong Zhang 0, 1575*d4002b98SHong Zhang 0, 1576*d4002b98SHong Zhang 0, 1577*d4002b98SHong Zhang /* 39*/ 0, 1578*d4002b98SHong Zhang 0, 1579*d4002b98SHong Zhang 0, 1580*d4002b98SHong Zhang MatGetValues_SeqSELL, 1581*d4002b98SHong Zhang MatCopy_SeqSELL, 1582*d4002b98SHong Zhang /* 44*/ 0, 1583*d4002b98SHong Zhang MatScale_SeqSELL, 1584*d4002b98SHong Zhang MatShift_SeqSELL, 1585*d4002b98SHong Zhang 0, 1586*d4002b98SHong Zhang 0, 1587*d4002b98SHong Zhang /* 49*/ 0, 1588*d4002b98SHong Zhang 0, 1589*d4002b98SHong Zhang 0, 1590*d4002b98SHong Zhang 0, 1591*d4002b98SHong Zhang 0, 1592*d4002b98SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 1593*d4002b98SHong Zhang 0, 1594*d4002b98SHong Zhang 0, 1595*d4002b98SHong Zhang 0, 1596*d4002b98SHong Zhang 0, 1597*d4002b98SHong Zhang /* 59*/ 0, 1598*d4002b98SHong Zhang MatDestroy_SeqSELL, 1599*d4002b98SHong Zhang MatView_SeqSELL, 1600*d4002b98SHong Zhang 0, 1601*d4002b98SHong Zhang 0, 1602*d4002b98SHong Zhang /* 64*/ 0, 1603*d4002b98SHong Zhang 0, 1604*d4002b98SHong Zhang 0, 1605*d4002b98SHong Zhang 0, 1606*d4002b98SHong Zhang 0, 1607*d4002b98SHong Zhang /* 69*/ 0, 1608*d4002b98SHong Zhang 0, 1609*d4002b98SHong Zhang 0, 1610*d4002b98SHong Zhang 0, 1611*d4002b98SHong Zhang 0, 1612*d4002b98SHong Zhang /* 74*/ 0, 1613*d4002b98SHong Zhang MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1614*d4002b98SHong Zhang 0, 1615*d4002b98SHong Zhang 0, 1616*d4002b98SHong Zhang 0, 1617*d4002b98SHong Zhang /* 79*/ 0, 1618*d4002b98SHong Zhang 0, 1619*d4002b98SHong Zhang 0, 1620*d4002b98SHong Zhang 0, 1621*d4002b98SHong Zhang 0, 1622*d4002b98SHong Zhang /* 84*/ 0, 1623*d4002b98SHong Zhang 0, 1624*d4002b98SHong Zhang 0, 1625*d4002b98SHong Zhang 0, 1626*d4002b98SHong Zhang 0, 1627*d4002b98SHong Zhang /* 89*/ 0, 1628*d4002b98SHong Zhang 0, 1629*d4002b98SHong Zhang 0, 1630*d4002b98SHong Zhang 0, 1631*d4002b98SHong Zhang 0, 1632*d4002b98SHong Zhang /* 94*/ 0, 1633*d4002b98SHong Zhang 0, 1634*d4002b98SHong Zhang 0, 1635*d4002b98SHong Zhang 0, 1636*d4002b98SHong Zhang 0, 1637*d4002b98SHong Zhang /* 99*/ 0, 1638*d4002b98SHong Zhang 0, 1639*d4002b98SHong Zhang 0, 1640*d4002b98SHong Zhang MatConjugate_SeqSELL, 1641*d4002b98SHong Zhang 0, 1642*d4002b98SHong Zhang /*104*/ 0, 1643*d4002b98SHong Zhang 0, 1644*d4002b98SHong Zhang 0, 1645*d4002b98SHong Zhang 0, 1646*d4002b98SHong Zhang 0, 1647*d4002b98SHong Zhang /*109*/ 0, 1648*d4002b98SHong Zhang 0, 1649*d4002b98SHong Zhang 0, 1650*d4002b98SHong Zhang 0, 1651*d4002b98SHong Zhang MatMissingDiagonal_SeqSELL, 1652*d4002b98SHong Zhang /*114*/ 0, 1653*d4002b98SHong Zhang 0, 1654*d4002b98SHong Zhang 0, 1655*d4002b98SHong Zhang 0, 1656*d4002b98SHong Zhang 0, 1657*d4002b98SHong Zhang /*119*/ 0, 1658*d4002b98SHong Zhang 0, 1659*d4002b98SHong Zhang 0, 1660*d4002b98SHong Zhang 0, 1661*d4002b98SHong Zhang 0, 1662*d4002b98SHong Zhang /*124*/ 0, 1663*d4002b98SHong Zhang 0, 1664*d4002b98SHong Zhang 0, 1665*d4002b98SHong Zhang 0, 1666*d4002b98SHong Zhang 0, 1667*d4002b98SHong Zhang /*129*/ 0, 1668*d4002b98SHong Zhang 0, 1669*d4002b98SHong Zhang 0, 1670*d4002b98SHong Zhang 0, 1671*d4002b98SHong Zhang 0, 1672*d4002b98SHong Zhang /*134*/ 0, 1673*d4002b98SHong Zhang 0, 1674*d4002b98SHong Zhang 0, 1675*d4002b98SHong Zhang 0, 1676*d4002b98SHong Zhang 0, 1677*d4002b98SHong Zhang /*139*/ 0, 1678*d4002b98SHong Zhang 0, 1679*d4002b98SHong Zhang 0, 1680*d4002b98SHong Zhang MatFDColoringSetUp_SeqXAIJ, 1681*d4002b98SHong Zhang 0, 1682*d4002b98SHong Zhang /*144*/0 1683*d4002b98SHong Zhang }; 1684*d4002b98SHong Zhang 1685*d4002b98SHong Zhang PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1686*d4002b98SHong Zhang { 1687*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1688*d4002b98SHong Zhang PetscErrorCode ierr; 1689*d4002b98SHong Zhang 1690*d4002b98SHong Zhang PetscFunctionBegin; 1691*d4002b98SHong Zhang if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1692*d4002b98SHong Zhang 1693*d4002b98SHong Zhang /* allocate space for values if not already there */ 1694*d4002b98SHong Zhang if (!a->saved_values) { 1695*d4002b98SHong Zhang ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1696*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1697*d4002b98SHong Zhang } 1698*d4002b98SHong Zhang 1699*d4002b98SHong Zhang /* copy values over */ 1700*d4002b98SHong Zhang ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1701*d4002b98SHong Zhang PetscFunctionReturn(0); 1702*d4002b98SHong Zhang } 1703*d4002b98SHong Zhang 1704*d4002b98SHong Zhang PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1705*d4002b98SHong Zhang { 1706*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1707*d4002b98SHong Zhang PetscErrorCode ierr; 1708*d4002b98SHong Zhang 1709*d4002b98SHong Zhang PetscFunctionBegin; 1710*d4002b98SHong Zhang if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1711*d4002b98SHong Zhang if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1712*d4002b98SHong Zhang /* copy values over */ 1713*d4002b98SHong Zhang ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1714*d4002b98SHong Zhang PetscFunctionReturn(0); 1715*d4002b98SHong Zhang } 1716*d4002b98SHong Zhang 1717*d4002b98SHong Zhang /*@C 1718*d4002b98SHong Zhang MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1719*d4002b98SHong Zhang 1720*d4002b98SHong Zhang Not Collective 1721*d4002b98SHong Zhang 1722*d4002b98SHong Zhang Input Parameters: 1723*d4002b98SHong Zhang . mat - a MATSEQSELL matrix 1724*d4002b98SHong Zhang . array - pointer to the data 1725*d4002b98SHong Zhang 1726*d4002b98SHong Zhang Level: intermediate 1727*d4002b98SHong Zhang 1728*d4002b98SHong Zhang .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1729*d4002b98SHong Zhang @*/ 1730*d4002b98SHong Zhang PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1731*d4002b98SHong Zhang { 1732*d4002b98SHong Zhang PetscErrorCode ierr; 1733*d4002b98SHong Zhang 1734*d4002b98SHong Zhang PetscFunctionBegin; 1735*d4002b98SHong Zhang ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1736*d4002b98SHong Zhang PetscFunctionReturn(0); 1737*d4002b98SHong Zhang } 1738*d4002b98SHong Zhang 1739*d4002b98SHong Zhang PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1740*d4002b98SHong Zhang { 1741*d4002b98SHong Zhang Mat_SeqSELL *b; 1742*d4002b98SHong Zhang PetscMPIInt size; 1743*d4002b98SHong Zhang PetscErrorCode ierr; 1744*d4002b98SHong Zhang 1745*d4002b98SHong Zhang PetscFunctionBegin; 1746*d4002b98SHong Zhang ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1747*d4002b98SHong Zhang if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1748*d4002b98SHong Zhang 1749*d4002b98SHong Zhang ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1750*d4002b98SHong Zhang 1751*d4002b98SHong Zhang B->data = (void*)b; 1752*d4002b98SHong Zhang 1753*d4002b98SHong Zhang ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1754*d4002b98SHong Zhang 1755*d4002b98SHong Zhang b->row = 0; 1756*d4002b98SHong Zhang b->col = 0; 1757*d4002b98SHong Zhang b->icol = 0; 1758*d4002b98SHong Zhang b->reallocs = 0; 1759*d4002b98SHong Zhang b->ignorezeroentries = PETSC_FALSE; 1760*d4002b98SHong Zhang b->roworiented = PETSC_TRUE; 1761*d4002b98SHong Zhang b->nonew = 0; 1762*d4002b98SHong Zhang b->diag = 0; 1763*d4002b98SHong Zhang b->solve_work = 0; 1764*d4002b98SHong Zhang B->spptr = 0; 1765*d4002b98SHong Zhang b->saved_values = 0; 1766*d4002b98SHong Zhang b->idiag = 0; 1767*d4002b98SHong Zhang b->mdiag = 0; 1768*d4002b98SHong Zhang b->ssor_work = 0; 1769*d4002b98SHong Zhang b->omega = 1.0; 1770*d4002b98SHong Zhang b->fshift = 0.0; 1771*d4002b98SHong Zhang b->idiagvalid = PETSC_FALSE; 1772*d4002b98SHong Zhang b->ibdiagvalid = PETSC_FALSE; 1773*d4002b98SHong Zhang b->keepnonzeropattern = PETSC_FALSE; 1774*d4002b98SHong Zhang 1775*d4002b98SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1776*d4002b98SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1777*d4002b98SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1778*d4002b98SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1779*d4002b98SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1780*d4002b98SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1781*d4002b98SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1782*d4002b98SHong Zhang PetscFunctionReturn(0); 1783*d4002b98SHong Zhang } 1784*d4002b98SHong Zhang 1785*d4002b98SHong Zhang /* 1786*d4002b98SHong Zhang Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1787*d4002b98SHong Zhang */ 1788*d4002b98SHong Zhang PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1789*d4002b98SHong Zhang { 1790*d4002b98SHong Zhang Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1791*d4002b98SHong Zhang PetscInt i,m=A->rmap->n; 1792*d4002b98SHong Zhang PetscInt totalslices=a->totalslices; 1793*d4002b98SHong Zhang PetscErrorCode ierr; 1794*d4002b98SHong Zhang 1795*d4002b98SHong Zhang PetscFunctionBegin; 1796*d4002b98SHong Zhang c = (Mat_SeqSELL*)C->data; 1797*d4002b98SHong Zhang 1798*d4002b98SHong Zhang C->factortype = A->factortype; 1799*d4002b98SHong Zhang c->row = 0; 1800*d4002b98SHong Zhang c->col = 0; 1801*d4002b98SHong Zhang c->icol = 0; 1802*d4002b98SHong Zhang c->reallocs = 0; 1803*d4002b98SHong Zhang 1804*d4002b98SHong Zhang C->assembled = PETSC_TRUE; 1805*d4002b98SHong Zhang 1806*d4002b98SHong Zhang ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 1807*d4002b98SHong Zhang ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 1808*d4002b98SHong Zhang 1809*d4002b98SHong Zhang ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 1810*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1811*d4002b98SHong Zhang ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 1812*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 1813*d4002b98SHong Zhang 1814*d4002b98SHong Zhang for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 1815*d4002b98SHong Zhang for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 1816*d4002b98SHong Zhang 1817*d4002b98SHong Zhang /* allocate the matrix space */ 1818*d4002b98SHong Zhang if (mallocmatspace) { 1819*d4002b98SHong Zhang ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 1820*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1821*d4002b98SHong Zhang 1822*d4002b98SHong Zhang c->singlemalloc = PETSC_TRUE; 1823*d4002b98SHong Zhang 1824*d4002b98SHong Zhang if (m > 0) { 1825*d4002b98SHong Zhang ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 1826*d4002b98SHong Zhang if (cpvalues == MAT_COPY_VALUES) { 1827*d4002b98SHong Zhang ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1828*d4002b98SHong Zhang } else { 1829*d4002b98SHong Zhang ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1830*d4002b98SHong Zhang } 1831*d4002b98SHong Zhang } 1832*d4002b98SHong Zhang } 1833*d4002b98SHong Zhang 1834*d4002b98SHong Zhang c->ignorezeroentries = a->ignorezeroentries; 1835*d4002b98SHong Zhang c->roworiented = a->roworiented; 1836*d4002b98SHong Zhang c->nonew = a->nonew; 1837*d4002b98SHong Zhang if (a->diag) { 1838*d4002b98SHong Zhang ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 1839*d4002b98SHong Zhang ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1840*d4002b98SHong Zhang for (i=0; i<m; i++) { 1841*d4002b98SHong Zhang c->diag[i] = a->diag[i]; 1842*d4002b98SHong Zhang } 1843*d4002b98SHong Zhang } else c->diag = 0; 1844*d4002b98SHong Zhang 1845*d4002b98SHong Zhang c->solve_work = 0; 1846*d4002b98SHong Zhang c->saved_values = 0; 1847*d4002b98SHong Zhang c->idiag = 0; 1848*d4002b98SHong Zhang c->ssor_work = 0; 1849*d4002b98SHong Zhang c->keepnonzeropattern = a->keepnonzeropattern; 1850*d4002b98SHong Zhang c->free_val = PETSC_TRUE; 1851*d4002b98SHong Zhang c->free_colidx = PETSC_TRUE; 1852*d4002b98SHong Zhang 1853*d4002b98SHong Zhang c->maxallocmat = a->maxallocmat; 1854*d4002b98SHong Zhang c->maxallocrow = a->maxallocrow; 1855*d4002b98SHong Zhang c->rlenmax = a->rlenmax; 1856*d4002b98SHong Zhang c->nz = a->nz; 1857*d4002b98SHong Zhang C->preallocated = PETSC_TRUE; 1858*d4002b98SHong Zhang 1859*d4002b98SHong Zhang c->nonzerorowcnt = a->nonzerorowcnt; 1860*d4002b98SHong Zhang C->nonzerostate = A->nonzerostate; 1861*d4002b98SHong Zhang 1862*d4002b98SHong Zhang ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 1863*d4002b98SHong Zhang PetscFunctionReturn(0); 1864*d4002b98SHong Zhang } 1865*d4002b98SHong Zhang 1866*d4002b98SHong Zhang PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 1867*d4002b98SHong Zhang { 1868*d4002b98SHong Zhang PetscErrorCode ierr; 1869*d4002b98SHong Zhang 1870*d4002b98SHong Zhang PetscFunctionBegin; 1871*d4002b98SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 1872*d4002b98SHong Zhang ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 1873*d4002b98SHong Zhang if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 1874*d4002b98SHong Zhang ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 1875*d4002b98SHong Zhang } 1876*d4002b98SHong Zhang ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1877*d4002b98SHong Zhang ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 1878*d4002b98SHong Zhang PetscFunctionReturn(0); 1879*d4002b98SHong Zhang } 1880*d4002b98SHong Zhang 1881*d4002b98SHong Zhang /*@C 1882*d4002b98SHong Zhang MatCreateSeqSELL - Creates a sparse matrix in SELL format. 1883*d4002b98SHong Zhang 1884*d4002b98SHong Zhang Collective on MPI_Comm 1885*d4002b98SHong Zhang 1886*d4002b98SHong Zhang Input Parameters: 1887*d4002b98SHong Zhang + comm - MPI communicator, set to PETSC_COMM_SELF 1888*d4002b98SHong Zhang . m - number of rows 1889*d4002b98SHong Zhang . n - number of columns 1890*d4002b98SHong Zhang . rlenmax - maximum number of nonzeros in a row 1891*d4002b98SHong Zhang - rlen - array containing the number of nonzeros in the various rows 1892*d4002b98SHong Zhang (possibly different for each row) or NULL 1893*d4002b98SHong Zhang 1894*d4002b98SHong Zhang Output Parameter: 1895*d4002b98SHong Zhang . A - the matrix 1896*d4002b98SHong Zhang 1897*d4002b98SHong Zhang It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1898*d4002b98SHong Zhang MatXXXXSetPreallocation() paradgm instead of this routine directly. 1899*d4002b98SHong Zhang [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 1900*d4002b98SHong Zhang 1901*d4002b98SHong Zhang Notes: 1902*d4002b98SHong Zhang If nnz is given then nz is ignored 1903*d4002b98SHong Zhang 1904*d4002b98SHong Zhang Specify the preallocated storage with either rlenmax or rlen (not both). 1905*d4002b98SHong Zhang Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 1906*d4002b98SHong Zhang allocation. For large problems you MUST preallocate memory or you 1907*d4002b98SHong Zhang will get TERRIBLE performance, see the users' manual chapter on matrices. 1908*d4002b98SHong Zhang 1909*d4002b98SHong Zhang Level: intermediate 1910*d4002b98SHong Zhang 1911*d4002b98SHong Zhang .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 1912*d4002b98SHong Zhang 1913*d4002b98SHong Zhang @*/ 1914*d4002b98SHong Zhang PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 1915*d4002b98SHong Zhang { 1916*d4002b98SHong Zhang PetscErrorCode ierr; 1917*d4002b98SHong Zhang 1918*d4002b98SHong Zhang PetscFunctionBegin; 1919*d4002b98SHong Zhang ierr = MatCreate(comm,A);CHKERRQ(ierr); 1920*d4002b98SHong Zhang ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1921*d4002b98SHong Zhang ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 1922*d4002b98SHong Zhang ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 1923*d4002b98SHong Zhang PetscFunctionReturn(0); 1924*d4002b98SHong Zhang } 1925*d4002b98SHong Zhang 1926*d4002b98SHong Zhang PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 1927*d4002b98SHong Zhang { 1928*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 1929*d4002b98SHong Zhang PetscInt totalslices=a->totalslices; 1930*d4002b98SHong Zhang PetscErrorCode ierr; 1931*d4002b98SHong Zhang 1932*d4002b98SHong Zhang PetscFunctionBegin; 1933*d4002b98SHong Zhang /* If the matrix dimensions are not equal,or no of nonzeros */ 1934*d4002b98SHong Zhang if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 1935*d4002b98SHong Zhang *flg = PETSC_FALSE; 1936*d4002b98SHong Zhang PetscFunctionReturn(0); 1937*d4002b98SHong Zhang } 1938*d4002b98SHong Zhang /* if the a->colidx are the same */ 1939*d4002b98SHong Zhang ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 1940*d4002b98SHong Zhang if (!*flg) PetscFunctionReturn(0); 1941*d4002b98SHong Zhang /* if a->val are the same */ 1942*d4002b98SHong Zhang ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1943*d4002b98SHong Zhang PetscFunctionReturn(0); 1944*d4002b98SHong Zhang } 1945*d4002b98SHong Zhang 1946*d4002b98SHong Zhang PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 1947*d4002b98SHong Zhang { 1948*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1949*d4002b98SHong Zhang 1950*d4002b98SHong Zhang PetscFunctionBegin; 1951*d4002b98SHong Zhang a->idiagvalid = PETSC_FALSE; 1952*d4002b98SHong Zhang a->ibdiagvalid = PETSC_FALSE; 1953*d4002b98SHong Zhang PetscFunctionReturn(0); 1954*d4002b98SHong Zhang } 1955*d4002b98SHong Zhang 1956*d4002b98SHong Zhang PetscErrorCode MatConjugate_SeqSELL(Mat A) 1957*d4002b98SHong Zhang { 1958*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX) 1959*d4002b98SHong Zhang Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1960*d4002b98SHong Zhang PetscInt i; 1961*d4002b98SHong Zhang PetscScalar *val = a->val; 1962*d4002b98SHong Zhang 1963*d4002b98SHong Zhang PetscFunctionBegin; 1964*d4002b98SHong Zhang for (i=0; i<a->sliidx[a->totalslices]; i++) { 1965*d4002b98SHong Zhang val[i] = PetscConj(val[i]); 1966*d4002b98SHong Zhang } 1967*d4002b98SHong Zhang #else 1968*d4002b98SHong Zhang PetscFunctionBegin; 1969*d4002b98SHong Zhang #endif 1970*d4002b98SHong Zhang PetscFunctionReturn(0); 1971*d4002b98SHong Zhang } 1972