xref: /petsc/src/mat/impls/sell/seq/sell.c (revision d4002b989b2d6051384fadc0d4465c98dea57b78)
1*d4002b98SHong Zhang 
2*d4002b98SHong Zhang /*
3*d4002b98SHong Zhang   Defines the basic matrix operations for the SELL matrix storage format.
4*d4002b98SHong Zhang */
5*d4002b98SHong Zhang #include <../src/mat/impls/sell/seq/sell.h>  /*I   "petscmat.h"  I*/
6*d4002b98SHong Zhang #include <petscblaslapack.h>
7*d4002b98SHong Zhang #include <petsc/private/kernels/blocktranspose.h>
8*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H)
9*d4002b98SHong Zhang #include <immintrin.h>
10*d4002b98SHong Zhang 
11*d4002b98SHong Zhang #if !defined(_MM_SCALE_8)
12*d4002b98SHong Zhang #define _MM_SCALE_8    8
13*d4002b98SHong Zhang #endif
14*d4002b98SHong Zhang 
15*d4002b98SHong Zhang #if defined(__AVX512F__)
16*d4002b98SHong Zhang /* these do not work
17*d4002b98SHong Zhang  vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
18*d4002b98SHong Zhang  vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
19*d4002b98SHong Zhang */
20*d4002b98SHong Zhang #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
21*d4002b98SHong Zhang /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
22*d4002b98SHong Zhang vec_idx  = _mm256_load_si256((__m256i const*)acolidx); \
23*d4002b98SHong Zhang vec_vals = _mm512_load_pd(aval); \
24*d4002b98SHong Zhang vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
25*d4002b98SHong Zhang vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y);
26*d4002b98SHong Zhang 
27*d4002b98SHong Zhang #endif
28*d4002b98SHong Zhang #endif  /* PETSC_HAVE_IMMINTRIN_H */
29*d4002b98SHong Zhang 
30*d4002b98SHong Zhang /*@C
31*d4002b98SHong Zhang  MatSeqSELLSetPreallocation - For good matrix assembly performance
32*d4002b98SHong Zhang  the user should preallocate the matrix storage by setting the parameter nz
33*d4002b98SHong Zhang  (or the array nnz).  By setting these parameters accurately, performance
34*d4002b98SHong Zhang  during matrix assembly can be increased significantly.
35*d4002b98SHong Zhang 
36*d4002b98SHong Zhang  Collective on MPI_Comm
37*d4002b98SHong Zhang 
38*d4002b98SHong Zhang  Input Parameters:
39*d4002b98SHong Zhang  +  B - The matrix
40*d4002b98SHong Zhang  .  nz - number of nonzeros per row (same for all rows)
41*d4002b98SHong Zhang  -  nnz - array containing the number of nonzeros in the various rows
42*d4002b98SHong Zhang  (possibly different for each row) or NULL
43*d4002b98SHong Zhang 
44*d4002b98SHong Zhang  Notes:
45*d4002b98SHong Zhang  If nnz is given then nz is ignored.
46*d4002b98SHong Zhang 
47*d4002b98SHong Zhang  Specify the preallocated storage with either nz or nnz (not both).
48*d4002b98SHong Zhang  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
49*d4002b98SHong Zhang  allocation.  For large problems you MUST preallocate memory or you
50*d4002b98SHong Zhang  will get TERRIBLE performance, see the users' manual chapter on matrices.
51*d4002b98SHong Zhang 
52*d4002b98SHong Zhang  You can call MatGetInfo() to get information on how effective the preallocation was;
53*d4002b98SHong Zhang  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
54*d4002b98SHong Zhang  You can also run with the option -info and look for messages with the string
55*d4002b98SHong Zhang  malloc in them to see if additional memory allocation was needed.
56*d4002b98SHong Zhang 
57*d4002b98SHong Zhang  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
58*d4002b98SHong Zhang  entries or columns indices.
59*d4002b98SHong Zhang 
60*d4002b98SHong Zhang  The maximum number of nonzeos in any row should be as accuate as possible.
61*d4002b98SHong Zhang  If it is underesitmated, you will get bad performance due to reallocation
62*d4002b98SHong Zhang  (MatSeqXSELLReallocateSELL).
63*d4002b98SHong Zhang 
64*d4002b98SHong Zhang  Level: intermediate
65*d4002b98SHong Zhang 
66*d4002b98SHong Zhang  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
67*d4002b98SHong Zhang 
68*d4002b98SHong Zhang  @*/
69*d4002b98SHong Zhang PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
70*d4002b98SHong Zhang {
71*d4002b98SHong Zhang   PetscErrorCode ierr;
72*d4002b98SHong Zhang 
73*d4002b98SHong Zhang   PetscFunctionBegin;
74*d4002b98SHong Zhang   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
75*d4002b98SHong Zhang   PetscValidType(B,1);
76*d4002b98SHong Zhang   ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr);
77*d4002b98SHong Zhang   PetscFunctionReturn(0);
78*d4002b98SHong Zhang }
79*d4002b98SHong Zhang 
80*d4002b98SHong Zhang PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
81*d4002b98SHong Zhang {
82*d4002b98SHong Zhang   Mat_SeqSELL    *b;
83*d4002b98SHong Zhang   PetscInt       i,j,totalslices;
84*d4002b98SHong Zhang   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
85*d4002b98SHong Zhang   PetscErrorCode ierr;
86*d4002b98SHong Zhang 
87*d4002b98SHong Zhang   PetscFunctionBegin;
88*d4002b98SHong Zhang   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
89*d4002b98SHong Zhang   if (maxallocrow == MAT_SKIP_ALLOCATION) {
90*d4002b98SHong Zhang     skipallocation = PETSC_TRUE;
91*d4002b98SHong Zhang     maxallocrow    = 0;
92*d4002b98SHong Zhang   }
93*d4002b98SHong Zhang 
94*d4002b98SHong Zhang   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
95*d4002b98SHong Zhang   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
96*d4002b98SHong Zhang 
97*d4002b98SHong Zhang   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
98*d4002b98SHong Zhang   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
99*d4002b98SHong Zhang   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
100*d4002b98SHong Zhang   if (rlen) {
101*d4002b98SHong Zhang     for (i=0; i<B->rmap->n; i++) {
102*d4002b98SHong Zhang       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
103*d4002b98SHong Zhang       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
104*d4002b98SHong Zhang     }
105*d4002b98SHong Zhang   }
106*d4002b98SHong Zhang 
107*d4002b98SHong Zhang   B->preallocated = PETSC_TRUE;
108*d4002b98SHong Zhang 
109*d4002b98SHong Zhang   b = (Mat_SeqSELL*)B->data;
110*d4002b98SHong Zhang 
111*d4002b98SHong Zhang   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
112*d4002b98SHong Zhang   b->totalslices = totalslices;
113*d4002b98SHong Zhang   if (!skipallocation) {
114*d4002b98SHong Zhang     if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);
115*d4002b98SHong Zhang 
116*d4002b98SHong Zhang     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
117*d4002b98SHong Zhang       ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr);
118*d4002b98SHong Zhang       ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
119*d4002b98SHong Zhang     }
120*d4002b98SHong Zhang     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
121*d4002b98SHong Zhang       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
122*d4002b98SHong Zhang       else if (maxallocrow < 0) maxallocrow = 1;
123*d4002b98SHong Zhang       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
124*d4002b98SHong Zhang     } else {
125*d4002b98SHong Zhang       maxallocrow = 0;
126*d4002b98SHong Zhang       b->sliidx[0] = 0;
127*d4002b98SHong Zhang       for (i=1; i<totalslices; i++) {
128*d4002b98SHong Zhang         b->sliidx[i] = 0;
129*d4002b98SHong Zhang         for (j=0;j<8;j++) {
130*d4002b98SHong Zhang           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
131*d4002b98SHong Zhang         }
132*d4002b98SHong Zhang         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
133*d4002b98SHong Zhang         b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i];
134*d4002b98SHong Zhang       }
135*d4002b98SHong Zhang       /* last slice */
136*d4002b98SHong Zhang       b->sliidx[totalslices] = 0;
137*d4002b98SHong Zhang       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
138*d4002b98SHong Zhang       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
139*d4002b98SHong Zhang       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
140*d4002b98SHong Zhang     }
141*d4002b98SHong Zhang 
142*d4002b98SHong Zhang     /* allocate space for val, colidx, rlen */
143*d4002b98SHong Zhang     /* FIXME: should B's old memory be unlogged? */
144*d4002b98SHong Zhang     ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr);
145*d4002b98SHong Zhang     /* FIXME: assuming an element of the bit array takes 8 bits */
146*d4002b98SHong Zhang     ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr);
147*d4002b98SHong Zhang     ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
148*d4002b98SHong Zhang     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
149*d4002b98SHong Zhang     ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr);
150*d4002b98SHong Zhang     ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr);
151*d4002b98SHong Zhang 
152*d4002b98SHong Zhang     b->singlemalloc = PETSC_TRUE;
153*d4002b98SHong Zhang     b->free_val     = PETSC_TRUE;
154*d4002b98SHong Zhang     b->free_colidx  = PETSC_TRUE;
155*d4002b98SHong Zhang   } else {
156*d4002b98SHong Zhang     b->free_val    = PETSC_FALSE;
157*d4002b98SHong Zhang     b->free_colidx = PETSC_FALSE;
158*d4002b98SHong Zhang   }
159*d4002b98SHong Zhang 
160*d4002b98SHong Zhang   b->nz               = 0;
161*d4002b98SHong Zhang   b->maxallocrow      = maxallocrow;
162*d4002b98SHong Zhang   b->rlenmax          = maxallocrow;
163*d4002b98SHong Zhang   b->maxallocmat      = b->sliidx[totalslices];
164*d4002b98SHong Zhang   B->info.nz_unneeded = (double)b->maxallocmat;
165*d4002b98SHong Zhang   if (realalloc) {
166*d4002b98SHong Zhang     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
167*d4002b98SHong Zhang   }
168*d4002b98SHong Zhang   PetscFunctionReturn(0);
169*d4002b98SHong Zhang }
170*d4002b98SHong Zhang 
171*d4002b98SHong Zhang PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
172*d4002b98SHong Zhang {
173*d4002b98SHong Zhang   Mat            B;
174*d4002b98SHong Zhang   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data;
175*d4002b98SHong Zhang   PetscInt       i,j,row;
176*d4002b98SHong Zhang   PetscBool      isnonzero;
177*d4002b98SHong Zhang   PetscErrorCode ierr;
178*d4002b98SHong Zhang 
179*d4002b98SHong Zhang   PetscFunctionBegin;
180*d4002b98SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
181*d4002b98SHong Zhang   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
182*d4002b98SHong Zhang   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
183*d4002b98SHong Zhang   ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr);
184*d4002b98SHong Zhang   ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
185*d4002b98SHong Zhang 
186*d4002b98SHong Zhang   for (i=0; i<a->totalslices; i++) { /* loop over slices */
187*d4002b98SHong Zhang     for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
188*d4002b98SHong Zhang       isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]);
189*d4002b98SHong Zhang       if (isnonzero) {
190*d4002b98SHong Zhang         ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr);
191*d4002b98SHong Zhang       }
192*d4002b98SHong Zhang     }
193*d4002b98SHong Zhang   }
194*d4002b98SHong Zhang   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
195*d4002b98SHong Zhang   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
196*d4002b98SHong Zhang   B->rmap->bs = A->rmap->bs;
197*d4002b98SHong Zhang 
198*d4002b98SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
199*d4002b98SHong Zhang     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
200*d4002b98SHong Zhang   } else {
201*d4002b98SHong Zhang     *newmat = B;
202*d4002b98SHong Zhang   }
203*d4002b98SHong Zhang   PetscFunctionReturn(0);
204*d4002b98SHong Zhang }
205*d4002b98SHong Zhang 
206*d4002b98SHong Zhang #include <../src/mat/impls/aij/seq/aij.h>
207*d4002b98SHong Zhang 
208*d4002b98SHong Zhang PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
209*d4002b98SHong Zhang {
210*d4002b98SHong Zhang   Mat               B;
211*d4002b98SHong Zhang   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
212*d4002b98SHong Zhang   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
213*d4002b98SHong Zhang   const PetscInt    *cols;
214*d4002b98SHong Zhang   const PetscScalar *vals;
215*d4002b98SHong Zhang   PetscErrorCode    ierr;
216*d4002b98SHong Zhang 
217*d4002b98SHong Zhang   PetscFunctionBegin;
218*d4002b98SHong Zhang   if (A->rmap->bs > 1) {
219*d4002b98SHong Zhang     ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr);
220*d4002b98SHong Zhang     PetscFunctionReturn(0);
221*d4002b98SHong Zhang   }
222*d4002b98SHong Zhang   /* Can we just use ilen? */
223*d4002b98SHong Zhang   ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr);
224*d4002b98SHong Zhang   for (i=0; i<m; i++) {
225*d4002b98SHong Zhang     rowlengths[i] = ai[i+1] - ai[i];
226*d4002b98SHong Zhang   }
227*d4002b98SHong Zhang 
228*d4002b98SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
229*d4002b98SHong Zhang   ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr);
230*d4002b98SHong Zhang   ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr);
231*d4002b98SHong Zhang   ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr);
232*d4002b98SHong Zhang   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
233*d4002b98SHong Zhang 
234*d4002b98SHong Zhang   ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
235*d4002b98SHong Zhang 
236*d4002b98SHong Zhang   for (row=0; row<m; row++) {
237*d4002b98SHong Zhang     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
238*d4002b98SHong Zhang     ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
239*d4002b98SHong Zhang     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
240*d4002b98SHong Zhang   }
241*d4002b98SHong Zhang   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
242*d4002b98SHong Zhang   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
243*d4002b98SHong Zhang   B->rmap->bs = A->rmap->bs;
244*d4002b98SHong Zhang 
245*d4002b98SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
246*d4002b98SHong Zhang     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
247*d4002b98SHong Zhang   } else {
248*d4002b98SHong Zhang     *newmat = B;
249*d4002b98SHong Zhang   }
250*d4002b98SHong Zhang   PetscFunctionReturn(0);
251*d4002b98SHong Zhang }
252*d4002b98SHong Zhang 
253*d4002b98SHong Zhang PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
254*d4002b98SHong Zhang {
255*d4002b98SHong Zhang   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
256*d4002b98SHong Zhang   PetscScalar       *y;
257*d4002b98SHong Zhang   const PetscScalar *x;
258*d4002b98SHong Zhang   const MatScalar   *aval=a->val;
259*d4002b98SHong Zhang   PetscInt          totalslices=a->totalslices;
260*d4002b98SHong Zhang   const PetscInt    *acolidx=a->colidx;
261*d4002b98SHong Zhang   PetscInt          i,j=0;
262*d4002b98SHong Zhang   PetscErrorCode    ierr;
263*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
264*d4002b98SHong Zhang   __m512d           vec_x,vec_y,vec_vals;
265*d4002b98SHong Zhang   __m256i           vec_idx;
266*d4002b98SHong Zhang   __mmask8          mask;
267*d4002b98SHong Zhang   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
268*d4002b98SHong Zhang   __m256i           vec_idx2,vec_idx3,vec_idx4;
269*d4002b98SHong Zhang #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
270*d4002b98SHong Zhang   __m128d           vec_x_tmp;
271*d4002b98SHong Zhang   __m256d           vec_x,vec_y,vec_y2,vec_vals;
272*d4002b98SHong Zhang   MatScalar         yval;
273*d4002b98SHong Zhang   PetscInt          r,rows_left,row,nnz_in_row;
274*d4002b98SHong Zhang #else
275*d4002b98SHong Zhang   PetscScalar       sum[8];
276*d4002b98SHong Zhang #endif
277*d4002b98SHong Zhang 
278*d4002b98SHong Zhang #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
279*d4002b98SHong Zhang #pragma disjoint(*x,*y,*aval)
280*d4002b98SHong Zhang #endif
281*d4002b98SHong Zhang 
282*d4002b98SHong Zhang   PetscFunctionBegin;
283*d4002b98SHong Zhang   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
284*d4002b98SHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
285*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
286*d4002b98SHong Zhang   for (i=0; i<totalslices; i++) { /* loop over slices */
287*d4002b98SHong Zhang     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
288*d4002b98SHong Zhang     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
289*d4002b98SHong Zhang 
290*d4002b98SHong Zhang     vec_y  = _mm512_setzero_pd();
291*d4002b98SHong Zhang     vec_y2 = _mm512_setzero_pd();
292*d4002b98SHong Zhang     vec_y3 = _mm512_setzero_pd();
293*d4002b98SHong Zhang     vec_y4 = _mm512_setzero_pd();
294*d4002b98SHong Zhang 
295*d4002b98SHong Zhang     j = a->sliidx[i]>>3; /* index of the bit array; 8 bits are read at each time, corresponding to a slice columnn */
296*d4002b98SHong Zhang     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
297*d4002b98SHong Zhang     case 3:
298*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
299*d4002b98SHong Zhang       acolidx += 8; aval += 8;
300*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
301*d4002b98SHong Zhang       acolidx += 8; aval += 8;
302*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
303*d4002b98SHong Zhang       acolidx += 8; aval += 8;
304*d4002b98SHong Zhang       j += 3;
305*d4002b98SHong Zhang       break;
306*d4002b98SHong Zhang     case 2:
307*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
308*d4002b98SHong Zhang       acolidx += 8; aval += 8;
309*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
310*d4002b98SHong Zhang       acolidx += 8; aval += 8;
311*d4002b98SHong Zhang       j += 2;
312*d4002b98SHong Zhang       break;
313*d4002b98SHong Zhang     case 1:
314*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
315*d4002b98SHong Zhang       acolidx += 8; aval += 8;
316*d4002b98SHong Zhang       j += 1;
317*d4002b98SHong Zhang       break;
318*d4002b98SHong Zhang     }
319*d4002b98SHong Zhang     #pragma novector
320*d4002b98SHong Zhang     for (; j<(a->sliidx[i+1]>>3); j+=4) {
321*d4002b98SHong Zhang       /*
322*d4002b98SHong Zhang       vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
323*d4002b98SHong Zhang       vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
324*d4002b98SHong Zhang       */
325*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
326*d4002b98SHong Zhang       acolidx += 8; aval += 8;
327*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
328*d4002b98SHong Zhang       acolidx += 8; aval += 8;
329*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
330*d4002b98SHong Zhang       acolidx += 8; aval += 8;
331*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
332*d4002b98SHong Zhang       acolidx += 8; aval += 8;
333*d4002b98SHong Zhang     }
334*d4002b98SHong Zhang 
335*d4002b98SHong Zhang     vec_y = _mm512_add_pd(vec_y,vec_y2);
336*d4002b98SHong Zhang     vec_y = _mm512_add_pd(vec_y,vec_y3);
337*d4002b98SHong Zhang     vec_y = _mm512_add_pd(vec_y,vec_y4);
338*d4002b98SHong Zhang     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
339*d4002b98SHong Zhang       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
340*d4002b98SHong Zhang       _mm512_mask_store_pd(&y[8*i],mask,vec_y);
341*d4002b98SHong Zhang     } else {
342*d4002b98SHong Zhang       _mm512_store_pd(&y[8*i],vec_y);
343*d4002b98SHong Zhang     }
344*d4002b98SHong Zhang   }
345*d4002b98SHong Zhang #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
346*d4002b98SHong Zhang   for (i=0; i<totalslices; i++) { /* loop over full slices */
347*d4002b98SHong Zhang     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
348*d4002b98SHong Zhang     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
349*d4002b98SHong Zhang 
350*d4002b98SHong Zhang     vec_y  = _mm256_setzero_pd();
351*d4002b98SHong Zhang     vec_y2 = _mm256_setzero_pd();
352*d4002b98SHong Zhang 
353*d4002b98SHong Zhang     /* last slice may have padding rows. Don't use vectorization. */
354*d4002b98SHong Zhang     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
355*d4002b98SHong Zhang       rows_left = A->rmap->n - 8*i;
356*d4002b98SHong Zhang       for (r=0; r<rows_left; ++r) {
357*d4002b98SHong Zhang         yval = (MatScalar)0;
358*d4002b98SHong Zhang         row = 8*i + r;
359*d4002b98SHong Zhang         nnz_in_row = a->rlen[row];
360*d4002b98SHong Zhang         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
361*d4002b98SHong Zhang         y[row] = yval;
362*d4002b98SHong Zhang       }
363*d4002b98SHong Zhang       break;
364*d4002b98SHong Zhang     }
365*d4002b98SHong Zhang 
366*d4002b98SHong Zhang     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
367*d4002b98SHong Zhang     for (; j<a->sliidx[i+1]; j+=8) {
368*d4002b98SHong Zhang       vec_vals  = _mm256_loadu_pd(aval);
369*d4002b98SHong Zhang       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
370*d4002b98SHong Zhang       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
371*d4002b98SHong Zhang       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
372*d4002b98SHong Zhang       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
373*d4002b98SHong Zhang       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
374*d4002b98SHong Zhang       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
375*d4002b98SHong Zhang       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
376*d4002b98SHong Zhang       aval     += 4;
377*d4002b98SHong Zhang 
378*d4002b98SHong Zhang       vec_vals  = _mm256_loadu_pd(aval);
379*d4002b98SHong Zhang       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
380*d4002b98SHong Zhang       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
381*d4002b98SHong Zhang       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
382*d4002b98SHong Zhang       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
383*d4002b98SHong Zhang       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
384*d4002b98SHong Zhang       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
385*d4002b98SHong Zhang       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
386*d4002b98SHong Zhang       aval     += 4;
387*d4002b98SHong Zhang     }
388*d4002b98SHong Zhang 
389*d4002b98SHong Zhang     _mm256_storeu_pd(y + i*8,     vec_y);
390*d4002b98SHong Zhang     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
391*d4002b98SHong Zhang   }
392*d4002b98SHong Zhang 
393*d4002b98SHong Zhang #else
394*d4002b98SHong Zhang   for (i=0; i<totalslices; i++) { /* loop over slices */
395*d4002b98SHong Zhang     for (j=0; j<8; j++) sum[j] = 0.0;
396*d4002b98SHong Zhang 
397*d4002b98SHong Zhang     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
398*d4002b98SHong Zhang       sum[0] += aval[j]*x[acolidx[j]];
399*d4002b98SHong Zhang       sum[1] += aval[j+1]*x[acolidx[j+1]];
400*d4002b98SHong Zhang       sum[2] += aval[j+2]*x[acolidx[j+2]];
401*d4002b98SHong Zhang       sum[3] += aval[j+3]*x[acolidx[j+3]];
402*d4002b98SHong Zhang       sum[4] += aval[j+4]*x[acolidx[j+4]];
403*d4002b98SHong Zhang       sum[5] += aval[j+5]*x[acolidx[j+5]];
404*d4002b98SHong Zhang       sum[6] += aval[j+6]*x[acolidx[j+6]];
405*d4002b98SHong Zhang       sum[7] += aval[j+7]*x[acolidx[j+7]];
406*d4002b98SHong Zhang     }
407*d4002b98SHong Zhang     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
408*d4002b98SHong Zhang       for(j=0;j<(A->rmap->n & 0x07);j++) y[8*i+j] = sum[j];
409*d4002b98SHong Zhang     } else {
410*d4002b98SHong Zhang       y[8*i]   = sum[0];
411*d4002b98SHong Zhang       y[8*i+1] = sum[1];
412*d4002b98SHong Zhang       y[8*i+2] = sum[2];
413*d4002b98SHong Zhang       y[8*i+3] = sum[3];
414*d4002b98SHong Zhang       y[8*i+4] = sum[4];
415*d4002b98SHong Zhang       y[8*i+5] = sum[5];
416*d4002b98SHong Zhang       y[8*i+6] = sum[6];
417*d4002b98SHong Zhang       y[8*i+7] = sum[7];
418*d4002b98SHong Zhang     }
419*d4002b98SHong Zhang   }
420*d4002b98SHong Zhang #endif
421*d4002b98SHong Zhang 
422*d4002b98SHong Zhang   ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */
423*d4002b98SHong Zhang   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
424*d4002b98SHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
425*d4002b98SHong Zhang   PetscFunctionReturn(0);
426*d4002b98SHong Zhang }
427*d4002b98SHong Zhang 
428*d4002b98SHong Zhang #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
429*d4002b98SHong Zhang PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
430*d4002b98SHong Zhang {
431*d4002b98SHong Zhang   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
432*d4002b98SHong Zhang   PetscScalar       *y,*z;
433*d4002b98SHong Zhang   const PetscScalar *x;
434*d4002b98SHong Zhang   const MatScalar   *aval=a->val;
435*d4002b98SHong Zhang   PetscInt          totalslices=a->totalslices;
436*d4002b98SHong Zhang   const PetscInt    *acolidx=a->colidx;
437*d4002b98SHong Zhang   PetscInt          i,j;
438*d4002b98SHong Zhang   PetscErrorCode    ierr;
439*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)  && !defined(PETSC_USE_64BIT_INDICES)
440*d4002b98SHong Zhang   __m512d           vec_x,vec_y,vec_z,vec_vals;
441*d4002b98SHong Zhang   __m256i           vec_idx;
442*d4002b98SHong Zhang   __mmask8          mask;
443*d4002b98SHong Zhang #else
444*d4002b98SHong Zhang   PetscScalar       sum[8];
445*d4002b98SHong Zhang #endif
446*d4002b98SHong Zhang 
447*d4002b98SHong Zhang #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
448*d4002b98SHong Zhang #pragma disjoint(*x,*y,*aval)
449*d4002b98SHong Zhang #endif
450*d4002b98SHong Zhang 
451*d4002b98SHong Zhang   PetscFunctionBegin;
452*d4002b98SHong Zhang   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
453*d4002b98SHong Zhang   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
454*d4002b98SHong Zhang   for (i=0; i<totalslices; i++) { /* loop over slices */
455*d4002b98SHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
456*d4002b98SHong Zhang     vec_y = _mm512_load_pd(&y[8*i]);
457*d4002b98SHong Zhang     #pragma novector
458*d4002b98SHong Zhang     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
459*d4002b98SHong Zhang       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
460*d4002b98SHong Zhang       acolidx += 8; aval += 8;
461*d4002b98SHong Zhang     }
462*d4002b98SHong Zhang     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
463*d4002b98SHong Zhang       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
464*d4002b98SHong Zhang       _mm512_mask_store_pd(&z[8*i],mask,vec_y);
465*d4002b98SHong Zhang     } else {
466*d4002b98SHong Zhang       _mm512_store_pd(&z[8*i],vec_y);
467*d4002b98SHong Zhang     }
468*d4002b98SHong Zhang #else
469*d4002b98SHong Zhang     sum[0] = y[8*i];
470*d4002b98SHong Zhang     sum[1] = y[8*i+1];
471*d4002b98SHong Zhang     sum[2] = y[8*i+2];
472*d4002b98SHong Zhang     sum[3] = y[8*i+3];
473*d4002b98SHong Zhang     sum[4] = y[8*i+4];
474*d4002b98SHong Zhang     sum[5] = y[8*i+5];
475*d4002b98SHong Zhang     sum[6] = y[8*i+6];
476*d4002b98SHong Zhang     sum[7] = y[8*i+7];
477*d4002b98SHong Zhang     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
478*d4002b98SHong Zhang       sum[0] += aval[j]*x[acolidx[j]];
479*d4002b98SHong Zhang       sum[1] += aval[j+1]*x[acolidx[j+1]];
480*d4002b98SHong Zhang       sum[2] += aval[j+2]*x[acolidx[j+2]];
481*d4002b98SHong Zhang       sum[3] += aval[j+3]*x[acolidx[j+3]];
482*d4002b98SHong Zhang       sum[4] += aval[j+4]*x[acolidx[j+4]];
483*d4002b98SHong Zhang       sum[5] += aval[j+5]*x[acolidx[j+5]];
484*d4002b98SHong Zhang       sum[6] += aval[j+6]*x[acolidx[j+6]];
485*d4002b98SHong Zhang       sum[7] += aval[j+7]*x[acolidx[j+7]];
486*d4002b98SHong Zhang     }
487*d4002b98SHong Zhang     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
488*d4002b98SHong Zhang       for(j=0;j<(A->rmap->n & 0x07);j++) z[8*i+j] = sum[j];
489*d4002b98SHong Zhang     } else {
490*d4002b98SHong Zhang       z[8*i]   = sum[0];
491*d4002b98SHong Zhang       z[8*i+1] = sum[1];
492*d4002b98SHong Zhang       z[8*i+2] = sum[2];
493*d4002b98SHong Zhang       z[8*i+3] = sum[3];
494*d4002b98SHong Zhang       z[8*i+4] = sum[4];
495*d4002b98SHong Zhang       z[8*i+5] = sum[5];
496*d4002b98SHong Zhang       z[8*i+6] = sum[6];
497*d4002b98SHong Zhang       z[8*i+7] = sum[7];
498*d4002b98SHong Zhang     }
499*d4002b98SHong Zhang #endif
500*d4002b98SHong Zhang   }
501*d4002b98SHong Zhang 
502*d4002b98SHong Zhang   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
503*d4002b98SHong Zhang   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
504*d4002b98SHong Zhang   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
505*d4002b98SHong Zhang   PetscFunctionReturn(0);
506*d4002b98SHong Zhang }
507*d4002b98SHong Zhang 
508*d4002b98SHong Zhang PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
509*d4002b98SHong Zhang {
510*d4002b98SHong Zhang   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
511*d4002b98SHong Zhang   PetscScalar       *y;
512*d4002b98SHong Zhang   const PetscScalar *x;
513*d4002b98SHong Zhang   const MatScalar   *aval=a->val;
514*d4002b98SHong Zhang   const PetscInt    *acolidx=a->colidx;
515*d4002b98SHong Zhang   PetscInt          i,j,row;
516*d4002b98SHong Zhang   PetscBool         isnonzero;
517*d4002b98SHong Zhang   PetscErrorCode    ierr;
518*d4002b98SHong Zhang 
519*d4002b98SHong Zhang #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
520*d4002b98SHong Zhang #pragma disjoint(*x,*y,*aval)
521*d4002b98SHong Zhang #endif
522*d4002b98SHong Zhang 
523*d4002b98SHong Zhang   PetscFunctionBegin;
524*d4002b98SHong Zhang   if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); }
525*d4002b98SHong Zhang   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
526*d4002b98SHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
527*d4002b98SHong Zhang   for (i=0; i<a->totalslices; i++) { /* loop over slices */
528*d4002b98SHong Zhang     for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
529*d4002b98SHong Zhang       isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]);
530*d4002b98SHong Zhang       if (isnonzero) y[acolidx[j]] += aval[j]*x[8*i+row];
531*d4002b98SHong Zhang     }
532*d4002b98SHong Zhang   }
533*d4002b98SHong Zhang   ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr);
534*d4002b98SHong Zhang   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
535*d4002b98SHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
536*d4002b98SHong Zhang   PetscFunctionReturn(0);
537*d4002b98SHong Zhang }
538*d4002b98SHong Zhang 
539*d4002b98SHong Zhang PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
540*d4002b98SHong Zhang {
541*d4002b98SHong Zhang   PetscErrorCode ierr;
542*d4002b98SHong Zhang 
543*d4002b98SHong Zhang   PetscFunctionBegin;
544*d4002b98SHong Zhang   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
545*d4002b98SHong Zhang   ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr);
546*d4002b98SHong Zhang   PetscFunctionReturn(0);
547*d4002b98SHong Zhang }
548*d4002b98SHong Zhang 
549*d4002b98SHong Zhang /*
550*d4002b98SHong Zhang      Checks for missing diagonals
551*d4002b98SHong Zhang */
552*d4002b98SHong Zhang PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
553*d4002b98SHong Zhang {
554*d4002b98SHong Zhang   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
555*d4002b98SHong Zhang   PetscInt    *diag,i;
556*d4002b98SHong Zhang 
557*d4002b98SHong Zhang   PetscFunctionBegin;
558*d4002b98SHong Zhang   *missing = PETSC_FALSE;
559*d4002b98SHong Zhang   if (A->rmap->n > 0 && !(a->colidx)) {
560*d4002b98SHong Zhang     *missing = PETSC_TRUE;
561*d4002b98SHong Zhang     if (d) *d = 0;
562*d4002b98SHong Zhang     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
563*d4002b98SHong Zhang   } else {
564*d4002b98SHong Zhang     diag = a->diag;
565*d4002b98SHong Zhang     for (i=0; i<A->rmap->n; i++) {
566*d4002b98SHong Zhang       if (diag[i] == -1) {
567*d4002b98SHong Zhang         *missing = PETSC_TRUE;
568*d4002b98SHong Zhang         if (d) *d = i;
569*d4002b98SHong Zhang         PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
570*d4002b98SHong Zhang         break;
571*d4002b98SHong Zhang       }
572*d4002b98SHong Zhang     }
573*d4002b98SHong Zhang   }
574*d4002b98SHong Zhang   PetscFunctionReturn(0);
575*d4002b98SHong Zhang }
576*d4002b98SHong Zhang 
577*d4002b98SHong Zhang PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
578*d4002b98SHong Zhang {
579*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
580*d4002b98SHong Zhang   PetscInt       i,j,m=A->rmap->n,shift;
581*d4002b98SHong Zhang   PetscErrorCode ierr;
582*d4002b98SHong Zhang 
583*d4002b98SHong Zhang   PetscFunctionBegin;
584*d4002b98SHong Zhang   if (!a->diag) {
585*d4002b98SHong Zhang     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
586*d4002b98SHong Zhang     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
587*d4002b98SHong Zhang     a->free_diag = PETSC_TRUE;
588*d4002b98SHong Zhang   }
589*d4002b98SHong Zhang   for (i=0; i<m; i++) { /* loop over rows */
590*d4002b98SHong Zhang     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
591*d4002b98SHong Zhang     a->diag[i] = -1;
592*d4002b98SHong Zhang     for (j=0; j<a->rlen[i]; j++) {
593*d4002b98SHong Zhang       if (a->colidx[shift+j*8] == i) {
594*d4002b98SHong Zhang         a->diag[i] = shift+j*8;
595*d4002b98SHong Zhang         break;
596*d4002b98SHong Zhang       }
597*d4002b98SHong Zhang     }
598*d4002b98SHong Zhang   }
599*d4002b98SHong Zhang   PetscFunctionReturn(0);
600*d4002b98SHong Zhang }
601*d4002b98SHong Zhang 
602*d4002b98SHong Zhang /*
603*d4002b98SHong Zhang   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
604*d4002b98SHong Zhang */
605*d4002b98SHong Zhang PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
606*d4002b98SHong Zhang {
607*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
608*d4002b98SHong Zhang   PetscInt       i,*diag,m = A->rmap->n;
609*d4002b98SHong Zhang   MatScalar      *val = a->val;
610*d4002b98SHong Zhang   PetscScalar    *idiag,*mdiag;
611*d4002b98SHong Zhang   PetscErrorCode ierr;
612*d4002b98SHong Zhang 
613*d4002b98SHong Zhang   PetscFunctionBegin;
614*d4002b98SHong Zhang   if (a->idiagvalid) PetscFunctionReturn(0);
615*d4002b98SHong Zhang   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
616*d4002b98SHong Zhang   diag = a->diag;
617*d4002b98SHong Zhang   if (!a->idiag) {
618*d4002b98SHong Zhang     ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr);
619*d4002b98SHong Zhang     ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr);
620*d4002b98SHong Zhang     val  = a->val;
621*d4002b98SHong Zhang   }
622*d4002b98SHong Zhang   mdiag = a->mdiag;
623*d4002b98SHong Zhang   idiag = a->idiag;
624*d4002b98SHong Zhang 
625*d4002b98SHong Zhang   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
626*d4002b98SHong Zhang     for (i=0; i<m; i++) {
627*d4002b98SHong Zhang       mdiag[i] = val[diag[i]];
628*d4002b98SHong Zhang       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
629*d4002b98SHong Zhang         if (PetscRealPart(fshift)) {
630*d4002b98SHong Zhang           ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr);
631*d4002b98SHong Zhang           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
632*d4002b98SHong Zhang           A->factorerror_zeropivot_value = 0.0;
633*d4002b98SHong Zhang           A->factorerror_zeropivot_row   = i;
634*d4002b98SHong Zhang         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
635*d4002b98SHong Zhang       }
636*d4002b98SHong Zhang       idiag[i] = 1.0/val[diag[i]];
637*d4002b98SHong Zhang     }
638*d4002b98SHong Zhang     ierr = PetscLogFlops(m);CHKERRQ(ierr);
639*d4002b98SHong Zhang   } else {
640*d4002b98SHong Zhang     for (i=0; i<m; i++) {
641*d4002b98SHong Zhang       mdiag[i] = val[diag[i]];
642*d4002b98SHong Zhang       idiag[i] = omega/(fshift + val[diag[i]]);
643*d4002b98SHong Zhang     }
644*d4002b98SHong Zhang     ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr);
645*d4002b98SHong Zhang   }
646*d4002b98SHong Zhang   a->idiagvalid = PETSC_TRUE;
647*d4002b98SHong Zhang   PetscFunctionReturn(0);
648*d4002b98SHong Zhang }
649*d4002b98SHong Zhang 
650*d4002b98SHong Zhang PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
651*d4002b98SHong Zhang {
652*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
653*d4002b98SHong Zhang   PetscErrorCode ierr;
654*d4002b98SHong Zhang 
655*d4002b98SHong Zhang   PetscFunctionBegin;
656*d4002b98SHong Zhang   ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr);
657*d4002b98SHong Zhang   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
658*d4002b98SHong Zhang   PetscFunctionReturn(0);
659*d4002b98SHong Zhang }
660*d4002b98SHong Zhang 
661*d4002b98SHong Zhang PetscErrorCode MatDestroy_SeqSELL(Mat A)
662*d4002b98SHong Zhang {
663*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
664*d4002b98SHong Zhang   PetscErrorCode ierr;
665*d4002b98SHong Zhang 
666*d4002b98SHong Zhang   PetscFunctionBegin;
667*d4002b98SHong Zhang #if defined(PETSC_USE_LOG)
668*d4002b98SHong Zhang   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
669*d4002b98SHong Zhang #endif
670*d4002b98SHong Zhang   ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr);
671*d4002b98SHong Zhang   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
672*d4002b98SHong Zhang   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
673*d4002b98SHong Zhang   ierr = PetscFree(a->diag);CHKERRQ(ierr);
674*d4002b98SHong Zhang   ierr = PetscFree(a->ibdiag);CHKERRQ(ierr);
675*d4002b98SHong Zhang   ierr = PetscFree(a->rlen);CHKERRQ(ierr);
676*d4002b98SHong Zhang   ierr = PetscFree(a->sliidx);CHKERRQ(ierr);
677*d4002b98SHong Zhang   ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr);
678*d4002b98SHong Zhang   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
679*d4002b98SHong Zhang   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
680*d4002b98SHong Zhang   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
681*d4002b98SHong Zhang 
682*d4002b98SHong Zhang   ierr = PetscFree(A->data);CHKERRQ(ierr);
683*d4002b98SHong Zhang 
684*d4002b98SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
685*d4002b98SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
686*d4002b98SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
687*d4002b98SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
688*d4002b98SHong Zhang #endif
689*d4002b98SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr);
690*d4002b98SHong Zhang   PetscFunctionReturn(0);
691*d4002b98SHong Zhang }
692*d4002b98SHong Zhang 
693*d4002b98SHong Zhang PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
694*d4002b98SHong Zhang {
695*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
696*d4002b98SHong Zhang   PetscErrorCode ierr;
697*d4002b98SHong Zhang 
698*d4002b98SHong Zhang   PetscFunctionBegin;
699*d4002b98SHong Zhang   switch (op) {
700*d4002b98SHong Zhang   case MAT_ROW_ORIENTED:
701*d4002b98SHong Zhang     a->roworiented = flg;
702*d4002b98SHong Zhang     break;
703*d4002b98SHong Zhang   case MAT_KEEP_NONZERO_PATTERN:
704*d4002b98SHong Zhang     a->keepnonzeropattern = flg;
705*d4002b98SHong Zhang     break;
706*d4002b98SHong Zhang   case MAT_NEW_NONZERO_LOCATIONS:
707*d4002b98SHong Zhang     a->nonew = (flg ? 0 : 1);
708*d4002b98SHong Zhang     break;
709*d4002b98SHong Zhang   case MAT_NEW_NONZERO_LOCATION_ERR:
710*d4002b98SHong Zhang     a->nonew = (flg ? -1 : 0);
711*d4002b98SHong Zhang     break;
712*d4002b98SHong Zhang   case MAT_NEW_NONZERO_ALLOCATION_ERR:
713*d4002b98SHong Zhang     a->nonew = (flg ? -2 : 0);
714*d4002b98SHong Zhang     break;
715*d4002b98SHong Zhang   case MAT_UNUSED_NONZERO_LOCATION_ERR:
716*d4002b98SHong Zhang     a->nounused = (flg ? -1 : 0);
717*d4002b98SHong Zhang     break;
718*d4002b98SHong Zhang   case MAT_NEW_DIAGONALS:
719*d4002b98SHong Zhang   case MAT_IGNORE_OFF_PROC_ENTRIES:
720*d4002b98SHong Zhang   case MAT_USE_HASH_TABLE:
721*d4002b98SHong Zhang     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
722*d4002b98SHong Zhang     break;
723*d4002b98SHong Zhang   case MAT_SPD:
724*d4002b98SHong Zhang   case MAT_SYMMETRIC:
725*d4002b98SHong Zhang   case MAT_STRUCTURALLY_SYMMETRIC:
726*d4002b98SHong Zhang   case MAT_HERMITIAN:
727*d4002b98SHong Zhang   case MAT_SYMMETRY_ETERNAL:
728*d4002b98SHong Zhang     /* These options are handled directly by MatSetOption() */
729*d4002b98SHong Zhang     break;
730*d4002b98SHong Zhang   default:
731*d4002b98SHong Zhang     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
732*d4002b98SHong Zhang   }
733*d4002b98SHong Zhang   PetscFunctionReturn(0);
734*d4002b98SHong Zhang }
735*d4002b98SHong Zhang 
736*d4002b98SHong Zhang PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
737*d4002b98SHong Zhang {
738*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
739*d4002b98SHong Zhang   PetscInt       i,j,n,shift;
740*d4002b98SHong Zhang   PetscScalar    *x,zero=0.0;
741*d4002b98SHong Zhang   PetscErrorCode ierr;
742*d4002b98SHong Zhang 
743*d4002b98SHong Zhang   PetscFunctionBegin;
744*d4002b98SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
745*d4002b98SHong Zhang   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
746*d4002b98SHong Zhang 
747*d4002b98SHong Zhang   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
748*d4002b98SHong Zhang     PetscInt *diag=a->diag;
749*d4002b98SHong Zhang     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
750*d4002b98SHong Zhang     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
751*d4002b98SHong Zhang     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
752*d4002b98SHong Zhang     PetscFunctionReturn(0);
753*d4002b98SHong Zhang   }
754*d4002b98SHong Zhang 
755*d4002b98SHong Zhang   ierr = VecSet(v,zero);CHKERRQ(ierr);
756*d4002b98SHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
757*d4002b98SHong Zhang   for (i=0; i<n; i++) { /* loop over rows */
758*d4002b98SHong Zhang     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
759*d4002b98SHong Zhang     x[i] = 0;
760*d4002b98SHong Zhang     for (j=0; j<a->rlen[i]; j++) {
761*d4002b98SHong Zhang       if (a->colidx[shift+j*8] == i) {
762*d4002b98SHong Zhang         x[i] = a->val[shift+j*8];
763*d4002b98SHong Zhang         break;
764*d4002b98SHong Zhang       }
765*d4002b98SHong Zhang     }
766*d4002b98SHong Zhang   }
767*d4002b98SHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
768*d4002b98SHong Zhang   PetscFunctionReturn(0);
769*d4002b98SHong Zhang }
770*d4002b98SHong Zhang 
771*d4002b98SHong Zhang PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
772*d4002b98SHong Zhang {
773*d4002b98SHong Zhang   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
774*d4002b98SHong Zhang   const PetscScalar *l,*r;
775*d4002b98SHong Zhang   PetscInt          i,j,m,n,row;
776*d4002b98SHong Zhang   PetscErrorCode    ierr;
777*d4002b98SHong Zhang 
778*d4002b98SHong Zhang   PetscFunctionBegin;
779*d4002b98SHong Zhang   if (ll) {
780*d4002b98SHong Zhang     /* The local size is used so that VecMPI can be passed to this routine
781*d4002b98SHong Zhang        by MatDiagonalScale_MPISELL */
782*d4002b98SHong Zhang     ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr);
783*d4002b98SHong Zhang     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
784*d4002b98SHong Zhang     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
785*d4002b98SHong Zhang     for (i=0; i<a->totalslices; i++) { /* loop over slices */
786*d4002b98SHong Zhang       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
787*d4002b98SHong Zhang         a->val[j] *= l[8*i+row];
788*d4002b98SHong Zhang       }
789*d4002b98SHong Zhang     }
790*d4002b98SHong Zhang     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
791*d4002b98SHong Zhang     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
792*d4002b98SHong Zhang   }
793*d4002b98SHong Zhang   if (rr) {
794*d4002b98SHong Zhang     ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr);
795*d4002b98SHong Zhang     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
796*d4002b98SHong Zhang     ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
797*d4002b98SHong Zhang     for (i=0; i<a->totalslices; i++) { /* loop over slices */
798*d4002b98SHong Zhang       for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
799*d4002b98SHong Zhang         a->val[j] *= r[a->colidx[j]];
800*d4002b98SHong Zhang       }
801*d4002b98SHong Zhang     }
802*d4002b98SHong Zhang     ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
803*d4002b98SHong Zhang     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
804*d4002b98SHong Zhang   }
805*d4002b98SHong Zhang   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
806*d4002b98SHong Zhang   PetscFunctionReturn(0);
807*d4002b98SHong Zhang }
808*d4002b98SHong Zhang 
809*d4002b98SHong Zhang extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
810*d4002b98SHong Zhang 
811*d4002b98SHong Zhang PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
812*d4002b98SHong Zhang {
813*d4002b98SHong Zhang   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
814*d4002b98SHong Zhang   PetscInt    *cp,i,k,low,high,t,row,col,l;
815*d4002b98SHong Zhang   PetscInt    shift;
816*d4002b98SHong Zhang   MatScalar   *vp;
817*d4002b98SHong Zhang 
818*d4002b98SHong Zhang   PetscFunctionBegin;
819*d4002b98SHong Zhang   for (k=0; k<m; k++) { /* loop over added rows */
820*d4002b98SHong Zhang     row = im[k];
821*d4002b98SHong Zhang     if (row<0) continue;
822*d4002b98SHong Zhang #if defined(PETSC_USE_DEBUG)
823*d4002b98SHong Zhang     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
824*d4002b98SHong Zhang #endif
825*d4002b98SHong Zhang     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
826*d4002b98SHong Zhang     cp = a->colidx+shift; /* pointer to the row */
827*d4002b98SHong Zhang     vp = a->val+shift; /* pointer to the row */
828*d4002b98SHong Zhang     for (l=0; l<n; l++) { /* loop over added rows */
829*d4002b98SHong Zhang       col = in[l];
830*d4002b98SHong Zhang       if (col<0) continue;
831*d4002b98SHong Zhang #if defined(PETSC_USE_DEBUG)
832*d4002b98SHong Zhang       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
833*d4002b98SHong Zhang #endif
834*d4002b98SHong Zhang       high = a->rlen[row]; low = 0; /* assume unsorted */
835*d4002b98SHong Zhang       while (high-low > 5) {
836*d4002b98SHong Zhang         t = (low+high)/2;
837*d4002b98SHong Zhang         if (*(cp+t*8) > col) high = t;
838*d4002b98SHong Zhang         else low = t;
839*d4002b98SHong Zhang       }
840*d4002b98SHong Zhang       for (i=low; i<high; i++) {
841*d4002b98SHong Zhang         if (*(cp+8*i) > col) break;
842*d4002b98SHong Zhang         if (*(cp+8*i) == col) {
843*d4002b98SHong Zhang           *v++ = *(vp+8*i);
844*d4002b98SHong Zhang           goto finished;
845*d4002b98SHong Zhang         }
846*d4002b98SHong Zhang       }
847*d4002b98SHong Zhang       *v++ = 0.0;
848*d4002b98SHong Zhang     finished:;
849*d4002b98SHong Zhang     }
850*d4002b98SHong Zhang   }
851*d4002b98SHong Zhang   PetscFunctionReturn(0);
852*d4002b98SHong Zhang }
853*d4002b98SHong Zhang 
854*d4002b98SHong Zhang PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
855*d4002b98SHong Zhang {
856*d4002b98SHong Zhang   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
857*d4002b98SHong Zhang   PetscInt          i,j,m=A->rmap->n,shift;
858*d4002b98SHong Zhang   const char        *name;
859*d4002b98SHong Zhang   PetscViewerFormat format;
860*d4002b98SHong Zhang   PetscErrorCode    ierr;
861*d4002b98SHong Zhang 
862*d4002b98SHong Zhang   PetscFunctionBegin;
863*d4002b98SHong Zhang   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
864*d4002b98SHong Zhang   if (format == PETSC_VIEWER_ASCII_MATLAB) {
865*d4002b98SHong Zhang     PetscInt nofinalvalue = 0;
866*d4002b98SHong Zhang     /*
867*d4002b98SHong Zhang     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
868*d4002b98SHong Zhang       nofinalvalue = 1;
869*d4002b98SHong Zhang     }
870*d4002b98SHong Zhang     */
871*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
872*d4002b98SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr);
873*d4002b98SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr);
874*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
875*d4002b98SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
876*d4002b98SHong Zhang #else
877*d4002b98SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
878*d4002b98SHong Zhang #endif
879*d4002b98SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr);
880*d4002b98SHong Zhang 
881*d4002b98SHong Zhang     for (i=0; i<m; i++) {
882*d4002b98SHong Zhang       shift = a->sliidx[i>>3]+(i&0x07);
883*d4002b98SHong Zhang       for (j=0; j<a->rlen[i]; j++) {
884*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
885*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
886*d4002b98SHong Zhang #else
887*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr);
888*d4002b98SHong Zhang #endif
889*d4002b98SHong Zhang       }
890*d4002b98SHong Zhang     }
891*d4002b98SHong Zhang     /*
892*d4002b98SHong Zhang     if (nofinalvalue) {
893*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
894*d4002b98SHong Zhang       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr);
895*d4002b98SHong Zhang #else
896*d4002b98SHong Zhang       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr);
897*d4002b98SHong Zhang #endif
898*d4002b98SHong Zhang     }
899*d4002b98SHong Zhang     */
900*d4002b98SHong Zhang     ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
901*d4002b98SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr);
902*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
903*d4002b98SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
904*d4002b98SHong Zhang     PetscFunctionReturn(0);
905*d4002b98SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
906*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
907*d4002b98SHong Zhang     for (i=0; i<m; i++) {
908*d4002b98SHong Zhang       ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
909*d4002b98SHong Zhang       shift = a->sliidx[i>>3]+(i&0x07);
910*d4002b98SHong Zhang       for (j=0; j<a->rlen[i]; j++) {
911*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
912*d4002b98SHong Zhang         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
913*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
914*d4002b98SHong Zhang         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
915*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
916*d4002b98SHong Zhang         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
917*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
918*d4002b98SHong Zhang         }
919*d4002b98SHong Zhang #else
920*d4002b98SHong Zhang         if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);}
921*d4002b98SHong Zhang #endif
922*d4002b98SHong Zhang       }
923*d4002b98SHong Zhang       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
924*d4002b98SHong Zhang     }
925*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
926*d4002b98SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
927*d4002b98SHong Zhang     PetscInt    cnt=0,jcnt;
928*d4002b98SHong Zhang     PetscScalar value;
929*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
930*d4002b98SHong Zhang     PetscBool   realonly=PETSC_TRUE;
931*d4002b98SHong Zhang     for (i=0; i<a->sliidx[a->totalslices]; i++) {
932*d4002b98SHong Zhang       if (PetscImaginaryPart(a->val[i]) != 0.0) {
933*d4002b98SHong Zhang         realonly = PETSC_FALSE;
934*d4002b98SHong Zhang         break;
935*d4002b98SHong Zhang       }
936*d4002b98SHong Zhang     }
937*d4002b98SHong Zhang #endif
938*d4002b98SHong Zhang 
939*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
940*d4002b98SHong Zhang     for (i=0; i<m; i++) {
941*d4002b98SHong Zhang       jcnt = 0;
942*d4002b98SHong Zhang       shift = a->sliidx[i>>3]+(i&0x07);
943*d4002b98SHong Zhang       for (j=0; j<A->cmap->n; j++) {
944*d4002b98SHong Zhang         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
945*d4002b98SHong Zhang           value = a->val[cnt++];
946*d4002b98SHong Zhang           jcnt++;
947*d4002b98SHong Zhang         } else {
948*d4002b98SHong Zhang           value = 0.0;
949*d4002b98SHong Zhang         }
950*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
951*d4002b98SHong Zhang         if (realonly) {
952*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr);
953*d4002b98SHong Zhang         } else {
954*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr);
955*d4002b98SHong Zhang         }
956*d4002b98SHong Zhang #else
957*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr);
958*d4002b98SHong Zhang #endif
959*d4002b98SHong Zhang       }
960*d4002b98SHong Zhang       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
961*d4002b98SHong Zhang     }
962*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
963*d4002b98SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
964*d4002b98SHong Zhang     PetscInt fshift=1;
965*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
966*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
967*d4002b98SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr);
968*d4002b98SHong Zhang #else
969*d4002b98SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr);
970*d4002b98SHong Zhang #endif
971*d4002b98SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr);
972*d4002b98SHong Zhang     for (i=0; i<m; i++) {
973*d4002b98SHong Zhang       shift = a->sliidx[i>>3]+(i&0x07);
974*d4002b98SHong Zhang       for (j=0; j<a->rlen[i]; j++) {
975*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
976*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
977*d4002b98SHong Zhang #else
978*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr);
979*d4002b98SHong Zhang #endif
980*d4002b98SHong Zhang       }
981*d4002b98SHong Zhang     }
982*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
983*d4002b98SHong Zhang   } else {
984*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
985*d4002b98SHong Zhang     if (A->factortype) {
986*d4002b98SHong Zhang       for (i=0; i<m; i++) {
987*d4002b98SHong Zhang         shift = a->sliidx[i>>3]+(i&0x07);
988*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
989*d4002b98SHong Zhang         /* L part */
990*d4002b98SHong Zhang         for (j=shift; j<a->diag[i]; j+=8) {
991*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
992*d4002b98SHong Zhang           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
993*d4002b98SHong Zhang             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
994*d4002b98SHong Zhang           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
995*d4002b98SHong Zhang             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
996*d4002b98SHong Zhang           } else {
997*d4002b98SHong Zhang             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
998*d4002b98SHong Zhang           }
999*d4002b98SHong Zhang #else
1000*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1001*d4002b98SHong Zhang #endif
1002*d4002b98SHong Zhang         }
1003*d4002b98SHong Zhang         /* diagonal */
1004*d4002b98SHong Zhang         j = a->diag[i];
1005*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
1006*d4002b98SHong Zhang         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1007*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr);
1008*d4002b98SHong Zhang         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1009*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr);
1010*d4002b98SHong Zhang         } else {
1011*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr);
1012*d4002b98SHong Zhang         }
1013*d4002b98SHong Zhang #else
1014*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr);
1015*d4002b98SHong Zhang #endif
1016*d4002b98SHong Zhang 
1017*d4002b98SHong Zhang         /* U part */
1018*d4002b98SHong Zhang         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1019*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
1020*d4002b98SHong Zhang           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1021*d4002b98SHong Zhang             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1022*d4002b98SHong Zhang           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1023*d4002b98SHong Zhang             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1024*d4002b98SHong Zhang           } else {
1025*d4002b98SHong Zhang             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1026*d4002b98SHong Zhang           }
1027*d4002b98SHong Zhang #else
1028*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1029*d4002b98SHong Zhang #endif
1030*d4002b98SHong Zhang         }
1031*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1032*d4002b98SHong Zhang       }
1033*d4002b98SHong Zhang     } else {
1034*d4002b98SHong Zhang       for (i=0; i<m; i++) {
1035*d4002b98SHong Zhang         shift = a->sliidx[i>>3]+(i&0x07);
1036*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1037*d4002b98SHong Zhang         for (j=0; j<a->rlen[i]; j++) {
1038*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
1039*d4002b98SHong Zhang           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1040*d4002b98SHong Zhang             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1041*d4002b98SHong Zhang           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1042*d4002b98SHong Zhang             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1043*d4002b98SHong Zhang           } else {
1044*d4002b98SHong Zhang             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1045*d4002b98SHong Zhang           }
1046*d4002b98SHong Zhang #else
1047*d4002b98SHong Zhang           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);
1048*d4002b98SHong Zhang #endif
1049*d4002b98SHong Zhang         }
1050*d4002b98SHong Zhang         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1051*d4002b98SHong Zhang       }
1052*d4002b98SHong Zhang     }
1053*d4002b98SHong Zhang     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1054*d4002b98SHong Zhang   }
1055*d4002b98SHong Zhang   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1056*d4002b98SHong Zhang   PetscFunctionReturn(0);
1057*d4002b98SHong Zhang }
1058*d4002b98SHong Zhang 
1059*d4002b98SHong Zhang #include <petscdraw.h>
1060*d4002b98SHong Zhang PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1061*d4002b98SHong Zhang {
1062*d4002b98SHong Zhang   Mat               A=(Mat)Aa;
1063*d4002b98SHong Zhang   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1064*d4002b98SHong Zhang   PetscInt          i,j,m=A->rmap->n,shift;
1065*d4002b98SHong Zhang   int               color;
1066*d4002b98SHong Zhang   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1067*d4002b98SHong Zhang   PetscViewer       viewer;
1068*d4002b98SHong Zhang   PetscViewerFormat format;
1069*d4002b98SHong Zhang   PetscErrorCode    ierr;
1070*d4002b98SHong Zhang 
1071*d4002b98SHong Zhang   PetscFunctionBegin;
1072*d4002b98SHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1073*d4002b98SHong Zhang   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1074*d4002b98SHong Zhang   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1075*d4002b98SHong Zhang 
1076*d4002b98SHong Zhang   /* loop over matrix elements drawing boxes */
1077*d4002b98SHong Zhang 
1078*d4002b98SHong Zhang   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1079*d4002b98SHong Zhang     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1080*d4002b98SHong Zhang     /* Blue for negative, Cyan for zero and  Red for positive */
1081*d4002b98SHong Zhang     color = PETSC_DRAW_BLUE;
1082*d4002b98SHong Zhang     for (i=0; i<m; i++) {
1083*d4002b98SHong Zhang       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1084*d4002b98SHong Zhang       y_l = m - i - 1.0; y_r = y_l + 1.0;
1085*d4002b98SHong Zhang       for (j=0; j<a->rlen[i]; j++) {
1086*d4002b98SHong Zhang         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1087*d4002b98SHong Zhang         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1088*d4002b98SHong Zhang         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1089*d4002b98SHong Zhang       }
1090*d4002b98SHong Zhang     }
1091*d4002b98SHong Zhang     color = PETSC_DRAW_CYAN;
1092*d4002b98SHong Zhang     for (i=0; i<m; i++) {
1093*d4002b98SHong Zhang       shift = a->sliidx[i>>3]+(i&0x07);
1094*d4002b98SHong Zhang       y_l = m - i - 1.0; y_r = y_l + 1.0;
1095*d4002b98SHong Zhang       for (j=0; j<a->rlen[i]; j++) {
1096*d4002b98SHong Zhang         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1097*d4002b98SHong Zhang         if (a->val[shift+8*j] !=  0.) continue;
1098*d4002b98SHong Zhang         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1099*d4002b98SHong Zhang       }
1100*d4002b98SHong Zhang     }
1101*d4002b98SHong Zhang     color = PETSC_DRAW_RED;
1102*d4002b98SHong Zhang     for (i=0; i<m; i++) {
1103*d4002b98SHong Zhang       shift = a->sliidx[i>>3]+(i&0x07);
1104*d4002b98SHong Zhang       y_l = m - i - 1.0; y_r = y_l + 1.0;
1105*d4002b98SHong Zhang       for (j=0; j<a->rlen[i]; j++) {
1106*d4002b98SHong Zhang         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1107*d4002b98SHong Zhang         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1108*d4002b98SHong Zhang         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1109*d4002b98SHong Zhang       }
1110*d4002b98SHong Zhang     }
1111*d4002b98SHong Zhang     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1112*d4002b98SHong Zhang   } else {
1113*d4002b98SHong Zhang     /* use contour shading to indicate magnitude of values */
1114*d4002b98SHong Zhang     /* first determine max of all nonzero values */
1115*d4002b98SHong Zhang     PetscReal minv=0.0,maxv=0.0;
1116*d4002b98SHong Zhang     PetscInt  count=0;
1117*d4002b98SHong Zhang     PetscDraw popup;
1118*d4002b98SHong Zhang     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1119*d4002b98SHong Zhang       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1120*d4002b98SHong Zhang     }
1121*d4002b98SHong Zhang     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1122*d4002b98SHong Zhang     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1123*d4002b98SHong Zhang     ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr);
1124*d4002b98SHong Zhang 
1125*d4002b98SHong Zhang     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1126*d4002b98SHong Zhang     for (i=0; i<m; i++) {
1127*d4002b98SHong Zhang       shift = a->sliidx[i>>3]+(i&0x07);
1128*d4002b98SHong Zhang       y_l = m - i - 1.0;
1129*d4002b98SHong Zhang       y_r = y_l + 1.0;
1130*d4002b98SHong Zhang       for (j=0; j<a->rlen[i]; j++) {
1131*d4002b98SHong Zhang         x_l = a->colidx[shift+j*8];
1132*d4002b98SHong Zhang         x_r = x_l + 1.0;
1133*d4002b98SHong Zhang         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1134*d4002b98SHong Zhang         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1135*d4002b98SHong Zhang         count++;
1136*d4002b98SHong Zhang       }
1137*d4002b98SHong Zhang     }
1138*d4002b98SHong Zhang     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1139*d4002b98SHong Zhang   }
1140*d4002b98SHong Zhang   PetscFunctionReturn(0);
1141*d4002b98SHong Zhang }
1142*d4002b98SHong Zhang 
1143*d4002b98SHong Zhang #include <petscdraw.h>
1144*d4002b98SHong Zhang PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1145*d4002b98SHong Zhang {
1146*d4002b98SHong Zhang   PetscDraw      draw;
1147*d4002b98SHong Zhang   PetscReal      xr,yr,xl,yl,h,w;
1148*d4002b98SHong Zhang   PetscBool      isnull;
1149*d4002b98SHong Zhang   PetscErrorCode ierr;
1150*d4002b98SHong Zhang 
1151*d4002b98SHong Zhang   PetscFunctionBegin;
1152*d4002b98SHong Zhang   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1153*d4002b98SHong Zhang   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1154*d4002b98SHong Zhang   if (isnull) PetscFunctionReturn(0);
1155*d4002b98SHong Zhang 
1156*d4002b98SHong Zhang   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1157*d4002b98SHong Zhang   xr  += w;          yr += h;         xl = -w;     yl = -h;
1158*d4002b98SHong Zhang   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1159*d4002b98SHong Zhang   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1160*d4002b98SHong Zhang   ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr);
1161*d4002b98SHong Zhang   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1162*d4002b98SHong Zhang   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1163*d4002b98SHong Zhang   PetscFunctionReturn(0);
1164*d4002b98SHong Zhang }
1165*d4002b98SHong Zhang 
1166*d4002b98SHong Zhang PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1167*d4002b98SHong Zhang {
1168*d4002b98SHong Zhang   PetscBool      iascii,isbinary,isdraw;
1169*d4002b98SHong Zhang   PetscErrorCode ierr;
1170*d4002b98SHong Zhang 
1171*d4002b98SHong Zhang   PetscFunctionBegin;
1172*d4002b98SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1173*d4002b98SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1174*d4002b98SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1175*d4002b98SHong Zhang   if (iascii) {
1176*d4002b98SHong Zhang     ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr);
1177*d4002b98SHong Zhang   } else if (isbinary) {
1178*d4002b98SHong Zhang     /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */
1179*d4002b98SHong Zhang   } else if (isdraw) {
1180*d4002b98SHong Zhang     ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr);
1181*d4002b98SHong Zhang   }
1182*d4002b98SHong Zhang   PetscFunctionReturn(0);
1183*d4002b98SHong Zhang }
1184*d4002b98SHong Zhang 
1185*d4002b98SHong Zhang PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1186*d4002b98SHong Zhang {
1187*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1188*d4002b98SHong Zhang   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1189*d4002b98SHong Zhang   MatScalar      *vp;
1190*d4002b98SHong Zhang   PetscErrorCode ierr;
1191*d4002b98SHong Zhang 
1192*d4002b98SHong Zhang   PetscFunctionBegin;
1193*d4002b98SHong Zhang   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1194*d4002b98SHong Zhang   /* To do: compress out the unused elements */
1195*d4002b98SHong Zhang   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
1196*d4002b98SHong Zhang   ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr);
1197*d4002b98SHong Zhang   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr);
1198*d4002b98SHong Zhang   ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr);
1199*d4002b98SHong Zhang   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1200*d4002b98SHong Zhang   for (i=0; i<a->totalslices; ++i) {
1201*d4002b98SHong Zhang     shift = a->sliidx[i];    /* starting index of the slice */
1202*d4002b98SHong Zhang     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1203*d4002b98SHong Zhang     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1204*d4002b98SHong Zhang     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1205*d4002b98SHong Zhang       row  = 8*i + row_in_slice;
1206*d4002b98SHong Zhang       nrow = a->rlen[row]; /* number of nonzeros in row */
1207*d4002b98SHong Zhang       /*
1208*d4002b98SHong Zhang         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1209*d4002b98SHong Zhang         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1210*d4002b98SHong Zhang       */
1211*d4002b98SHong Zhang       lastcol = 0;
1212*d4002b98SHong Zhang       if (nrow>0) { /* nonempty row */
1213*d4002b98SHong Zhang         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1214*d4002b98SHong Zhang       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1215*d4002b98SHong Zhang         for (j=1;j<8;j++) {
1216*d4002b98SHong Zhang           if (a->rlen[8*i+j]) {
1217*d4002b98SHong Zhang             lastcol = cp[j];
1218*d4002b98SHong Zhang             break;
1219*d4002b98SHong Zhang           }
1220*d4002b98SHong Zhang         }
1221*d4002b98SHong Zhang       } else {
1222*d4002b98SHong Zhang         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1223*d4002b98SHong Zhang       }
1224*d4002b98SHong Zhang 
1225*d4002b98SHong Zhang       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1226*d4002b98SHong Zhang         cp[8*k+row_in_slice] = lastcol;
1227*d4002b98SHong Zhang         vp[8*k+row_in_slice] = (MatScalar)0;
1228*d4002b98SHong Zhang       }
1229*d4002b98SHong Zhang     }
1230*d4002b98SHong Zhang   }
1231*d4002b98SHong Zhang 
1232*d4002b98SHong Zhang   A->info.mallocs += a->reallocs;
1233*d4002b98SHong Zhang   a->reallocs      = 0;
1234*d4002b98SHong Zhang 
1235*d4002b98SHong Zhang   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1236*d4002b98SHong Zhang   PetscFunctionReturn(0);
1237*d4002b98SHong Zhang }
1238*d4002b98SHong Zhang 
1239*d4002b98SHong Zhang PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1240*d4002b98SHong Zhang {
1241*d4002b98SHong Zhang   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1242*d4002b98SHong Zhang 
1243*d4002b98SHong Zhang   PetscFunctionBegin;
1244*d4002b98SHong Zhang   info->block_size   = 1.0;
1245*d4002b98SHong Zhang   info->nz_allocated = (double)a->maxallocmat;
1246*d4002b98SHong Zhang   info->nz_used      = (double)a->sliidx[a->totalslices]; /* include padding zeros */
1247*d4002b98SHong Zhang   info->nz_unneeded  = (double)(a->maxallocmat-a->sliidx[a->totalslices]);
1248*d4002b98SHong Zhang   info->assemblies   = (double)A->num_ass;
1249*d4002b98SHong Zhang   info->mallocs      = (double)A->info.mallocs;
1250*d4002b98SHong Zhang   info->memory       = ((PetscObject)A)->mem;
1251*d4002b98SHong Zhang   if (A->factortype) {
1252*d4002b98SHong Zhang     info->fill_ratio_given  = A->info.fill_ratio_given;
1253*d4002b98SHong Zhang     info->fill_ratio_needed = A->info.fill_ratio_needed;
1254*d4002b98SHong Zhang     info->factor_mallocs    = A->info.factor_mallocs;
1255*d4002b98SHong Zhang   } else {
1256*d4002b98SHong Zhang     info->fill_ratio_given  = 0;
1257*d4002b98SHong Zhang     info->fill_ratio_needed = 0;
1258*d4002b98SHong Zhang     info->factor_mallocs    = 0;
1259*d4002b98SHong Zhang   }
1260*d4002b98SHong Zhang   PetscFunctionReturn(0);
1261*d4002b98SHong Zhang }
1262*d4002b98SHong Zhang 
1263*d4002b98SHong Zhang PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1264*d4002b98SHong Zhang {
1265*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1266*d4002b98SHong Zhang   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1267*d4002b98SHong Zhang   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1268*d4002b98SHong Zhang   MatScalar      *vp,value;
1269*d4002b98SHong Zhang   PetscErrorCode ierr;
1270*d4002b98SHong Zhang 
1271*d4002b98SHong Zhang   PetscFunctionBegin;
1272*d4002b98SHong Zhang   for (k=0; k<m; k++) { /* loop over added rows */
1273*d4002b98SHong Zhang     row = im[k];
1274*d4002b98SHong Zhang     if (row < 0) continue;
1275*d4002b98SHong Zhang #if defined(PETSC_USE_DEBUG)
1276*d4002b98SHong Zhang     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1277*d4002b98SHong Zhang #endif
1278*d4002b98SHong Zhang     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1279*d4002b98SHong Zhang     cp    = a->colidx+shift; /* pointer to the row */
1280*d4002b98SHong Zhang     vp    = a->val+shift; /* pointer to the row */
1281*d4002b98SHong Zhang     nrow  = a->rlen[row];
1282*d4002b98SHong Zhang     low   = 0;
1283*d4002b98SHong Zhang     high  = nrow;
1284*d4002b98SHong Zhang 
1285*d4002b98SHong Zhang     for (l=0; l<n; l++) { /* loop over added columns */
1286*d4002b98SHong Zhang       col = in[l];
1287*d4002b98SHong Zhang       if (col<0) continue;
1288*d4002b98SHong Zhang #if defined(PETSC_USE_DEBUG)
1289*d4002b98SHong Zhang       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1290*d4002b98SHong Zhang #endif
1291*d4002b98SHong Zhang       if (a->roworiented) {
1292*d4002b98SHong Zhang         value = v[l+k*n];
1293*d4002b98SHong Zhang       } else {
1294*d4002b98SHong Zhang         value = v[k+l*m];
1295*d4002b98SHong Zhang       }
1296*d4002b98SHong Zhang       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1297*d4002b98SHong Zhang 
1298*d4002b98SHong Zhang       /* search in this row for the specified colmun, i indicates the column to be set */
1299*d4002b98SHong Zhang       if (col <= lastcol) low = 0;
1300*d4002b98SHong Zhang       else high = nrow;
1301*d4002b98SHong Zhang       lastcol = col;
1302*d4002b98SHong Zhang       while (high-low > 5) {
1303*d4002b98SHong Zhang         t = (low+high)/2;
1304*d4002b98SHong Zhang         if (*(cp+t*8) > col) high = t;
1305*d4002b98SHong Zhang         else low = t;
1306*d4002b98SHong Zhang       }
1307*d4002b98SHong Zhang       for (i=low; i<high; i++) {
1308*d4002b98SHong Zhang         if (*(cp+i*8) > col) break;
1309*d4002b98SHong Zhang         if (*(cp+i*8) == col) {
1310*d4002b98SHong Zhang           if (is == ADD_VALUES) *(vp+i*8) += value;
1311*d4002b98SHong Zhang           else *(vp+i*8) = value;
1312*d4002b98SHong Zhang           low = i + 1;
1313*d4002b98SHong Zhang           goto noinsert;
1314*d4002b98SHong Zhang         }
1315*d4002b98SHong Zhang       }
1316*d4002b98SHong Zhang       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1317*d4002b98SHong Zhang       if (nonew == 1) goto noinsert;
1318*d4002b98SHong Zhang       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1319*d4002b98SHong Zhang       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1320*d4002b98SHong Zhang       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1321*d4002b98SHong Zhang       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1322*d4002b98SHong Zhang       for (ii=nrow-1; ii>=i; ii--) {
1323*d4002b98SHong Zhang         *(cp+(ii+1)*8) = *(cp+ii*8);
1324*d4002b98SHong Zhang         *(vp+(ii+1)*8) = *(vp+ii*8);
1325*d4002b98SHong Zhang       }
1326*d4002b98SHong Zhang       a->rlen[row]++;
1327*d4002b98SHong Zhang       *(cp+i*8) = col;
1328*d4002b98SHong Zhang       *(vp+i*8) = value;
1329*d4002b98SHong Zhang       a->nz++;
1330*d4002b98SHong Zhang       A->nonzerostate++;
1331*d4002b98SHong Zhang       low = i+1; high++; nrow++;
1332*d4002b98SHong Zhang noinsert:;
1333*d4002b98SHong Zhang     }
1334*d4002b98SHong Zhang     a->rlen[row] = nrow;
1335*d4002b98SHong Zhang   }
1336*d4002b98SHong Zhang   PetscFunctionReturn(0);
1337*d4002b98SHong Zhang }
1338*d4002b98SHong Zhang 
1339*d4002b98SHong Zhang PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1340*d4002b98SHong Zhang {
1341*d4002b98SHong Zhang   PetscErrorCode ierr;
1342*d4002b98SHong Zhang 
1343*d4002b98SHong Zhang   PetscFunctionBegin;
1344*d4002b98SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
1345*d4002b98SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1346*d4002b98SHong Zhang     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1347*d4002b98SHong Zhang     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1348*d4002b98SHong Zhang 
1349*d4002b98SHong Zhang     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1350*d4002b98SHong Zhang     ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1351*d4002b98SHong Zhang   } else {
1352*d4002b98SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1353*d4002b98SHong Zhang   }
1354*d4002b98SHong Zhang   PetscFunctionReturn(0);
1355*d4002b98SHong Zhang }
1356*d4002b98SHong Zhang 
1357*d4002b98SHong Zhang PetscErrorCode MatSetUp_SeqSELL(Mat A)
1358*d4002b98SHong Zhang {
1359*d4002b98SHong Zhang   PetscErrorCode ierr;
1360*d4002b98SHong Zhang 
1361*d4002b98SHong Zhang   PetscFunctionBegin;
1362*d4002b98SHong Zhang   ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr);
1363*d4002b98SHong Zhang   PetscFunctionReturn(0);
1364*d4002b98SHong Zhang }
1365*d4002b98SHong Zhang 
1366*d4002b98SHong Zhang PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1367*d4002b98SHong Zhang {
1368*d4002b98SHong Zhang   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1369*d4002b98SHong Zhang 
1370*d4002b98SHong Zhang   PetscFunctionBegin;
1371*d4002b98SHong Zhang   *array = a->val;
1372*d4002b98SHong Zhang   PetscFunctionReturn(0);
1373*d4002b98SHong Zhang }
1374*d4002b98SHong Zhang 
1375*d4002b98SHong Zhang PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1376*d4002b98SHong Zhang {
1377*d4002b98SHong Zhang   PetscFunctionBegin;
1378*d4002b98SHong Zhang   PetscFunctionReturn(0);
1379*d4002b98SHong Zhang }
1380*d4002b98SHong Zhang 
1381*d4002b98SHong Zhang PetscErrorCode MatRealPart_SeqSELL(Mat A)
1382*d4002b98SHong Zhang {
1383*d4002b98SHong Zhang   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1384*d4002b98SHong Zhang   PetscInt    i;
1385*d4002b98SHong Zhang   MatScalar   *aval=a->val;
1386*d4002b98SHong Zhang 
1387*d4002b98SHong Zhang   PetscFunctionBegin;
1388*d4002b98SHong Zhang   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1389*d4002b98SHong Zhang   PetscFunctionReturn(0);
1390*d4002b98SHong Zhang }
1391*d4002b98SHong Zhang 
1392*d4002b98SHong Zhang PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1393*d4002b98SHong Zhang {
1394*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1395*d4002b98SHong Zhang   PetscInt       i;
1396*d4002b98SHong Zhang   MatScalar      *aval=a->val;
1397*d4002b98SHong Zhang   PetscErrorCode ierr;
1398*d4002b98SHong Zhang 
1399*d4002b98SHong Zhang   PetscFunctionBegin;
1400*d4002b98SHong Zhang   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1401*d4002b98SHong Zhang   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1402*d4002b98SHong Zhang   PetscFunctionReturn(0);
1403*d4002b98SHong Zhang }
1404*d4002b98SHong Zhang 
1405*d4002b98SHong Zhang PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1406*d4002b98SHong Zhang {
1407*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1408*d4002b98SHong Zhang   MatScalar      *aval=a->val;
1409*d4002b98SHong Zhang   PetscScalar    oalpha=alpha;
1410*d4002b98SHong Zhang   PetscBLASInt   one=1,size;
1411*d4002b98SHong Zhang   PetscErrorCode ierr;
1412*d4002b98SHong Zhang 
1413*d4002b98SHong Zhang   PetscFunctionBegin;
1414*d4002b98SHong Zhang   ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr);
1415*d4002b98SHong Zhang   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1416*d4002b98SHong Zhang   ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1417*d4002b98SHong Zhang   ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr);
1418*d4002b98SHong Zhang   PetscFunctionReturn(0);
1419*d4002b98SHong Zhang }
1420*d4002b98SHong Zhang 
1421*d4002b98SHong Zhang PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1422*d4002b98SHong Zhang {
1423*d4002b98SHong Zhang   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;
1424*d4002b98SHong Zhang   PetscErrorCode ierr;
1425*d4002b98SHong Zhang 
1426*d4002b98SHong Zhang   PetscFunctionBegin;
1427*d4002b98SHong Zhang   if (!Y->preallocated || !y->nz) {
1428*d4002b98SHong Zhang     ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr);
1429*d4002b98SHong Zhang   }
1430*d4002b98SHong Zhang   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1431*d4002b98SHong Zhang   PetscFunctionReturn(0);
1432*d4002b98SHong Zhang }
1433*d4002b98SHong Zhang 
1434*d4002b98SHong Zhang PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1435*d4002b98SHong Zhang {
1436*d4002b98SHong Zhang   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1437*d4002b98SHong Zhang   PetscScalar       *x,sum,*t;
1438*d4002b98SHong Zhang   const MatScalar   *idiag=0,*mdiag;
1439*d4002b98SHong Zhang   const PetscScalar *b,*xb;
1440*d4002b98SHong Zhang   PetscInt          n,m=A->rmap->n,i,j,shift;
1441*d4002b98SHong Zhang   const PetscInt    *diag;
1442*d4002b98SHong Zhang   PetscErrorCode    ierr;
1443*d4002b98SHong Zhang 
1444*d4002b98SHong Zhang   PetscFunctionBegin;
1445*d4002b98SHong Zhang   its = its*lits;
1446*d4002b98SHong Zhang 
1447*d4002b98SHong Zhang   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1448*d4002b98SHong Zhang   if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);}
1449*d4002b98SHong Zhang   a->fshift = fshift;
1450*d4002b98SHong Zhang   a->omega  = omega;
1451*d4002b98SHong Zhang 
1452*d4002b98SHong Zhang   diag  = a->diag;
1453*d4002b98SHong Zhang   t     = a->ssor_work;
1454*d4002b98SHong Zhang   idiag = a->idiag;
1455*d4002b98SHong Zhang   mdiag = a->mdiag;
1456*d4002b98SHong Zhang 
1457*d4002b98SHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1458*d4002b98SHong Zhang   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1459*d4002b98SHong Zhang   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1460*d4002b98SHong Zhang   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1461*d4002b98SHong Zhang   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1462*d4002b98SHong Zhang   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1463*d4002b98SHong Zhang 
1464*d4002b98SHong Zhang   if (flag & SOR_ZERO_INITIAL_GUESS) {
1465*d4002b98SHong Zhang     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1466*d4002b98SHong Zhang       for (i=0; i<m; i++) {
1467*d4002b98SHong Zhang         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1468*d4002b98SHong Zhang         sum   = b[i];
1469*d4002b98SHong Zhang         n     = (diag[i]-shift)/8;
1470*d4002b98SHong Zhang         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1471*d4002b98SHong Zhang         t[i]  = sum;
1472*d4002b98SHong Zhang         x[i]  = sum*idiag[i];
1473*d4002b98SHong Zhang       }
1474*d4002b98SHong Zhang       xb   = t;
1475*d4002b98SHong Zhang       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1476*d4002b98SHong Zhang     } else xb = b;
1477*d4002b98SHong Zhang     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1478*d4002b98SHong Zhang       for (i=m-1; i>=0; i--) {
1479*d4002b98SHong Zhang         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1480*d4002b98SHong Zhang         sum   = xb[i];
1481*d4002b98SHong Zhang         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1482*d4002b98SHong Zhang         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1483*d4002b98SHong Zhang         if (xb == b) {
1484*d4002b98SHong Zhang           x[i] = sum*idiag[i];
1485*d4002b98SHong Zhang         } else {
1486*d4002b98SHong Zhang           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1487*d4002b98SHong Zhang         }
1488*d4002b98SHong Zhang       }
1489*d4002b98SHong Zhang       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1490*d4002b98SHong Zhang     }
1491*d4002b98SHong Zhang     its--;
1492*d4002b98SHong Zhang   }
1493*d4002b98SHong Zhang   while (its--) {
1494*d4002b98SHong Zhang     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1495*d4002b98SHong Zhang       for (i=0; i<m; i++) {
1496*d4002b98SHong Zhang         /* lower */
1497*d4002b98SHong Zhang         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1498*d4002b98SHong Zhang         sum   = b[i];
1499*d4002b98SHong Zhang         n     = (diag[i]-shift)/8;
1500*d4002b98SHong Zhang         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1501*d4002b98SHong Zhang         t[i]  = sum;             /* save application of the lower-triangular part */
1502*d4002b98SHong Zhang         /* upper */
1503*d4002b98SHong Zhang         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1504*d4002b98SHong Zhang         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1505*d4002b98SHong Zhang         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1506*d4002b98SHong Zhang       }
1507*d4002b98SHong Zhang       xb   = t;
1508*d4002b98SHong Zhang       ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1509*d4002b98SHong Zhang     } else xb = b;
1510*d4002b98SHong Zhang     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1511*d4002b98SHong Zhang       for (i=m-1; i>=0; i--) {
1512*d4002b98SHong Zhang         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1513*d4002b98SHong Zhang         sum = xb[i];
1514*d4002b98SHong Zhang         if (xb == b) {
1515*d4002b98SHong Zhang           /* whole matrix (no checkpointing available) */
1516*d4002b98SHong Zhang           n     = a->rlen[i];
1517*d4002b98SHong Zhang           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1518*d4002b98SHong Zhang           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1519*d4002b98SHong Zhang         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1520*d4002b98SHong Zhang           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1521*d4002b98SHong Zhang           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1522*d4002b98SHong Zhang           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1523*d4002b98SHong Zhang         }
1524*d4002b98SHong Zhang       }
1525*d4002b98SHong Zhang       if (xb == b) {
1526*d4002b98SHong Zhang         ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1527*d4002b98SHong Zhang       } else {
1528*d4002b98SHong Zhang         ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1529*d4002b98SHong Zhang       }
1530*d4002b98SHong Zhang     }
1531*d4002b98SHong Zhang   }
1532*d4002b98SHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1533*d4002b98SHong Zhang   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1534*d4002b98SHong Zhang   PetscFunctionReturn(0);
1535*d4002b98SHong Zhang }
1536*d4002b98SHong Zhang 
1537*d4002b98SHong Zhang /* -------------------------------------------------------------------*/
1538*d4002b98SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1539*d4002b98SHong Zhang                                        0,
1540*d4002b98SHong Zhang                                        0,
1541*d4002b98SHong Zhang                                        MatMult_SeqSELL,
1542*d4002b98SHong Zhang                                /* 4*/  MatMultAdd_SeqSELL,
1543*d4002b98SHong Zhang                                        MatMultTranspose_SeqSELL,
1544*d4002b98SHong Zhang                                        MatMultTransposeAdd_SeqSELL,
1545*d4002b98SHong Zhang                                        0,
1546*d4002b98SHong Zhang                                        0,
1547*d4002b98SHong Zhang                                        0,
1548*d4002b98SHong Zhang                                /* 10*/ 0,
1549*d4002b98SHong Zhang                                        0,
1550*d4002b98SHong Zhang                                        0,
1551*d4002b98SHong Zhang                                        MatSOR_SeqSELL,
1552*d4002b98SHong Zhang                                        0,
1553*d4002b98SHong Zhang                                /* 15*/ MatGetInfo_SeqSELL,
1554*d4002b98SHong Zhang                                        MatEqual_SeqSELL,
1555*d4002b98SHong Zhang                                        MatGetDiagonal_SeqSELL,
1556*d4002b98SHong Zhang                                        MatDiagonalScale_SeqSELL,
1557*d4002b98SHong Zhang                                        0,
1558*d4002b98SHong Zhang                                /* 20*/ 0,
1559*d4002b98SHong Zhang                                        MatAssemblyEnd_SeqSELL,
1560*d4002b98SHong Zhang                                        MatSetOption_SeqSELL,
1561*d4002b98SHong Zhang                                        MatZeroEntries_SeqSELL,
1562*d4002b98SHong Zhang                                /* 24*/ 0,
1563*d4002b98SHong Zhang                                        0,
1564*d4002b98SHong Zhang                                        0,
1565*d4002b98SHong Zhang                                        0,
1566*d4002b98SHong Zhang                                        0,
1567*d4002b98SHong Zhang                                /* 29*/ MatSetUp_SeqSELL,
1568*d4002b98SHong Zhang                                        0,
1569*d4002b98SHong Zhang                                        0,
1570*d4002b98SHong Zhang                                        0,
1571*d4002b98SHong Zhang                                        0,
1572*d4002b98SHong Zhang                                /* 34*/ MatDuplicate_SeqSELL,
1573*d4002b98SHong Zhang                                        0,
1574*d4002b98SHong Zhang                                        0,
1575*d4002b98SHong Zhang                                        0,
1576*d4002b98SHong Zhang                                        0,
1577*d4002b98SHong Zhang                                /* 39*/ 0,
1578*d4002b98SHong Zhang                                        0,
1579*d4002b98SHong Zhang                                        0,
1580*d4002b98SHong Zhang                                        MatGetValues_SeqSELL,
1581*d4002b98SHong Zhang                                        MatCopy_SeqSELL,
1582*d4002b98SHong Zhang                                /* 44*/ 0,
1583*d4002b98SHong Zhang                                        MatScale_SeqSELL,
1584*d4002b98SHong Zhang                                        MatShift_SeqSELL,
1585*d4002b98SHong Zhang                                        0,
1586*d4002b98SHong Zhang                                        0,
1587*d4002b98SHong Zhang                                /* 49*/ 0,
1588*d4002b98SHong Zhang                                        0,
1589*d4002b98SHong Zhang                                        0,
1590*d4002b98SHong Zhang                                        0,
1591*d4002b98SHong Zhang                                        0,
1592*d4002b98SHong Zhang                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1593*d4002b98SHong Zhang                                        0,
1594*d4002b98SHong Zhang                                        0,
1595*d4002b98SHong Zhang                                        0,
1596*d4002b98SHong Zhang                                        0,
1597*d4002b98SHong Zhang                                /* 59*/ 0,
1598*d4002b98SHong Zhang                                        MatDestroy_SeqSELL,
1599*d4002b98SHong Zhang                                        MatView_SeqSELL,
1600*d4002b98SHong Zhang                                        0,
1601*d4002b98SHong Zhang                                        0,
1602*d4002b98SHong Zhang                                /* 64*/ 0,
1603*d4002b98SHong Zhang                                        0,
1604*d4002b98SHong Zhang                                        0,
1605*d4002b98SHong Zhang                                        0,
1606*d4002b98SHong Zhang                                        0,
1607*d4002b98SHong Zhang                                /* 69*/ 0,
1608*d4002b98SHong Zhang                                        0,
1609*d4002b98SHong Zhang                                        0,
1610*d4002b98SHong Zhang                                        0,
1611*d4002b98SHong Zhang                                        0,
1612*d4002b98SHong Zhang                                /* 74*/ 0,
1613*d4002b98SHong Zhang                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1614*d4002b98SHong Zhang                                        0,
1615*d4002b98SHong Zhang                                        0,
1616*d4002b98SHong Zhang                                        0,
1617*d4002b98SHong Zhang                                /* 79*/ 0,
1618*d4002b98SHong Zhang                                        0,
1619*d4002b98SHong Zhang                                        0,
1620*d4002b98SHong Zhang                                        0,
1621*d4002b98SHong Zhang                                        0,
1622*d4002b98SHong Zhang                                /* 84*/ 0,
1623*d4002b98SHong Zhang                                        0,
1624*d4002b98SHong Zhang                                        0,
1625*d4002b98SHong Zhang                                        0,
1626*d4002b98SHong Zhang                                        0,
1627*d4002b98SHong Zhang                                /* 89*/ 0,
1628*d4002b98SHong Zhang                                        0,
1629*d4002b98SHong Zhang                                        0,
1630*d4002b98SHong Zhang                                        0,
1631*d4002b98SHong Zhang                                        0,
1632*d4002b98SHong Zhang                                /* 94*/ 0,
1633*d4002b98SHong Zhang                                        0,
1634*d4002b98SHong Zhang                                        0,
1635*d4002b98SHong Zhang                                        0,
1636*d4002b98SHong Zhang                                        0,
1637*d4002b98SHong Zhang                                /* 99*/ 0,
1638*d4002b98SHong Zhang                                        0,
1639*d4002b98SHong Zhang                                        0,
1640*d4002b98SHong Zhang                                        MatConjugate_SeqSELL,
1641*d4002b98SHong Zhang                                        0,
1642*d4002b98SHong Zhang                                /*104*/ 0,
1643*d4002b98SHong Zhang                                        0,
1644*d4002b98SHong Zhang                                        0,
1645*d4002b98SHong Zhang                                        0,
1646*d4002b98SHong Zhang                                        0,
1647*d4002b98SHong Zhang                                /*109*/ 0,
1648*d4002b98SHong Zhang                                        0,
1649*d4002b98SHong Zhang                                        0,
1650*d4002b98SHong Zhang                                        0,
1651*d4002b98SHong Zhang                                        MatMissingDiagonal_SeqSELL,
1652*d4002b98SHong Zhang                                /*114*/ 0,
1653*d4002b98SHong Zhang                                        0,
1654*d4002b98SHong Zhang                                        0,
1655*d4002b98SHong Zhang                                        0,
1656*d4002b98SHong Zhang                                        0,
1657*d4002b98SHong Zhang                                /*119*/ 0,
1658*d4002b98SHong Zhang                                        0,
1659*d4002b98SHong Zhang                                        0,
1660*d4002b98SHong Zhang                                        0,
1661*d4002b98SHong Zhang                                        0,
1662*d4002b98SHong Zhang                                /*124*/ 0,
1663*d4002b98SHong Zhang                                        0,
1664*d4002b98SHong Zhang                                        0,
1665*d4002b98SHong Zhang                                        0,
1666*d4002b98SHong Zhang                                        0,
1667*d4002b98SHong Zhang                                /*129*/ 0,
1668*d4002b98SHong Zhang                                        0,
1669*d4002b98SHong Zhang                                        0,
1670*d4002b98SHong Zhang                                        0,
1671*d4002b98SHong Zhang                                        0,
1672*d4002b98SHong Zhang                                /*134*/ 0,
1673*d4002b98SHong Zhang                                        0,
1674*d4002b98SHong Zhang                                        0,
1675*d4002b98SHong Zhang                                        0,
1676*d4002b98SHong Zhang                                        0,
1677*d4002b98SHong Zhang                                /*139*/ 0,
1678*d4002b98SHong Zhang                                        0,
1679*d4002b98SHong Zhang                                        0,
1680*d4002b98SHong Zhang                                        MatFDColoringSetUp_SeqXAIJ,
1681*d4002b98SHong Zhang                                        0,
1682*d4002b98SHong Zhang                                 /*144*/0
1683*d4002b98SHong Zhang };
1684*d4002b98SHong Zhang 
1685*d4002b98SHong Zhang PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1686*d4002b98SHong Zhang {
1687*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1688*d4002b98SHong Zhang   PetscErrorCode ierr;
1689*d4002b98SHong Zhang 
1690*d4002b98SHong Zhang   PetscFunctionBegin;
1691*d4002b98SHong Zhang   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1692*d4002b98SHong Zhang 
1693*d4002b98SHong Zhang   /* allocate space for values if not already there */
1694*d4002b98SHong Zhang   if (!a->saved_values) {
1695*d4002b98SHong Zhang     ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr);
1696*d4002b98SHong Zhang     ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1697*d4002b98SHong Zhang   }
1698*d4002b98SHong Zhang 
1699*d4002b98SHong Zhang   /* copy values over */
1700*d4002b98SHong Zhang   ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1701*d4002b98SHong Zhang   PetscFunctionReturn(0);
1702*d4002b98SHong Zhang }
1703*d4002b98SHong Zhang 
1704*d4002b98SHong Zhang PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1705*d4002b98SHong Zhang {
1706*d4002b98SHong Zhang   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1707*d4002b98SHong Zhang   PetscErrorCode ierr;
1708*d4002b98SHong Zhang 
1709*d4002b98SHong Zhang   PetscFunctionBegin;
1710*d4002b98SHong Zhang   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1711*d4002b98SHong Zhang   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1712*d4002b98SHong Zhang   /* copy values over */
1713*d4002b98SHong Zhang   ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1714*d4002b98SHong Zhang   PetscFunctionReturn(0);
1715*d4002b98SHong Zhang }
1716*d4002b98SHong Zhang 
1717*d4002b98SHong Zhang /*@C
1718*d4002b98SHong Zhang  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1719*d4002b98SHong Zhang 
1720*d4002b98SHong Zhang  Not Collective
1721*d4002b98SHong Zhang 
1722*d4002b98SHong Zhang  Input Parameters:
1723*d4002b98SHong Zhang  .  mat - a MATSEQSELL matrix
1724*d4002b98SHong Zhang  .  array - pointer to the data
1725*d4002b98SHong Zhang 
1726*d4002b98SHong Zhang  Level: intermediate
1727*d4002b98SHong Zhang 
1728*d4002b98SHong Zhang  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1729*d4002b98SHong Zhang  @*/
1730*d4002b98SHong Zhang PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1731*d4002b98SHong Zhang {
1732*d4002b98SHong Zhang   PetscErrorCode ierr;
1733*d4002b98SHong Zhang 
1734*d4002b98SHong Zhang   PetscFunctionBegin;
1735*d4002b98SHong Zhang   ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
1736*d4002b98SHong Zhang   PetscFunctionReturn(0);
1737*d4002b98SHong Zhang }
1738*d4002b98SHong Zhang 
1739*d4002b98SHong Zhang PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1740*d4002b98SHong Zhang {
1741*d4002b98SHong Zhang   Mat_SeqSELL    *b;
1742*d4002b98SHong Zhang   PetscMPIInt    size;
1743*d4002b98SHong Zhang   PetscErrorCode ierr;
1744*d4002b98SHong Zhang 
1745*d4002b98SHong Zhang   PetscFunctionBegin;
1746*d4002b98SHong Zhang   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1747*d4002b98SHong Zhang   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1748*d4002b98SHong Zhang 
1749*d4002b98SHong Zhang   ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
1750*d4002b98SHong Zhang 
1751*d4002b98SHong Zhang   B->data = (void*)b;
1752*d4002b98SHong Zhang 
1753*d4002b98SHong Zhang   ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1754*d4002b98SHong Zhang 
1755*d4002b98SHong Zhang   b->row                = 0;
1756*d4002b98SHong Zhang   b->col                = 0;
1757*d4002b98SHong Zhang   b->icol               = 0;
1758*d4002b98SHong Zhang   b->reallocs           = 0;
1759*d4002b98SHong Zhang   b->ignorezeroentries  = PETSC_FALSE;
1760*d4002b98SHong Zhang   b->roworiented        = PETSC_TRUE;
1761*d4002b98SHong Zhang   b->nonew              = 0;
1762*d4002b98SHong Zhang   b->diag               = 0;
1763*d4002b98SHong Zhang   b->solve_work         = 0;
1764*d4002b98SHong Zhang   B->spptr              = 0;
1765*d4002b98SHong Zhang   b->saved_values       = 0;
1766*d4002b98SHong Zhang   b->idiag              = 0;
1767*d4002b98SHong Zhang   b->mdiag              = 0;
1768*d4002b98SHong Zhang   b->ssor_work          = 0;
1769*d4002b98SHong Zhang   b->omega              = 1.0;
1770*d4002b98SHong Zhang   b->fshift             = 0.0;
1771*d4002b98SHong Zhang   b->idiagvalid         = PETSC_FALSE;
1772*d4002b98SHong Zhang   b->ibdiagvalid        = PETSC_FALSE;
1773*d4002b98SHong Zhang   b->keepnonzeropattern = PETSC_FALSE;
1774*d4002b98SHong Zhang 
1775*d4002b98SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr);
1776*d4002b98SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr);
1777*d4002b98SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr);
1778*d4002b98SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr);
1779*d4002b98SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr);
1780*d4002b98SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr);
1781*d4002b98SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr);
1782*d4002b98SHong Zhang   PetscFunctionReturn(0);
1783*d4002b98SHong Zhang }
1784*d4002b98SHong Zhang 
1785*d4002b98SHong Zhang /*
1786*d4002b98SHong Zhang  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1787*d4002b98SHong Zhang  */
1788*d4002b98SHong Zhang PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1789*d4002b98SHong Zhang {
1790*d4002b98SHong Zhang   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1791*d4002b98SHong Zhang   PetscInt       i,m=A->rmap->n;
1792*d4002b98SHong Zhang   PetscInt       totalslices=a->totalslices;
1793*d4002b98SHong Zhang   PetscErrorCode ierr;
1794*d4002b98SHong Zhang 
1795*d4002b98SHong Zhang   PetscFunctionBegin;
1796*d4002b98SHong Zhang   c = (Mat_SeqSELL*)C->data;
1797*d4002b98SHong Zhang 
1798*d4002b98SHong Zhang   C->factortype = A->factortype;
1799*d4002b98SHong Zhang   c->row        = 0;
1800*d4002b98SHong Zhang   c->col        = 0;
1801*d4002b98SHong Zhang   c->icol       = 0;
1802*d4002b98SHong Zhang   c->reallocs   = 0;
1803*d4002b98SHong Zhang 
1804*d4002b98SHong Zhang   C->assembled = PETSC_TRUE;
1805*d4002b98SHong Zhang 
1806*d4002b98SHong Zhang   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
1807*d4002b98SHong Zhang   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
1808*d4002b98SHong Zhang 
1809*d4002b98SHong Zhang   ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr);
1810*d4002b98SHong Zhang   ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1811*d4002b98SHong Zhang   ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr);
1812*d4002b98SHong Zhang   ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
1813*d4002b98SHong Zhang 
1814*d4002b98SHong Zhang   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
1815*d4002b98SHong Zhang   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
1816*d4002b98SHong Zhang 
1817*d4002b98SHong Zhang   /* allocate the matrix space */
1818*d4002b98SHong Zhang   if (mallocmatspace) {
1819*d4002b98SHong Zhang     ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr);
1820*d4002b98SHong Zhang     ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1821*d4002b98SHong Zhang 
1822*d4002b98SHong Zhang     c->singlemalloc = PETSC_TRUE;
1823*d4002b98SHong Zhang 
1824*d4002b98SHong Zhang     if (m > 0) {
1825*d4002b98SHong Zhang       ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr);
1826*d4002b98SHong Zhang       if (cpvalues == MAT_COPY_VALUES) {
1827*d4002b98SHong Zhang         ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1828*d4002b98SHong Zhang       } else {
1829*d4002b98SHong Zhang         ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1830*d4002b98SHong Zhang       }
1831*d4002b98SHong Zhang     }
1832*d4002b98SHong Zhang   }
1833*d4002b98SHong Zhang 
1834*d4002b98SHong Zhang   c->ignorezeroentries = a->ignorezeroentries;
1835*d4002b98SHong Zhang   c->roworiented       = a->roworiented;
1836*d4002b98SHong Zhang   c->nonew             = a->nonew;
1837*d4002b98SHong Zhang   if (a->diag) {
1838*d4002b98SHong Zhang     ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr);
1839*d4002b98SHong Zhang     ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1840*d4002b98SHong Zhang     for (i=0; i<m; i++) {
1841*d4002b98SHong Zhang       c->diag[i] = a->diag[i];
1842*d4002b98SHong Zhang     }
1843*d4002b98SHong Zhang   } else c->diag = 0;
1844*d4002b98SHong Zhang 
1845*d4002b98SHong Zhang   c->solve_work         = 0;
1846*d4002b98SHong Zhang   c->saved_values       = 0;
1847*d4002b98SHong Zhang   c->idiag              = 0;
1848*d4002b98SHong Zhang   c->ssor_work          = 0;
1849*d4002b98SHong Zhang   c->keepnonzeropattern = a->keepnonzeropattern;
1850*d4002b98SHong Zhang   c->free_val           = PETSC_TRUE;
1851*d4002b98SHong Zhang   c->free_colidx        = PETSC_TRUE;
1852*d4002b98SHong Zhang 
1853*d4002b98SHong Zhang   c->maxallocmat  = a->maxallocmat;
1854*d4002b98SHong Zhang   c->maxallocrow  = a->maxallocrow;
1855*d4002b98SHong Zhang   c->rlenmax      = a->rlenmax;
1856*d4002b98SHong Zhang   c->nz           = a->nz;
1857*d4002b98SHong Zhang   C->preallocated = PETSC_TRUE;
1858*d4002b98SHong Zhang 
1859*d4002b98SHong Zhang   c->nonzerorowcnt = a->nonzerorowcnt;
1860*d4002b98SHong Zhang   C->nonzerostate  = A->nonzerostate;
1861*d4002b98SHong Zhang 
1862*d4002b98SHong Zhang   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
1863*d4002b98SHong Zhang   PetscFunctionReturn(0);
1864*d4002b98SHong Zhang }
1865*d4002b98SHong Zhang 
1866*d4002b98SHong Zhang PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
1867*d4002b98SHong Zhang {
1868*d4002b98SHong Zhang   PetscErrorCode ierr;
1869*d4002b98SHong Zhang 
1870*d4002b98SHong Zhang   PetscFunctionBegin;
1871*d4002b98SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
1872*d4002b98SHong Zhang   ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
1873*d4002b98SHong Zhang   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
1874*d4002b98SHong Zhang     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
1875*d4002b98SHong Zhang   }
1876*d4002b98SHong Zhang   ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1877*d4002b98SHong Zhang   ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
1878*d4002b98SHong Zhang   PetscFunctionReturn(0);
1879*d4002b98SHong Zhang }
1880*d4002b98SHong Zhang 
1881*d4002b98SHong Zhang /*@C
1882*d4002b98SHong Zhang  MatCreateSeqSELL - Creates a sparse matrix in SELL format.
1883*d4002b98SHong Zhang 
1884*d4002b98SHong Zhang  Collective on MPI_Comm
1885*d4002b98SHong Zhang 
1886*d4002b98SHong Zhang  Input Parameters:
1887*d4002b98SHong Zhang  +  comm - MPI communicator, set to PETSC_COMM_SELF
1888*d4002b98SHong Zhang  .  m - number of rows
1889*d4002b98SHong Zhang  .  n - number of columns
1890*d4002b98SHong Zhang  .  rlenmax - maximum number of nonzeros in a row
1891*d4002b98SHong Zhang  -  rlen - array containing the number of nonzeros in the various rows
1892*d4002b98SHong Zhang  (possibly different for each row) or NULL
1893*d4002b98SHong Zhang 
1894*d4002b98SHong Zhang  Output Parameter:
1895*d4002b98SHong Zhang  .  A - the matrix
1896*d4002b98SHong Zhang 
1897*d4002b98SHong Zhang  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1898*d4002b98SHong Zhang  MatXXXXSetPreallocation() paradgm instead of this routine directly.
1899*d4002b98SHong Zhang  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
1900*d4002b98SHong Zhang 
1901*d4002b98SHong Zhang  Notes:
1902*d4002b98SHong Zhang  If nnz is given then nz is ignored
1903*d4002b98SHong Zhang 
1904*d4002b98SHong Zhang  Specify the preallocated storage with either rlenmax or rlen (not both).
1905*d4002b98SHong Zhang  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
1906*d4002b98SHong Zhang  allocation.  For large problems you MUST preallocate memory or you
1907*d4002b98SHong Zhang  will get TERRIBLE performance, see the users' manual chapter on matrices.
1908*d4002b98SHong Zhang 
1909*d4002b98SHong Zhang  Level: intermediate
1910*d4002b98SHong Zhang 
1911*d4002b98SHong Zhang  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()
1912*d4002b98SHong Zhang 
1913*d4002b98SHong Zhang  @*/
1914*d4002b98SHong Zhang PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
1915*d4002b98SHong Zhang {
1916*d4002b98SHong Zhang   PetscErrorCode ierr;
1917*d4002b98SHong Zhang 
1918*d4002b98SHong Zhang   PetscFunctionBegin;
1919*d4002b98SHong Zhang   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1920*d4002b98SHong Zhang   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1921*d4002b98SHong Zhang   ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr);
1922*d4002b98SHong Zhang   ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr);
1923*d4002b98SHong Zhang   PetscFunctionReturn(0);
1924*d4002b98SHong Zhang }
1925*d4002b98SHong Zhang 
1926*d4002b98SHong Zhang PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
1927*d4002b98SHong Zhang {
1928*d4002b98SHong Zhang   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
1929*d4002b98SHong Zhang   PetscInt       totalslices=a->totalslices;
1930*d4002b98SHong Zhang   PetscErrorCode ierr;
1931*d4002b98SHong Zhang 
1932*d4002b98SHong Zhang   PetscFunctionBegin;
1933*d4002b98SHong Zhang   /* If the  matrix dimensions are not equal,or no of nonzeros */
1934*d4002b98SHong Zhang   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
1935*d4002b98SHong Zhang     *flg = PETSC_FALSE;
1936*d4002b98SHong Zhang     PetscFunctionReturn(0);
1937*d4002b98SHong Zhang   }
1938*d4002b98SHong Zhang   /* if the a->colidx are the same */
1939*d4002b98SHong Zhang   ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr);
1940*d4002b98SHong Zhang   if (!*flg) PetscFunctionReturn(0);
1941*d4002b98SHong Zhang   /* if a->val are the same */
1942*d4002b98SHong Zhang   ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1943*d4002b98SHong Zhang   PetscFunctionReturn(0);
1944*d4002b98SHong Zhang }
1945*d4002b98SHong Zhang 
1946*d4002b98SHong Zhang PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
1947*d4002b98SHong Zhang {
1948*d4002b98SHong Zhang   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1949*d4002b98SHong Zhang 
1950*d4002b98SHong Zhang   PetscFunctionBegin;
1951*d4002b98SHong Zhang   a->idiagvalid  = PETSC_FALSE;
1952*d4002b98SHong Zhang   a->ibdiagvalid = PETSC_FALSE;
1953*d4002b98SHong Zhang   PetscFunctionReturn(0);
1954*d4002b98SHong Zhang }
1955*d4002b98SHong Zhang 
1956*d4002b98SHong Zhang PetscErrorCode MatConjugate_SeqSELL(Mat A)
1957*d4002b98SHong Zhang {
1958*d4002b98SHong Zhang #if defined(PETSC_USE_COMPLEX)
1959*d4002b98SHong Zhang   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1960*d4002b98SHong Zhang   PetscInt    i;
1961*d4002b98SHong Zhang   PetscScalar *val = a->val;
1962*d4002b98SHong Zhang 
1963*d4002b98SHong Zhang   PetscFunctionBegin;
1964*d4002b98SHong Zhang   for (i=0; i<a->sliidx[a->totalslices]; i++) {
1965*d4002b98SHong Zhang     val[i] = PetscConj(val[i]);
1966*d4002b98SHong Zhang   }
1967*d4002b98SHong Zhang #else
1968*d4002b98SHong Zhang   PetscFunctionBegin;
1969*d4002b98SHong Zhang #endif
1970*d4002b98SHong Zhang   PetscFunctionReturn(0);
1971*d4002b98SHong Zhang }
1972