xref: /petsc/src/mat/impls/sell/seq/sell.c (revision d5e5b2e5ca62a8992987e93de468e8647daa7936)
1 
2 /*
3   Defines the basic matrix operations for the SELL matrix storage format.
4 */
5 #include <../src/mat/impls/sell/seq/sell.h>  /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blocktranspose.h>
8 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
9 
10   #include <immintrin.h>
11 
12   #if !defined(_MM_SCALE_8)
13   #define _MM_SCALE_8    8
14   #endif
15 
16   #if defined(__AVX512F__)
17   /* these do not work
18    vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
19    vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
20   */
21     #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
22     /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
23     vec_idx  = _mm256_loadu_si256((__m256i const*)acolidx); \
24     vec_vals = _mm512_loadu_pd(aval); \
25     vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
26     vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y)
27   #elif defined(__AVX2__) && defined(__FMA__)
28     #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
29     vec_vals = _mm256_loadu_pd(aval); \
30     vec_idx  = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \
31     vec_x    = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \
32     vec_y    = _mm256_fmadd_pd(vec_x,vec_vals,vec_y)
33   #endif
34 #endif  /* PETSC_HAVE_IMMINTRIN_H */
35 
36 /*@C
37  MatSeqSELLSetPreallocation - For good matrix assembly performance
38  the user should preallocate the matrix storage by setting the parameter nz
39  (or the array nnz).  By setting these parameters accurately, performance
40  during matrix assembly can be increased significantly.
41 
42  Collective
43 
44  Input Parameters:
45  +  B - The matrix
46  .  nz - number of nonzeros per row (same for all rows)
47  -  nnz - array containing the number of nonzeros in the various rows
48  (possibly different for each row) or NULL
49 
50  Notes:
51  If nnz is given then nz is ignored.
52 
53  Specify the preallocated storage with either nz or nnz (not both).
54  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
55  allocation.  For large problems you MUST preallocate memory or you
56  will get TERRIBLE performance, see the users' manual chapter on matrices.
57 
58  You can call MatGetInfo() to get information on how effective the preallocation was;
59  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
60  You can also run with the option -info and look for messages with the string
61  malloc in them to see if additional memory allocation was needed.
62 
63  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
64  entries or columns indices.
65 
66  The maximum number of nonzeos in any row should be as accurate as possible.
67  If it is underestimated, you will get bad performance due to reallocation
68  (MatSeqXSELLReallocateSELL).
69 
70  Level: intermediate
71 
72  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
73 
74  @*/
75 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
76 {
77   PetscErrorCode ierr;
78 
79   PetscFunctionBegin;
80   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
81   PetscValidType(B,1);
82   ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
87 {
88   Mat_SeqSELL    *b;
89   PetscInt       i,j,totalslices;
90   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
95   if (maxallocrow == MAT_SKIP_ALLOCATION) {
96     skipallocation = PETSC_TRUE;
97     maxallocrow    = 0;
98   }
99 
100   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
101   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
102 
103   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
104   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
105   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %" PetscInt_FMT,maxallocrow);
106   if (rlen) {
107     for (i=0; i<B->rmap->n; i++) {
108       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,rlen[i]);
109       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,rlen[i],B->cmap->n);
110     }
111   }
112 
113   B->preallocated = PETSC_TRUE;
114 
115   b = (Mat_SeqSELL*)B->data;
116 
117   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
118   b->totalslices = totalslices;
119   if (!skipallocation) {
120     if (B->rmap->n & 0x07) {ierr = PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %" PetscInt_FMT ")\n",B->rmap->n);CHKERRQ(ierr);}
121 
122     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
123       ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr);
124       ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
125     }
126     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
127       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
128       else if (maxallocrow < 0) maxallocrow = 1;
129       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
130     } else {
131       maxallocrow = 0;
132       b->sliidx[0] = 0;
133       for (i=1; i<totalslices; i++) {
134         b->sliidx[i] = 0;
135         for (j=0;j<8;j++) {
136           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
137         }
138         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
139         ierr = PetscIntSumError(b->sliidx[i-1],8*b->sliidx[i],&b->sliidx[i]);CHKERRQ(ierr);
140       }
141       /* last slice */
142       b->sliidx[totalslices] = 0;
143       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
144       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
145       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
146     }
147 
148     /* allocate space for val, colidx, rlen */
149     /* FIXME: should B's old memory be unlogged? */
150     ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr);
151     /* FIXME: assuming an element of the bit array takes 8 bits */
152     ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr);
153     ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
154     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
155     ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr);
156     ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr);
157 
158     b->singlemalloc = PETSC_TRUE;
159     b->free_val     = PETSC_TRUE;
160     b->free_colidx  = PETSC_TRUE;
161   } else {
162     b->free_val    = PETSC_FALSE;
163     b->free_colidx = PETSC_FALSE;
164   }
165 
166   b->nz               = 0;
167   b->maxallocrow      = maxallocrow;
168   b->rlenmax          = maxallocrow;
169   b->maxallocmat      = b->sliidx[totalslices];
170   B->info.nz_unneeded = (double)b->maxallocmat;
171   if (realalloc) {
172     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
173   }
174   PetscFunctionReturn(0);
175 }
176 
177 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
178 {
179   Mat_SeqSELL *a = (Mat_SeqSELL*)A->data;
180   PetscInt    shift;
181 
182   PetscFunctionBegin;
183   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row);
184   if (nz) *nz = a->rlen[row];
185   shift = a->sliidx[row>>3]+(row&0x07);
186   if (!a->getrowcols) {
187     PetscErrorCode ierr;
188 
189     ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr);
190   }
191   if (idx) {
192     PetscInt j;
193     for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j];
194     *idx = a->getrowcols;
195   }
196   if (v) {
197     PetscInt j;
198     for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j];
199     *v = a->getrowvals;
200   }
201   PetscFunctionReturn(0);
202 }
203 
204 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
205 {
206   PetscFunctionBegin;
207   PetscFunctionReturn(0);
208 }
209 
210 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
211 {
212   Mat            B;
213   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
214   PetscInt       i;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (reuse == MAT_REUSE_MATRIX) {
219     B    = *newmat;
220     ierr = MatZeroEntries(B);CHKERRQ(ierr);
221   } else {
222     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
223     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
224     ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
225     ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr);
226   }
227 
228   for (i=0; i<A->rmap->n; i++) {
229     PetscInt    nz = 0,*cols = NULL;
230     PetscScalar *vals = NULL;
231 
232     ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
233     ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
234     ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
235   }
236 
237   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
238   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239   B->rmap->bs = A->rmap->bs;
240 
241   if (reuse == MAT_INPLACE_MATRIX) {
242     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
243   } else {
244     *newmat = B;
245   }
246   PetscFunctionReturn(0);
247 }
248 
249 #include <../src/mat/impls/aij/seq/aij.h>
250 
251 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
252 {
253   Mat               B;
254   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
255   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
256   const PetscInt    *cols;
257   const PetscScalar *vals;
258   PetscErrorCode    ierr;
259 
260   PetscFunctionBegin;
261 
262   if (reuse == MAT_REUSE_MATRIX) {
263     B = *newmat;
264   } else {
265     if (PetscDefined(USE_DEBUG) || !a->ilen) {
266       ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr);
267       for (i=0; i<m; i++) {
268         rowlengths[i] = ai[i+1] - ai[i];
269       }
270     }
271     if (PetscDefined(USE_DEBUG) && a->ilen) {
272       PetscBool eq;
273       ierr = PetscMemcmp(rowlengths,a->ilen,m*sizeof(PetscInt),&eq);CHKERRQ(ierr);
274       if (!eq) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"SeqAIJ ilen array incorrect");
275       ierr = PetscFree(rowlengths);CHKERRQ(ierr);
276       rowlengths = a->ilen;
277     } else if (a->ilen) rowlengths = a->ilen;
278     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
279     ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr);
280     ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr);
281     ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr);
282     if (rowlengths != a->ilen) {ierr = PetscFree(rowlengths);CHKERRQ(ierr);}
283   }
284 
285   for (row=0; row<m; row++) {
286     ierr = MatGetRow_SeqAIJ(A,row,&ncols,(PetscInt**)&cols,(PetscScalar**)&vals);CHKERRQ(ierr);
287     ierr = MatSetValues_SeqSELL(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
288     ierr = MatRestoreRow_SeqAIJ(A,row,&ncols,(PetscInt**)&cols,(PetscScalar**)&vals);CHKERRQ(ierr);
289   }
290   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
291   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
292   B->rmap->bs = A->rmap->bs;
293 
294   if (reuse == MAT_INPLACE_MATRIX) {
295     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
296   } else {
297     *newmat = B;
298   }
299   PetscFunctionReturn(0);
300 }
301 
302 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
303 {
304   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
305   PetscScalar       *y;
306   const PetscScalar *x;
307   const MatScalar   *aval=a->val;
308   PetscInt          totalslices=a->totalslices;
309   const PetscInt    *acolidx=a->colidx;
310   PetscInt          i,j;
311   PetscErrorCode    ierr;
312 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
313   __m512d           vec_x,vec_y,vec_vals;
314   __m256i           vec_idx;
315   __mmask8          mask;
316   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
317   __m256i           vec_idx2,vec_idx3,vec_idx4;
318 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
319   __m128i           vec_idx;
320   __m256d           vec_x,vec_y,vec_y2,vec_vals;
321   MatScalar         yval;
322   PetscInt          r,rows_left,row,nnz_in_row;
323 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
324   __m128d           vec_x_tmp;
325   __m256d           vec_x,vec_y,vec_y2,vec_vals;
326   MatScalar         yval;
327   PetscInt          r,rows_left,row,nnz_in_row;
328 #else
329   PetscScalar       sum[8];
330 #endif
331 
332 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
333 #pragma disjoint(*x,*y,*aval)
334 #endif
335 
336   PetscFunctionBegin;
337   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
338   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
339 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
340   for (i=0; i<totalslices; i++) { /* loop over slices */
341     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
342     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
343 
344     vec_y  = _mm512_setzero_pd();
345     vec_y2 = _mm512_setzero_pd();
346     vec_y3 = _mm512_setzero_pd();
347     vec_y4 = _mm512_setzero_pd();
348 
349     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
350     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
351     case 3:
352       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
353       acolidx += 8; aval += 8;
354       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
355       acolidx += 8; aval += 8;
356       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
357       acolidx += 8; aval += 8;
358       j += 3;
359       break;
360     case 2:
361       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
362       acolidx += 8; aval += 8;
363       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
364       acolidx += 8; aval += 8;
365       j += 2;
366       break;
367     case 1:
368       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
369       acolidx += 8; aval += 8;
370       j += 1;
371       break;
372     }
373     #pragma novector
374     for (; j<(a->sliidx[i+1]>>3); j+=4) {
375       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
376       acolidx += 8; aval += 8;
377       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
378       acolidx += 8; aval += 8;
379       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
380       acolidx += 8; aval += 8;
381       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
382       acolidx += 8; aval += 8;
383     }
384 
385     vec_y = _mm512_add_pd(vec_y,vec_y2);
386     vec_y = _mm512_add_pd(vec_y,vec_y3);
387     vec_y = _mm512_add_pd(vec_y,vec_y4);
388     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
389       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
390       _mm512_mask_storeu_pd(&y[8*i],mask,vec_y);
391     } else {
392       _mm512_storeu_pd(&y[8*i],vec_y);
393     }
394   }
395 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
396   for (i=0; i<totalslices; i++) { /* loop over full slices */
397     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
398     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
399 
400     /* last slice may have padding rows. Don't use vectorization. */
401     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
402       rows_left = A->rmap->n - 8*i;
403       for (r=0; r<rows_left; ++r) {
404         yval = (MatScalar)0;
405         row = 8*i + r;
406         nnz_in_row = a->rlen[row];
407         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
408         y[row] = yval;
409       }
410       break;
411     }
412 
413     vec_y  = _mm256_setzero_pd();
414     vec_y2 = _mm256_setzero_pd();
415 
416     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
417     #pragma novector
418     #pragma unroll(2)
419     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
420       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
421       aval += 4; acolidx += 4;
422       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2);
423       aval += 4; acolidx += 4;
424     }
425 
426     _mm256_storeu_pd(y+i*8,vec_y);
427     _mm256_storeu_pd(y+i*8+4,vec_y2);
428   }
429 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
430   for (i=0; i<totalslices; i++) { /* loop over full slices */
431     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
432     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
433 
434     vec_y  = _mm256_setzero_pd();
435     vec_y2 = _mm256_setzero_pd();
436 
437     /* last slice may have padding rows. Don't use vectorization. */
438     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
439       rows_left = A->rmap->n - 8*i;
440       for (r=0; r<rows_left; ++r) {
441         yval = (MatScalar)0;
442         row = 8*i + r;
443         nnz_in_row = a->rlen[row];
444         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
445         y[row] = yval;
446       }
447       break;
448     }
449 
450     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
451     #pragma novector
452     #pragma unroll(2)
453     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
454       vec_vals  = _mm256_loadu_pd(aval);
455       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
456       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
457       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
458       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
459       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
460       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
461       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
462       aval     += 4;
463 
464       vec_vals  = _mm256_loadu_pd(aval);
465       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
466       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
467       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
468       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
469       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
470       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
471       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
472       aval     += 4;
473     }
474 
475     _mm256_storeu_pd(y + i*8,     vec_y);
476     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
477   }
478 #else
479   for (i=0; i<totalslices; i++) { /* loop over slices */
480     for (j=0; j<8; j++) sum[j] = 0.0;
481     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
482       sum[0] += aval[j] * x[acolidx[j]];
483       sum[1] += aval[j+1] * x[acolidx[j+1]];
484       sum[2] += aval[j+2] * x[acolidx[j+2]];
485       sum[3] += aval[j+3] * x[acolidx[j+3]];
486       sum[4] += aval[j+4] * x[acolidx[j+4]];
487       sum[5] += aval[j+5] * x[acolidx[j+5]];
488       sum[6] += aval[j+6] * x[acolidx[j+6]];
489       sum[7] += aval[j+7] * x[acolidx[j+7]];
490     }
491     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
492       for (j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
493     } else {
494       for (j=0; j<8; j++) y[8*i+j] = sum[j];
495     }
496   }
497 #endif
498 
499   ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */
500   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
501   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
506 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
507 {
508   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
509   PetscScalar       *y,*z;
510   const PetscScalar *x;
511   const MatScalar   *aval=a->val;
512   PetscInt          totalslices=a->totalslices;
513   const PetscInt    *acolidx=a->colidx;
514   PetscInt          i,j;
515   PetscErrorCode    ierr;
516 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
517   __m512d           vec_x,vec_y,vec_vals;
518   __m256i           vec_idx;
519   __mmask8          mask;
520   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
521   __m256i           vec_idx2,vec_idx3,vec_idx4;
522 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
523   __m128d           vec_x_tmp;
524   __m256d           vec_x,vec_y,vec_y2,vec_vals;
525   MatScalar         yval;
526   PetscInt          r,row,nnz_in_row;
527 #else
528   PetscScalar       sum[8];
529 #endif
530 
531 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
532 #pragma disjoint(*x,*y,*aval)
533 #endif
534 
535   PetscFunctionBegin;
536   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
537   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
538 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
539   for (i=0; i<totalslices; i++) { /* loop over slices */
540     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
541     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
542 
543     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
544       mask   = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
545       vec_y  = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]);
546     } else {
547       vec_y  = _mm512_loadu_pd(&y[8*i]);
548     }
549     vec_y2 = _mm512_setzero_pd();
550     vec_y3 = _mm512_setzero_pd();
551     vec_y4 = _mm512_setzero_pd();
552 
553     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
554     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
555     case 3:
556       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
557       acolidx += 8; aval += 8;
558       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
559       acolidx += 8; aval += 8;
560       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
561       acolidx += 8; aval += 8;
562       j += 3;
563       break;
564     case 2:
565       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
566       acolidx += 8; aval += 8;
567       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
568       acolidx += 8; aval += 8;
569       j += 2;
570       break;
571     case 1:
572       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
573       acolidx += 8; aval += 8;
574       j += 1;
575       break;
576     }
577     #pragma novector
578     for (; j<(a->sliidx[i+1]>>3); j+=4) {
579       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
580       acolidx += 8; aval += 8;
581       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
582       acolidx += 8; aval += 8;
583       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
584       acolidx += 8; aval += 8;
585       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
586       acolidx += 8; aval += 8;
587     }
588 
589     vec_y = _mm512_add_pd(vec_y,vec_y2);
590     vec_y = _mm512_add_pd(vec_y,vec_y3);
591     vec_y = _mm512_add_pd(vec_y,vec_y4);
592     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
593       _mm512_mask_storeu_pd(&z[8*i],mask,vec_y);
594     } else {
595       _mm512_storeu_pd(&z[8*i],vec_y);
596     }
597   }
598 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
599   for (i=0; i<totalslices; i++) { /* loop over full slices */
600     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
601     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
602 
603     /* last slice may have padding rows. Don't use vectorization. */
604     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
605       for (r=0; r<(A->rmap->n & 0x07); ++r) {
606         row        = 8*i + r;
607         yval       = (MatScalar)0.0;
608         nnz_in_row = a->rlen[row];
609         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
610         z[row] = y[row] + yval;
611       }
612       break;
613     }
614 
615     vec_y  = _mm256_loadu_pd(y+8*i);
616     vec_y2 = _mm256_loadu_pd(y+8*i+4);
617 
618     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
619     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
620       vec_vals  = _mm256_loadu_pd(aval);
621       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
622       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
623       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
624       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
625       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
626       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
627       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
628       aval     += 4;
629 
630       vec_vals  = _mm256_loadu_pd(aval);
631       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
632       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
633       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
634       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
635       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
636       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
637       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
638       aval     += 4;
639     }
640 
641     _mm256_storeu_pd(z+i*8,vec_y);
642     _mm256_storeu_pd(z+i*8+4,vec_y2);
643   }
644 #else
645   for (i=0; i<totalslices; i++) { /* loop over slices */
646     for (j=0; j<8; j++) sum[j] = 0.0;
647     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
648       sum[0] += aval[j] * x[acolidx[j]];
649       sum[1] += aval[j+1] * x[acolidx[j+1]];
650       sum[2] += aval[j+2] * x[acolidx[j+2]];
651       sum[3] += aval[j+3] * x[acolidx[j+3]];
652       sum[4] += aval[j+4] * x[acolidx[j+4]];
653       sum[5] += aval[j+5] * x[acolidx[j+5]];
654       sum[6] += aval[j+6] * x[acolidx[j+6]];
655       sum[7] += aval[j+7] * x[acolidx[j+7]];
656     }
657     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
658       for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
659     } else {
660       for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
661     }
662   }
663 #endif
664 
665   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
666   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
667   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
672 {
673   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
674   PetscScalar       *y;
675   const PetscScalar *x;
676   const MatScalar   *aval=a->val;
677   const PetscInt    *acolidx=a->colidx;
678   PetscInt          i,j,r,row,nnz_in_row,totalslices=a->totalslices;
679   PetscErrorCode    ierr;
680 
681 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
682 #pragma disjoint(*x,*y,*aval)
683 #endif
684 
685   PetscFunctionBegin;
686   if (A->symmetric) {
687     ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr);
688     PetscFunctionReturn(0);
689   }
690   if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); }
691   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
692   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
693   for (i=0; i<a->totalslices; i++) { /* loop over slices */
694     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
695       for (r=0; r<(A->rmap->n & 0x07); ++r) {
696         row        = 8*i + r;
697         nnz_in_row = a->rlen[row];
698         for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
699       }
700       break;
701     }
702     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
703       y[acolidx[j]]   += aval[j] * x[8*i];
704       y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
705       y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
706       y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
707       y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
708       y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
709       y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
710       y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
711     }
712   }
713   ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr);
714   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
715   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
720 {
721   PetscErrorCode ierr;
722 
723   PetscFunctionBegin;
724   if (A->symmetric) {
725     ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr);
726   } else {
727     ierr = VecSet(yy,0.0);CHKERRQ(ierr);
728     ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr);
729   }
730   PetscFunctionReturn(0);
731 }
732 
733 /*
734      Checks for missing diagonals
735 */
736 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
737 {
738   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
739   PetscInt       *diag,i;
740   PetscErrorCode ierr;
741 
742   PetscFunctionBegin;
743   *missing = PETSC_FALSE;
744   if (A->rmap->n > 0 && !(a->colidx)) {
745     *missing = PETSC_TRUE;
746     if (d) *d = 0;
747     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
748   } else {
749     diag = a->diag;
750     for (i=0; i<A->rmap->n; i++) {
751       if (diag[i] == -1) {
752         *missing = PETSC_TRUE;
753         if (d) *d = i;
754         ierr = PetscInfo1(A,"Matrix is missing diagonal number %" PetscInt_FMT "\n",i);CHKERRQ(ierr);
755         break;
756       }
757     }
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
763 {
764   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
765   PetscInt       i,j,m=A->rmap->n,shift;
766   PetscErrorCode ierr;
767 
768   PetscFunctionBegin;
769   if (!a->diag) {
770     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
771     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
772     a->free_diag = PETSC_TRUE;
773   }
774   for (i=0; i<m; i++) { /* loop over rows */
775     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
776     a->diag[i] = -1;
777     for (j=0; j<a->rlen[i]; j++) {
778       if (a->colidx[shift+j*8] == i) {
779         a->diag[i] = shift+j*8;
780         break;
781       }
782     }
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 /*
788   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
789 */
790 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
791 {
792   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
793   PetscInt       i,*diag,m = A->rmap->n;
794   MatScalar      *val = a->val;
795   PetscScalar    *idiag,*mdiag;
796   PetscErrorCode ierr;
797 
798   PetscFunctionBegin;
799   if (a->idiagvalid) PetscFunctionReturn(0);
800   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
801   diag = a->diag;
802   if (!a->idiag) {
803     ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr);
804     ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr);
805     val  = a->val;
806   }
807   mdiag = a->mdiag;
808   idiag = a->idiag;
809 
810   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
811     for (i=0; i<m; i++) {
812       mdiag[i] = val[diag[i]];
813       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
814         if (PetscRealPart(fshift)) {
815           ierr = PetscInfo1(A,"Zero diagonal on row %" PetscInt_FMT "\n",i);CHKERRQ(ierr);
816           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
817           A->factorerror_zeropivot_value = 0.0;
818           A->factorerror_zeropivot_row   = i;
819         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %" PetscInt_FMT,i);
820       }
821       idiag[i] = 1.0/val[diag[i]];
822     }
823     ierr = PetscLogFlops(m);CHKERRQ(ierr);
824   } else {
825     for (i=0; i<m; i++) {
826       mdiag[i] = val[diag[i]];
827       idiag[i] = omega/(fshift + val[diag[i]]);
828     }
829     ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr);
830   }
831   a->idiagvalid = PETSC_TRUE;
832   PetscFunctionReturn(0);
833 }
834 
835 PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
836 {
837   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
838   PetscErrorCode ierr;
839 
840   PetscFunctionBegin;
841   ierr = PetscArrayzero(a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr);
842   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
843   PetscFunctionReturn(0);
844 }
845 
846 PetscErrorCode MatDestroy_SeqSELL(Mat A)
847 {
848   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
849   PetscErrorCode ierr;
850 
851   PetscFunctionBegin;
852 #if defined(PETSC_USE_LOG)
853   PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->n,A->cmap->n,a->nz);
854 #endif
855   ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr);
856   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
857   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
858   ierr = PetscFree(a->diag);CHKERRQ(ierr);
859   ierr = PetscFree(a->rlen);CHKERRQ(ierr);
860   ierr = PetscFree(a->sliidx);CHKERRQ(ierr);
861   ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr);
862   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
863   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
864   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
865   ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr);
866 
867   ierr = PetscFree(A->data);CHKERRQ(ierr);
868 
869   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
870   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
871   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
872   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
877 {
878   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882   switch (op) {
883   case MAT_ROW_ORIENTED:
884     a->roworiented = flg;
885     break;
886   case MAT_KEEP_NONZERO_PATTERN:
887     a->keepnonzeropattern = flg;
888     break;
889   case MAT_NEW_NONZERO_LOCATIONS:
890     a->nonew = (flg ? 0 : 1);
891     break;
892   case MAT_NEW_NONZERO_LOCATION_ERR:
893     a->nonew = (flg ? -1 : 0);
894     break;
895   case MAT_NEW_NONZERO_ALLOCATION_ERR:
896     a->nonew = (flg ? -2 : 0);
897     break;
898   case MAT_UNUSED_NONZERO_LOCATION_ERR:
899     a->nounused = (flg ? -1 : 0);
900     break;
901   case MAT_FORCE_DIAGONAL_ENTRIES:
902   case MAT_IGNORE_OFF_PROC_ENTRIES:
903   case MAT_USE_HASH_TABLE:
904   case MAT_SORTED_FULL:
905     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
906     break;
907   case MAT_SPD:
908   case MAT_SYMMETRIC:
909   case MAT_STRUCTURALLY_SYMMETRIC:
910   case MAT_HERMITIAN:
911   case MAT_SYMMETRY_ETERNAL:
912     /* These options are handled directly by MatSetOption() */
913     break;
914   default:
915     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
916   }
917   PetscFunctionReturn(0);
918 }
919 
920 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
921 {
922   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
923   PetscInt       i,j,n,shift;
924   PetscScalar    *x,zero=0.0;
925   PetscErrorCode ierr;
926 
927   PetscFunctionBegin;
928   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
929   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
930 
931   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
932     PetscInt *diag=a->diag;
933     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
934     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
935     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
936     PetscFunctionReturn(0);
937   }
938 
939   ierr = VecSet(v,zero);CHKERRQ(ierr);
940   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
941   for (i=0; i<n; i++) { /* loop over rows */
942     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
943     x[i] = 0;
944     for (j=0; j<a->rlen[i]; j++) {
945       if (a->colidx[shift+j*8] == i) {
946         x[i] = a->val[shift+j*8];
947         break;
948       }
949     }
950   }
951   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
956 {
957   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
958   const PetscScalar *l,*r;
959   PetscInt          i,j,m,n,row;
960   PetscErrorCode    ierr;
961 
962   PetscFunctionBegin;
963   if (ll) {
964     /* The local size is used so that VecMPI can be passed to this routine
965        by MatDiagonalScale_MPISELL */
966     ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr);
967     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
968     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
969     for (i=0; i<a->totalslices; i++) { /* loop over slices */
970       if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
971         for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
972           if (row < (A->rmap->n & 0x07)) a->val[j] *= l[8*i+row];
973         }
974       } else {
975         for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
976           a->val[j] *= l[8*i+row];
977         }
978       }
979     }
980     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
981     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
982   }
983   if (rr) {
984     ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr);
985     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
986     ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
987     for (i=0; i<a->totalslices; i++) { /* loop over slices */
988       if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
989         for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
990           if (row < (A->rmap->n & 0x07)) a->val[j] *= r[a->colidx[j]];
991         }
992       } else {
993         for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
994           a->val[j] *= r[a->colidx[j]];
995         }
996       }
997     }
998     ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
999     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1000   }
1001   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1006 
1007 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1008 {
1009   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1010   PetscInt    *cp,i,k,low,high,t,row,col,l;
1011   PetscInt    shift;
1012   MatScalar   *vp;
1013 
1014   PetscFunctionBegin;
1015   for (k=0; k<m; k++) { /* loop over requested rows */
1016     row = im[k];
1017     if (row<0) continue;
1018     if (PetscUnlikelyDebug(row >= A->rmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->n-1);
1019     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1020     cp = a->colidx+shift; /* pointer to the row */
1021     vp = a->val+shift; /* pointer to the row */
1022     for (l=0; l<n; l++) { /* loop over requested columns */
1023       col = in[l];
1024       if (col<0) continue;
1025       if (PetscUnlikelyDebug(col >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %" PetscInt_FMT " max %" PetscInt_FMT,col,A->cmap->n-1);
1026       high = a->rlen[row]; low = 0; /* assume unsorted */
1027       while (high-low > 5) {
1028         t = (low+high)/2;
1029         if (*(cp+t*8) > col) high = t;
1030         else low = t;
1031       }
1032       for (i=low; i<high; i++) {
1033         if (*(cp+8*i) > col) break;
1034         if (*(cp+8*i) == col) {
1035           *v++ = *(vp+8*i);
1036           goto finished;
1037         }
1038       }
1039       *v++ = 0.0;
1040     finished:;
1041     }
1042   }
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
1047 {
1048   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1049   PetscInt          i,j,m=A->rmap->n,shift;
1050   const char        *name;
1051   PetscViewerFormat format;
1052   PetscErrorCode    ierr;
1053 
1054   PetscFunctionBegin;
1055   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1056   if (format == PETSC_VIEWER_ASCII_MATLAB) {
1057     PetscInt nofinalvalue = 0;
1058     /*
1059     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1060       nofinalvalue = 1;
1061     }
1062     */
1063     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1064     ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n",m,A->cmap->n);CHKERRQ(ierr);
1065     ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %" PetscInt_FMT " \n",a->nz);CHKERRQ(ierr);
1066 #if defined(PETSC_USE_COMPLEX)
1067     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",4);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1068 #else
1069     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1070 #endif
1071     ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr);
1072 
1073     for (i=0; i<m; i++) {
1074       shift = a->sliidx[i>>3]+(i&0x07);
1075       for (j=0; j<a->rlen[i]; j++) {
1076 #if defined(PETSC_USE_COMPLEX)
1077         ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1078 #else
1079         ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1080 #endif
1081       }
1082     }
1083     /*
1084     if (nofinalvalue) {
1085 #if defined(PETSC_USE_COMPLEX)
1086       ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr);
1087 #else
1088       ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr);
1089 #endif
1090     }
1091     */
1092     ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
1093     ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr);
1094     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1095   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1096     PetscFunctionReturn(0);
1097   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1098     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1099     for (i=0; i<m; i++) {
1100       ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i);CHKERRQ(ierr);
1101       shift = a->sliidx[i>>3]+(i&0x07);
1102       for (j=0; j<a->rlen[i]; j++) {
1103 #if defined(PETSC_USE_COMPLEX)
1104         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1105           ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1106         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1107           ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1108         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1109           ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1110         }
1111 #else
1112         if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);}
1113 #endif
1114       }
1115       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1116     }
1117     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1118   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1119     PetscInt    cnt=0,jcnt;
1120     PetscScalar value;
1121 #if defined(PETSC_USE_COMPLEX)
1122     PetscBool   realonly=PETSC_TRUE;
1123     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1124       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1125         realonly = PETSC_FALSE;
1126         break;
1127       }
1128     }
1129 #endif
1130 
1131     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1132     for (i=0; i<m; i++) {
1133       jcnt = 0;
1134       shift = a->sliidx[i>>3]+(i&0x07);
1135       for (j=0; j<A->cmap->n; j++) {
1136         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1137           value = a->val[cnt++];
1138           jcnt++;
1139         } else {
1140           value = 0.0;
1141         }
1142 #if defined(PETSC_USE_COMPLEX)
1143         if (realonly) {
1144           ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr);
1145         } else {
1146           ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr);
1147         }
1148 #else
1149         ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr);
1150 #endif
1151       }
1152       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1153     }
1154     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1155   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1156     PetscInt fshift=1;
1157     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1158 #if defined(PETSC_USE_COMPLEX)
1159     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr);
1160 #else
1161     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr);
1162 #endif
1163     ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", m, A->cmap->n, a->nz);CHKERRQ(ierr);
1164     for (i=0; i<m; i++) {
1165       shift = a->sliidx[i>>3]+(i&0x07);
1166       for (j=0; j<a->rlen[i]; j++) {
1167 #if defined(PETSC_USE_COMPLEX)
1168         ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1169 #else
1170         ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1171 #endif
1172       }
1173     }
1174     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1175   } else if (format == PETSC_VIEWER_NATIVE) {
1176     for (i=0; i<a->totalslices; i++) { /* loop over slices */
1177       PetscInt row;
1178       ierr = PetscViewerASCIIPrintf(viewer,"slice %" PetscInt_FMT ": %" PetscInt_FMT " %" PetscInt_FMT "\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr);
1179       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
1180 #if defined(PETSC_USE_COMPLEX)
1181         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1182           ierr = PetscViewerASCIIPrintf(viewer,"  %" PetscInt_FMT " %" PetscInt_FMT " %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1183         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1184           ierr = PetscViewerASCIIPrintf(viewer,"  %" PetscInt_FMT " %" PetscInt_FMT " %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1185         } else {
1186           ierr = PetscViewerASCIIPrintf(viewer,"  %" PetscInt_FMT " %" PetscInt_FMT " %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1187         }
1188 #else
1189         ierr = PetscViewerASCIIPrintf(viewer,"  %" PetscInt_FMT " %" PetscInt_FMT " %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1190 #endif
1191       }
1192     }
1193   } else {
1194     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1195     if (A->factortype) {
1196       for (i=0; i<m; i++) {
1197         shift = a->sliidx[i>>3]+(i&0x07);
1198         ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i);CHKERRQ(ierr);
1199         /* L part */
1200         for (j=shift; j<a->diag[i]; j+=8) {
1201 #if defined(PETSC_USE_COMPLEX)
1202           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1203             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1204           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1205             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1206           } else {
1207             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1208           }
1209 #else
1210           ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1211 #endif
1212         }
1213         /* diagonal */
1214         j = a->diag[i];
1215 #if defined(PETSC_USE_COMPLEX)
1216         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1217           ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr);
1218         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1219           ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr);
1220         } else {
1221           ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr);
1222         }
1223 #else
1224         ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr);
1225 #endif
1226 
1227         /* U part */
1228         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1229 #if defined(PETSC_USE_COMPLEX)
1230           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1231             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1232           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1233             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1234           } else {
1235             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1236           }
1237 #else
1238           ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1239 #endif
1240         }
1241         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1242       }
1243     } else {
1244       for (i=0; i<m; i++) {
1245         shift = a->sliidx[i>>3]+(i&0x07);
1246         ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i);CHKERRQ(ierr);
1247         for (j=0; j<a->rlen[i]; j++) {
1248 #if defined(PETSC_USE_COMPLEX)
1249           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1250             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1251           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1252             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1253           } else {
1254             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1255           }
1256 #else
1257           ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);
1258 #endif
1259         }
1260         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1261       }
1262     }
1263     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1264   }
1265   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #include <petscdraw.h>
1270 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1271 {
1272   Mat               A=(Mat)Aa;
1273   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1274   PetscInt          i,j,m=A->rmap->n,shift;
1275   int               color;
1276   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1277   PetscViewer       viewer;
1278   PetscViewerFormat format;
1279   PetscErrorCode    ierr;
1280 
1281   PetscFunctionBegin;
1282   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1283   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1284   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1285 
1286   /* loop over matrix elements drawing boxes */
1287 
1288   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1289     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1290     /* Blue for negative, Cyan for zero and  Red for positive */
1291     color = PETSC_DRAW_BLUE;
1292     for (i=0; i<m; i++) {
1293       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1294       y_l = m - i - 1.0; y_r = y_l + 1.0;
1295       for (j=0; j<a->rlen[i]; j++) {
1296         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1297         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1298         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1299       }
1300     }
1301     color = PETSC_DRAW_CYAN;
1302     for (i=0; i<m; i++) {
1303       shift = a->sliidx[i>>3]+(i&0x07);
1304       y_l = m - i - 1.0; y_r = y_l + 1.0;
1305       for (j=0; j<a->rlen[i]; j++) {
1306         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1307         if (a->val[shift+8*j] !=  0.) continue;
1308         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1309       }
1310     }
1311     color = PETSC_DRAW_RED;
1312     for (i=0; i<m; i++) {
1313       shift = a->sliidx[i>>3]+(i&0x07);
1314       y_l = m - i - 1.0; y_r = y_l + 1.0;
1315       for (j=0; j<a->rlen[i]; j++) {
1316         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1317         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1318         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1319       }
1320     }
1321     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1322   } else {
1323     /* use contour shading to indicate magnitude of values */
1324     /* first determine max of all nonzero values */
1325     PetscReal minv=0.0,maxv=0.0;
1326     PetscInt  count=0;
1327     PetscDraw popup;
1328     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1329       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1330     }
1331     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1332     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1333     ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr);
1334 
1335     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1336     for (i=0; i<m; i++) {
1337       shift = a->sliidx[i>>3]+(i&0x07);
1338       y_l = m - i - 1.0;
1339       y_r = y_l + 1.0;
1340       for (j=0; j<a->rlen[i]; j++) {
1341         x_l = a->colidx[shift+j*8];
1342         x_r = x_l + 1.0;
1343         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1344         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1345         count++;
1346       }
1347     }
1348     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #include <petscdraw.h>
1354 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1355 {
1356   PetscDraw      draw;
1357   PetscReal      xr,yr,xl,yl,h,w;
1358   PetscBool      isnull;
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1363   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1364   if (isnull) PetscFunctionReturn(0);
1365 
1366   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1367   xr  += w;          yr += h;         xl = -w;     yl = -h;
1368   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1369   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1370   ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr);
1371   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1372   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1377 {
1378   PetscBool      iascii,isbinary,isdraw;
1379   PetscErrorCode ierr;
1380 
1381   PetscFunctionBegin;
1382   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1383   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1384   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1385   if (iascii) {
1386     ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr);
1387   } else if (isbinary) {
1388     /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */
1389   } else if (isdraw) {
1390     ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr);
1391   }
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1396 {
1397   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1398   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1399   MatScalar      *vp;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1404   /* To do: compress out the unused elements */
1405   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
1406   ierr = PetscInfo6(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " allocated %" PetscInt_FMT " used (%" PetscInt_FMT " nonzeros+%" PetscInt_FMT " paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr);
1407   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs);CHKERRQ(ierr);
1408   ierr = PetscInfo1(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",a->rlenmax);CHKERRQ(ierr);
1409   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1410   for (i=0; i<a->totalslices; ++i) {
1411     shift = a->sliidx[i];    /* starting index of the slice */
1412     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1413     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1414     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1415       row  = 8*i + row_in_slice;
1416       nrow = a->rlen[row]; /* number of nonzeros in row */
1417       /*
1418         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1419         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1420       */
1421       lastcol = 0;
1422       if (nrow>0) { /* nonempty row */
1423         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1424       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1425         for (j=1;j<8;j++) {
1426           if (a->rlen[8*i+j]) {
1427             lastcol = cp[j];
1428             break;
1429           }
1430         }
1431       } else {
1432         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1433       }
1434 
1435       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1436         cp[8*k+row_in_slice] = lastcol;
1437         vp[8*k+row_in_slice] = (MatScalar)0;
1438       }
1439     }
1440   }
1441 
1442   A->info.mallocs += a->reallocs;
1443   a->reallocs      = 0;
1444 
1445   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1450 {
1451   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1452 
1453   PetscFunctionBegin;
1454   info->block_size   = 1.0;
1455   info->nz_allocated = a->maxallocmat;
1456   info->nz_used      = a->sliidx[a->totalslices]; /* include padding zeros */
1457   info->nz_unneeded  = (a->maxallocmat-a->sliidx[a->totalslices]);
1458   info->assemblies   = A->num_ass;
1459   info->mallocs      = A->info.mallocs;
1460   info->memory       = ((PetscObject)A)->mem;
1461   if (A->factortype) {
1462     info->fill_ratio_given  = A->info.fill_ratio_given;
1463     info->fill_ratio_needed = A->info.fill_ratio_needed;
1464     info->factor_mallocs    = A->info.factor_mallocs;
1465   } else {
1466     info->fill_ratio_given  = 0;
1467     info->fill_ratio_needed = 0;
1468     info->factor_mallocs    = 0;
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1474 {
1475   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1476   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1477   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1478   MatScalar      *vp,value;
1479   PetscErrorCode ierr;
1480 
1481   PetscFunctionBegin;
1482   for (k=0; k<m; k++) { /* loop over added rows */
1483     row = im[k];
1484     if (row < 0) continue;
1485     if (PetscUnlikelyDebug(row >= A->rmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->n-1);
1486     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1487     cp    = a->colidx+shift; /* pointer to the row */
1488     vp    = a->val+shift; /* pointer to the row */
1489     nrow  = a->rlen[row];
1490     low   = 0;
1491     high  = nrow;
1492 
1493     for (l=0; l<n; l++) { /* loop over added columns */
1494       col = in[l];
1495       if (col<0) continue;
1496       if (PetscUnlikelyDebug(col >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT,col,A->cmap->n-1);
1497       if (a->roworiented) {
1498         value = v[l+k*n];
1499       } else {
1500         value = v[k+l*m];
1501       }
1502       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1503 
1504       /* search in this row for the specified colmun, i indicates the column to be set */
1505       if (col <= lastcol) low = 0;
1506       else high = nrow;
1507       lastcol = col;
1508       while (high-low > 5) {
1509         t = (low+high)/2;
1510         if (*(cp+t*8) > col) high = t;
1511         else low = t;
1512       }
1513       for (i=low; i<high; i++) {
1514         if (*(cp+i*8) > col) break;
1515         if (*(cp+i*8) == col) {
1516           if (is == ADD_VALUES) *(vp+i*8) += value;
1517           else *(vp+i*8) = value;
1518           low = i + 1;
1519           goto noinsert;
1520         }
1521       }
1522       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1523       if (nonew == 1) goto noinsert;
1524       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
1525       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1526       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1527       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1528       for (ii=nrow-1; ii>=i; ii--) {
1529         *(cp+(ii+1)*8) = *(cp+ii*8);
1530         *(vp+(ii+1)*8) = *(vp+ii*8);
1531       }
1532       a->rlen[row]++;
1533       *(cp+i*8) = col;
1534       *(vp+i*8) = value;
1535       a->nz++;
1536       A->nonzerostate++;
1537       low = i+1; high++; nrow++;
1538 noinsert:;
1539     }
1540     a->rlen[row] = nrow;
1541   }
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1546 {
1547   PetscErrorCode ierr;
1548 
1549   PetscFunctionBegin;
1550   /* If the two matrices have the same copy implementation, use fast copy. */
1551   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1552     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1553     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1554 
1555     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1556     ierr = PetscArraycpy(b->val,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr);
1557   } else {
1558     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1559   }
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 PetscErrorCode MatSetUp_SeqSELL(Mat A)
1564 {
1565   PetscErrorCode ierr;
1566 
1567   PetscFunctionBegin;
1568   ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1573 {
1574   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1575 
1576   PetscFunctionBegin;
1577   *array = a->val;
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1582 {
1583   PetscFunctionBegin;
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 PetscErrorCode MatRealPart_SeqSELL(Mat A)
1588 {
1589   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1590   PetscInt    i;
1591   MatScalar   *aval=a->val;
1592 
1593   PetscFunctionBegin;
1594   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1599 {
1600   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1601   PetscInt       i;
1602   MatScalar      *aval=a->val;
1603   PetscErrorCode ierr;
1604 
1605   PetscFunctionBegin;
1606   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1607   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1612 {
1613   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1614   MatScalar      *aval=a->val;
1615   PetscScalar    oalpha=alpha;
1616   PetscBLASInt   one=1,size;
1617   PetscErrorCode ierr;
1618 
1619   PetscFunctionBegin;
1620   ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr);
1621   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1622   ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1623   ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr);
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1628 {
1629   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;
1630   PetscErrorCode ierr;
1631 
1632   PetscFunctionBegin;
1633   if (!Y->preallocated || !y->nz) {
1634     ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr);
1635   }
1636   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1641 {
1642   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1643   PetscScalar       *x,sum,*t;
1644   const MatScalar   *idiag=NULL,*mdiag;
1645   const PetscScalar *b,*xb;
1646   PetscInt          n,m=A->rmap->n,i,j,shift;
1647   const PetscInt    *diag;
1648   PetscErrorCode    ierr;
1649 
1650   PetscFunctionBegin;
1651   its = its*lits;
1652 
1653   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1654   if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);}
1655   a->fshift = fshift;
1656   a->omega  = omega;
1657 
1658   diag  = a->diag;
1659   t     = a->ssor_work;
1660   idiag = a->idiag;
1661   mdiag = a->mdiag;
1662 
1663   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1664   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1665   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1666   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1667   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1668   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1669 
1670   if (flag & SOR_ZERO_INITIAL_GUESS) {
1671     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1672       for (i=0; i<m; i++) {
1673         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1674         sum   = b[i];
1675         n     = (diag[i]-shift)/8;
1676         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1677         t[i]  = sum;
1678         x[i]  = sum*idiag[i];
1679       }
1680       xb   = t;
1681       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1682     } else xb = b;
1683     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1684       for (i=m-1; i>=0; i--) {
1685         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1686         sum   = xb[i];
1687         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1688         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1689         if (xb == b) {
1690           x[i] = sum*idiag[i];
1691         } else {
1692           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1693         }
1694       }
1695       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1696     }
1697     its--;
1698   }
1699   while (its--) {
1700     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1701       for (i=0; i<m; i++) {
1702         /* lower */
1703         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1704         sum   = b[i];
1705         n     = (diag[i]-shift)/8;
1706         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1707         t[i]  = sum;             /* save application of the lower-triangular part */
1708         /* upper */
1709         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1710         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1711         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1712       }
1713       xb   = t;
1714       ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1715     } else xb = b;
1716     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1717       for (i=m-1; i>=0; i--) {
1718         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1719         sum = xb[i];
1720         if (xb == b) {
1721           /* whole matrix (no checkpointing available) */
1722           n     = a->rlen[i];
1723           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1724           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1725         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1726           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1727           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1728           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1729         }
1730       }
1731       if (xb == b) {
1732         ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1733       } else {
1734         ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1735       }
1736     }
1737   }
1738   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1739   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 /* -------------------------------------------------------------------*/
1744 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1745                                        MatGetRow_SeqSELL,
1746                                        MatRestoreRow_SeqSELL,
1747                                        MatMult_SeqSELL,
1748                                /* 4*/  MatMultAdd_SeqSELL,
1749                                        MatMultTranspose_SeqSELL,
1750                                        MatMultTransposeAdd_SeqSELL,
1751                                        NULL,
1752                                        NULL,
1753                                        NULL,
1754                                /* 10*/ NULL,
1755                                        NULL,
1756                                        NULL,
1757                                        MatSOR_SeqSELL,
1758                                        NULL,
1759                                /* 15*/ MatGetInfo_SeqSELL,
1760                                        MatEqual_SeqSELL,
1761                                        MatGetDiagonal_SeqSELL,
1762                                        MatDiagonalScale_SeqSELL,
1763                                        NULL,
1764                                /* 20*/ NULL,
1765                                        MatAssemblyEnd_SeqSELL,
1766                                        MatSetOption_SeqSELL,
1767                                        MatZeroEntries_SeqSELL,
1768                                /* 24*/ NULL,
1769                                        NULL,
1770                                        NULL,
1771                                        NULL,
1772                                        NULL,
1773                                /* 29*/ MatSetUp_SeqSELL,
1774                                        NULL,
1775                                        NULL,
1776                                        NULL,
1777                                        NULL,
1778                                /* 34*/ MatDuplicate_SeqSELL,
1779                                        NULL,
1780                                        NULL,
1781                                        NULL,
1782                                        NULL,
1783                                /* 39*/ NULL,
1784                                        NULL,
1785                                        NULL,
1786                                        MatGetValues_SeqSELL,
1787                                        MatCopy_SeqSELL,
1788                                /* 44*/ NULL,
1789                                        MatScale_SeqSELL,
1790                                        MatShift_SeqSELL,
1791                                        NULL,
1792                                        NULL,
1793                                /* 49*/ NULL,
1794                                        NULL,
1795                                        NULL,
1796                                        NULL,
1797                                        NULL,
1798                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1799                                        NULL,
1800                                        NULL,
1801                                        NULL,
1802                                        NULL,
1803                                /* 59*/ NULL,
1804                                        MatDestroy_SeqSELL,
1805                                        MatView_SeqSELL,
1806                                        NULL,
1807                                        NULL,
1808                                /* 64*/ NULL,
1809                                        NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        NULL,
1813                                /* 69*/ NULL,
1814                                        NULL,
1815                                        NULL,
1816                                        NULL,
1817                                        NULL,
1818                                /* 74*/ NULL,
1819                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1820                                        NULL,
1821                                        NULL,
1822                                        NULL,
1823                                /* 79*/ NULL,
1824                                        NULL,
1825                                        NULL,
1826                                        NULL,
1827                                        NULL,
1828                                /* 84*/ NULL,
1829                                        NULL,
1830                                        NULL,
1831                                        NULL,
1832                                        NULL,
1833                                /* 89*/ NULL,
1834                                        NULL,
1835                                        NULL,
1836                                        NULL,
1837                                        NULL,
1838                                /* 94*/ NULL,
1839                                        NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                /* 99*/ NULL,
1844                                        NULL,
1845                                        NULL,
1846                                        MatConjugate_SeqSELL,
1847                                        NULL,
1848                                /*104*/ NULL,
1849                                        NULL,
1850                                        NULL,
1851                                        NULL,
1852                                        NULL,
1853                                /*109*/ NULL,
1854                                        NULL,
1855                                        NULL,
1856                                        NULL,
1857                                        MatMissingDiagonal_SeqSELL,
1858                                /*114*/ NULL,
1859                                        NULL,
1860                                        NULL,
1861                                        NULL,
1862                                        NULL,
1863                                /*119*/ NULL,
1864                                        NULL,
1865                                        NULL,
1866                                        NULL,
1867                                        NULL,
1868                                /*124*/ NULL,
1869                                        NULL,
1870                                        NULL,
1871                                        NULL,
1872                                        NULL,
1873                                /*129*/ NULL,
1874                                        NULL,
1875                                        NULL,
1876                                        NULL,
1877                                        NULL,
1878                                /*134*/ NULL,
1879                                        NULL,
1880                                        NULL,
1881                                        NULL,
1882                                        NULL,
1883                                /*139*/ NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        MatFDColoringSetUp_SeqXAIJ,
1887                                        NULL,
1888                                /*144*/ NULL
1889 };
1890 
1891 PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1892 {
1893   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1894   PetscErrorCode ierr;
1895 
1896   PetscFunctionBegin;
1897   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1898 
1899   /* allocate space for values if not already there */
1900   if (!a->saved_values) {
1901     ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr);
1902     ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1903   }
1904 
1905   /* copy values over */
1906   ierr = PetscArraycpy(a->saved_values,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1911 {
1912   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1913   PetscErrorCode ierr;
1914 
1915   PetscFunctionBegin;
1916   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1917   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1918   ierr = PetscArraycpy(a->val,a->saved_values,a->sliidx[a->totalslices]);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 /*@C
1923  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1924 
1925  Not Collective
1926 
1927  Input Parameters:
1928  .  mat - a MATSEQSELL matrix
1929  .  array - pointer to the data
1930 
1931  Level: intermediate
1932 
1933  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1934  @*/
1935 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1936 {
1937   PetscErrorCode ierr;
1938 
1939   PetscFunctionBegin;
1940   ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1945 {
1946   Mat_SeqSELL    *b;
1947   PetscMPIInt    size;
1948   PetscErrorCode ierr;
1949 
1950   PetscFunctionBegin;
1951   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
1952   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1953 
1954   ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
1955 
1956   B->data = (void*)b;
1957 
1958   ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1959 
1960   b->row                = NULL;
1961   b->col                = NULL;
1962   b->icol               = NULL;
1963   b->reallocs           = 0;
1964   b->ignorezeroentries  = PETSC_FALSE;
1965   b->roworiented        = PETSC_TRUE;
1966   b->nonew              = 0;
1967   b->diag               = NULL;
1968   b->solve_work         = NULL;
1969   B->spptr              = NULL;
1970   b->saved_values       = NULL;
1971   b->idiag              = NULL;
1972   b->mdiag              = NULL;
1973   b->ssor_work          = NULL;
1974   b->omega              = 1.0;
1975   b->fshift             = 0.0;
1976   b->idiagvalid         = PETSC_FALSE;
1977   b->keepnonzeropattern = PETSC_FALSE;
1978 
1979   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr);
1980   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr);
1981   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr);
1982   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr);
1983   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr);
1984   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr);
1985   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr);
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 /*
1990  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1991  */
1992 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1993 {
1994   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1995   PetscInt       i,m=A->rmap->n;
1996   PetscInt       totalslices=a->totalslices;
1997   PetscErrorCode ierr;
1998 
1999   PetscFunctionBegin;
2000   c = (Mat_SeqSELL*)C->data;
2001 
2002   C->factortype = A->factortype;
2003   c->row        = NULL;
2004   c->col        = NULL;
2005   c->icol       = NULL;
2006   c->reallocs   = 0;
2007 
2008   C->assembled = PETSC_TRUE;
2009 
2010   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
2011   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
2012 
2013   ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr);
2014   ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
2015   ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr);
2016   ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
2017 
2018   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
2019   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
2020 
2021   /* allocate the matrix space */
2022   if (mallocmatspace) {
2023     ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr);
2024     ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2025 
2026     c->singlemalloc = PETSC_TRUE;
2027 
2028     if (m > 0) {
2029       ierr = PetscArraycpy(c->colidx,a->colidx,a->maxallocmat);CHKERRQ(ierr);
2030       if (cpvalues == MAT_COPY_VALUES) {
2031         ierr = PetscArraycpy(c->val,a->val,a->maxallocmat);CHKERRQ(ierr);
2032       } else {
2033         ierr = PetscArrayzero(c->val,a->maxallocmat);CHKERRQ(ierr);
2034       }
2035     }
2036   }
2037 
2038   c->ignorezeroentries = a->ignorezeroentries;
2039   c->roworiented       = a->roworiented;
2040   c->nonew             = a->nonew;
2041   if (a->diag) {
2042     ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr);
2043     ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
2044     for (i=0; i<m; i++) {
2045       c->diag[i] = a->diag[i];
2046     }
2047   } else c->diag = NULL;
2048 
2049   c->solve_work         = NULL;
2050   c->saved_values       = NULL;
2051   c->idiag              = NULL;
2052   c->ssor_work          = NULL;
2053   c->keepnonzeropattern = a->keepnonzeropattern;
2054   c->free_val           = PETSC_TRUE;
2055   c->free_colidx        = PETSC_TRUE;
2056 
2057   c->maxallocmat  = a->maxallocmat;
2058   c->maxallocrow  = a->maxallocrow;
2059   c->rlenmax      = a->rlenmax;
2060   c->nz           = a->nz;
2061   C->preallocated = PETSC_TRUE;
2062 
2063   c->nonzerorowcnt = a->nonzerorowcnt;
2064   C->nonzerostate  = A->nonzerostate;
2065 
2066   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
2071 {
2072   PetscErrorCode ierr;
2073 
2074   PetscFunctionBegin;
2075   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
2076   ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
2077   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
2078     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
2079   }
2080   ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2081   ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 /*@C
2086  MatCreateSeqSELL - Creates a sparse matrix in SELL format.
2087 
2088  Collective
2089 
2090  Input Parameters:
2091 +  comm - MPI communicator, set to PETSC_COMM_SELF
2092 .  m - number of rows
2093 .  n - number of columns
2094 .  rlenmax - maximum number of nonzeros in a row
2095 -  rlen - array containing the number of nonzeros in the various rows
2096  (possibly different for each row) or NULL
2097 
2098  Output Parameter:
2099 .  A - the matrix
2100 
2101  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2102  MatXXXXSetPreallocation() paradigm instead of this routine directly.
2103  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
2104 
2105  Notes:
2106  If nnz is given then nz is ignored
2107 
2108  Specify the preallocated storage with either rlenmax or rlen (not both).
2109  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2110  allocation.  For large problems you MUST preallocate memory or you
2111  will get TERRIBLE performance, see the users' manual chapter on matrices.
2112 
2113  Level: intermediate
2114 
2115  .seealso: MatCreate(), MatCreateSELL(), MatSetValues()
2116 
2117  @*/
2118 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2119 {
2120   PetscErrorCode ierr;
2121 
2122   PetscFunctionBegin;
2123   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2124   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2125   ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr);
2126   ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr);
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2131 {
2132   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2133   PetscInt       totalslices=a->totalslices;
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   /* If the  matrix dimensions are not equal,or no of nonzeros */
2138   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2139     *flg = PETSC_FALSE;
2140     PetscFunctionReturn(0);
2141   }
2142   /* if the a->colidx are the same */
2143   ierr = PetscArraycmp(a->colidx,b->colidx,a->sliidx[totalslices],flg);CHKERRQ(ierr);
2144   if (!*flg) PetscFunctionReturn(0);
2145   /* if a->val are the same */
2146   ierr = PetscArraycmp(a->val,b->val,a->sliidx[totalslices],flg);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2151 {
2152   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2153 
2154   PetscFunctionBegin;
2155   a->idiagvalid  = PETSC_FALSE;
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatConjugate_SeqSELL(Mat A)
2160 {
2161 #if defined(PETSC_USE_COMPLEX)
2162   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2163   PetscInt    i;
2164   PetscScalar *val = a->val;
2165 
2166   PetscFunctionBegin;
2167   for (i=0; i<a->sliidx[a->totalslices]; i++) {
2168     val[i] = PetscConj(val[i]);
2169   }
2170 #else
2171   PetscFunctionBegin;
2172 #endif
2173   PetscFunctionReturn(0);
2174 }
2175