xref: /petsc/src/mat/impls/sell/seq/sell.c (revision 7285fed1bca141d12bef6892cdaaa08da852c832)
1 
2 /*
3   Defines the basic matrix operations for the SELL matrix storage format.
4 */
5 #include <../src/mat/impls/sell/seq/sell.h>  /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blocktranspose.h>
8 #if defined(PETSC_HAVE_IMMINTRIN_H)
9 #include <immintrin.h>
10 
11 #if !defined(_MM_SCALE_8)
12 #define _MM_SCALE_8    8
13 #endif
14 
15 #if defined(__AVX512F__)
16 /* these do not work
17  vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
18  vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
19 */
20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
22 vec_idx  = _mm256_load_si256((__m256i const*)acolidx); \
23 vec_vals = _mm512_load_pd(aval); \
24 vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
25 vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y);
26 
27 #endif
28 #endif  /* PETSC_HAVE_IMMINTRIN_H */
29 
30 /*@C
31  MatSeqSELLSetPreallocation - For good matrix assembly performance
32  the user should preallocate the matrix storage by setting the parameter nz
33  (or the array nnz).  By setting these parameters accurately, performance
34  during matrix assembly can be increased significantly.
35 
36  Collective on MPI_Comm
37 
38  Input Parameters:
39  +  B - The matrix
40  .  nz - number of nonzeros per row (same for all rows)
41  -  nnz - array containing the number of nonzeros in the various rows
42  (possibly different for each row) or NULL
43 
44  Notes:
45  If nnz is given then nz is ignored.
46 
47  Specify the preallocated storage with either nz or nnz (not both).
48  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
49  allocation.  For large problems you MUST preallocate memory or you
50  will get TERRIBLE performance, see the users' manual chapter on matrices.
51 
52  You can call MatGetInfo() to get information on how effective the preallocation was;
53  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
54  You can also run with the option -info and look for messages with the string
55  malloc in them to see if additional memory allocation was needed.
56 
57  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
58  entries or columns indices.
59 
60  The maximum number of nonzeos in any row should be as accuate as possible.
61  If it is underesitmated, you will get bad performance due to reallocation
62  (MatSeqXSELLReallocateSELL).
63 
64  Level: intermediate
65 
66  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
67 
68  @*/
69 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
70 {
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
75   PetscValidType(B,1);
76   ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
81 {
82   Mat_SeqSELL    *b;
83   PetscInt       i,j,totalslices;
84   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
89   if (maxallocrow == MAT_SKIP_ALLOCATION) {
90     skipallocation = PETSC_TRUE;
91     maxallocrow    = 0;
92   }
93 
94   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
95   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
96 
97   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
98   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
99   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
100   if (rlen) {
101     for (i=0; i<B->rmap->n; i++) {
102       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
103       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
104     }
105   }
106 
107   B->preallocated = PETSC_TRUE;
108 
109   b = (Mat_SeqSELL*)B->data;
110 
111   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
112   b->totalslices = totalslices;
113   if (!skipallocation) {
114     if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);
115 
116     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
117       ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr);
118       ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
119     }
120     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
121       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
122       else if (maxallocrow < 0) maxallocrow = 1;
123       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
124     } else {
125       maxallocrow = 0;
126       b->sliidx[0] = 0;
127       for (i=1; i<totalslices; i++) {
128         b->sliidx[i] = 0;
129         for (j=0;j<8;j++) {
130           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
131         }
132         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
133         b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i];
134       }
135       /* last slice */
136       b->sliidx[totalslices] = 0;
137       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
138       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
139       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
140     }
141 
142     /* allocate space for val, colidx, rlen */
143     /* FIXME: should B's old memory be unlogged? */
144     ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr);
145     /* FIXME: assuming an element of the bit array takes 8 bits */
146     ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr);
147     ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
148     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
149     ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr);
150     ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr);
151 
152     b->singlemalloc = PETSC_TRUE;
153     b->free_val     = PETSC_TRUE;
154     b->free_colidx  = PETSC_TRUE;
155   } else {
156     b->free_val    = PETSC_FALSE;
157     b->free_colidx = PETSC_FALSE;
158   }
159 
160   b->nz               = 0;
161   b->maxallocrow      = maxallocrow;
162   b->rlenmax          = maxallocrow;
163   b->maxallocmat      = b->sliidx[totalslices];
164   B->info.nz_unneeded = (double)b->maxallocmat;
165   if (realalloc) {
166     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
172 {
173   Mat            B;
174   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data;
175   PetscInt       i,j,row;
176   PetscBool      isnonzero;
177   PetscErrorCode ierr;
178 
179   PetscFunctionBegin;
180   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
181   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
182   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
183   ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr);
184   ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
185 
186   for (i=0; i<a->totalslices; i++) { /* loop over slices */
187     for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
188       isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]);
189       if (isnonzero) {
190         ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr);
191       }
192     }
193   }
194   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
195   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
196   B->rmap->bs = A->rmap->bs;
197 
198   if (reuse == MAT_INPLACE_MATRIX) {
199     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
200   } else {
201     *newmat = B;
202   }
203   PetscFunctionReturn(0);
204 }
205 
206 #include <../src/mat/impls/aij/seq/aij.h>
207 
208 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
209 {
210   Mat               B;
211   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
212   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
213   const PetscInt    *cols;
214   const PetscScalar *vals;
215   PetscErrorCode    ierr;
216 
217   PetscFunctionBegin;
218   if (A->rmap->bs > 1) {
219     ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr);
220     PetscFunctionReturn(0);
221   }
222   /* Can we just use ilen? */
223   ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr);
224   for (i=0; i<m; i++) {
225     rowlengths[i] = ai[i+1] - ai[i];
226   }
227 
228   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
229   ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr);
230   ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr);
231   ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr);
232   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
233 
234   ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
235 
236   for (row=0; row<m; row++) {
237     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
238     ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
239     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
240   }
241   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
242   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
243   B->rmap->bs = A->rmap->bs;
244 
245   if (reuse == MAT_INPLACE_MATRIX) {
246     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
247   } else {
248     *newmat = B;
249   }
250   PetscFunctionReturn(0);
251 }
252 
253 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
254 {
255   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
256   PetscScalar       *y;
257   const PetscScalar *x;
258   const MatScalar   *aval=a->val;
259   PetscInt          totalslices=a->totalslices;
260   const PetscInt    *acolidx=a->colidx;
261   PetscInt          i,j;
262   PetscErrorCode    ierr;
263 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
264   __m512d           vec_x,vec_y,vec_vals;
265   __m256i           vec_idx;
266   __mmask8          mask;
267   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
268   __m256i           vec_idx2,vec_idx3,vec_idx4;
269 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
270   __m128d           vec_x_tmp;
271   __m256d           vec_x,vec_y,vec_y2,vec_vals;
272   MatScalar         yval;
273   PetscInt          r,rows_left,row,nnz_in_row;
274 #else
275   PetscScalar       sum[8];
276 #endif
277 
278 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
279 #pragma disjoint(*x,*y,*aval)
280 #endif
281 
282   PetscFunctionBegin;
283   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
284   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
285 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
286   for (i=0; i<totalslices; i++) { /* loop over slices */
287     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
288     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
289 
290     vec_y  = _mm512_setzero_pd();
291     vec_y2 = _mm512_setzero_pd();
292     vec_y3 = _mm512_setzero_pd();
293     vec_y4 = _mm512_setzero_pd();
294 
295     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
296     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
297     case 3:
298       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
299       acolidx += 8; aval += 8;
300       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
301       acolidx += 8; aval += 8;
302       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
303       acolidx += 8; aval += 8;
304       j += 3;
305       break;
306     case 2:
307       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
308       acolidx += 8; aval += 8;
309       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
310       acolidx += 8; aval += 8;
311       j += 2;
312       break;
313     case 1:
314       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
315       acolidx += 8; aval += 8;
316       j += 1;
317       break;
318     }
319     #pragma novector
320     for (; j<(a->sliidx[i+1]>>3); j+=4) {
321       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
322       acolidx += 8; aval += 8;
323       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
324       acolidx += 8; aval += 8;
325       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
326       acolidx += 8; aval += 8;
327       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
328       acolidx += 8; aval += 8;
329     }
330 
331     vec_y = _mm512_add_pd(vec_y,vec_y2);
332     vec_y = _mm512_add_pd(vec_y,vec_y3);
333     vec_y = _mm512_add_pd(vec_y,vec_y4);
334     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
335       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
336       _mm512_mask_store_pd(&y[8*i],mask,vec_y);
337     } else {
338       _mm512_store_pd(&y[8*i],vec_y);
339     }
340   }
341 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
342   for (i=0; i<totalslices; i++) { /* loop over full slices */
343     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
344     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
345 
346     vec_y  = _mm256_setzero_pd();
347     vec_y2 = _mm256_setzero_pd();
348 
349     /* last slice may have padding rows. Don't use vectorization. */
350     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
351       rows_left = A->rmap->n - 8*i;
352       for (r=0; r<rows_left; ++r) {
353         yval = (MatScalar)0;
354         row = 8*i + r;
355         nnz_in_row = a->rlen[row];
356         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
357         y[row] = yval;
358       }
359       break;
360     }
361 
362     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
363     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
364       vec_vals  = _mm256_loadu_pd(aval);
365       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
366       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
367       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
368       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
369       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
370       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
371       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
372       aval     += 4;
373 
374       vec_vals  = _mm256_loadu_pd(aval);
375       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
376       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
377       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
378       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
379       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
380       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
381       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
382       aval     += 4;
383     }
384 
385     _mm256_storeu_pd(y + i*8,     vec_y);
386     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
387   }
388 #else
389   for (i=0; i<totalslices; i++) { /* loop over slices */
390     for (j=0; j<8; j++) sum[j] = 0.0;
391     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
392       sum[0] += aval[j] * x[acolidx[j]];
393       sum[1] += aval[j+1] * x[acolidx[j+1]];
394       sum[2] += aval[j+2] * x[acolidx[j+2]];
395       sum[3] += aval[j+3] * x[acolidx[j+3]];
396       sum[4] += aval[j+4] * x[acolidx[j+4]];
397       sum[5] += aval[j+5] * x[acolidx[j+5]];
398       sum[6] += aval[j+6] * x[acolidx[j+6]];
399       sum[7] += aval[j+7] * x[acolidx[j+7]];
400     }
401     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
402       for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
403     } else {
404       for(j=0; j<8; j++) y[8*i+j] = sum[j];
405     }
406   }
407 #endif
408 
409   ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */
410   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
411   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
412   PetscFunctionReturn(0);
413 }
414 
415 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
416 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
417 {
418   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
419   PetscScalar       *y,*z;
420   const PetscScalar *x;
421   const MatScalar   *aval=a->val;
422   PetscInt          totalslices=a->totalslices;
423   const PetscInt    *acolidx=a->colidx;
424   PetscInt          i,j;
425   PetscErrorCode    ierr;
426 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
427   __m512d           vec_x,vec_y,vec_vals;
428   __m256i           vec_idx;
429   __mmask8          mask;
430   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
431   __m256i           vec_idx2,vec_idx3,vec_idx4;
432 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
433   __m128d           vec_x_tmp;
434   __m256d           vec_x,vec_y,vec_y2,vec_vals;
435   MatScalar         yval;
436   PetscInt          r,row,nnz_in_row;
437 #else
438   PetscScalar       sum[8];
439 #endif
440 
441 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
442 #pragma disjoint(*x,*y,*aval)
443 #endif
444 
445   PetscFunctionBegin;
446   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
447   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
448 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
449   for (i=0; i<totalslices; i++) { /* loop over slices */
450     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
451     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
452 
453     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
454       mask   = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
455       vec_y  = _mm512_mask_load_pd(vec_y,mask,&y[8*i]);
456     } else {
457       vec_y  = _mm512_load_pd(&y[8*i]);
458     }
459     vec_y2 = _mm512_setzero_pd();
460     vec_y3 = _mm512_setzero_pd();
461     vec_y4 = _mm512_setzero_pd();
462 
463     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
464     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
465     case 3:
466       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
467       acolidx += 8; aval += 8;
468       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
469       acolidx += 8; aval += 8;
470       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
471       acolidx += 8; aval += 8;
472       j += 3;
473       break;
474     case 2:
475       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
476       acolidx += 8; aval += 8;
477       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
478       acolidx += 8; aval += 8;
479       j += 2;
480       break;
481     case 1:
482       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
483       acolidx += 8; aval += 8;
484       j += 1;
485       break;
486     }
487     #pragma novector
488     for (; j<(a->sliidx[i+1]>>3); j+=4) {
489       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
490       acolidx += 8; aval += 8;
491       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
492       acolidx += 8; aval += 8;
493       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
494       acolidx += 8; aval += 8;
495       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
496       acolidx += 8; aval += 8;
497     }
498 
499     vec_y = _mm512_add_pd(vec_y,vec_y2);
500     vec_y = _mm512_add_pd(vec_y,vec_y3);
501     vec_y = _mm512_add_pd(vec_y,vec_y4);
502     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
503       _mm512_mask_store_pd(&z[8*i],mask,vec_y);
504     } else {
505       _mm512_store_pd(&z[8*i],vec_y);
506     }
507   }
508 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
509   for (i=0; i<totalslices; i++) { /* loop over full slices */
510     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
511     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
512 
513     /* last slice may have padding rows. Don't use vectorization. */
514     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
515       for (r=0; r<(A->rmap->n & 0x07); ++r) {
516         row        = 8*i + r;
517         yval       = (MatScalar)0.0;
518         nnz_in_row = a->rlen[row];
519         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
520         z[row] = y[row] + yval;
521       }
522       break;
523     }
524 
525     vec_y  = _mm256_loadu_pd(y+8*i);
526     vec_y2 = _mm256_loadu_pd(y+8*i+4);
527 
528     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
529     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
530       vec_vals  = _mm256_loadu_pd(aval);
531       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
532       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
533       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
534       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
535       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
536       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
537       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
538       aval     += 4;
539 
540       vec_vals  = _mm256_loadu_pd(aval);
541       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
542       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
543       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
544       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
545       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
546       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
547       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
548       aval     += 4;
549     }
550 
551     _mm256_storeu_pd(z+i*8,vec_y);
552     _mm256_storeu_pd(z+i*8+4,vec_y2);
553   }
554 #else
555   for (i=0; i<totalslices; i++) { /* loop over slices */
556     for (j=0; j<8; j++) sum[j] = 0.0;
557     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
558       sum[0] += aval[j] * x[acolidx[j]];
559       sum[1] += aval[j+1] * x[acolidx[j+1]];
560       sum[2] += aval[j+2] * x[acolidx[j+2]];
561       sum[3] += aval[j+3] * x[acolidx[j+3]];
562       sum[4] += aval[j+4] * x[acolidx[j+4]];
563       sum[5] += aval[j+5] * x[acolidx[j+5]];
564       sum[6] += aval[j+6] * x[acolidx[j+6]];
565       sum[7] += aval[j+7] * x[acolidx[j+7]];
566     }
567     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
568       for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
569     } else {
570       for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
571     }
572   }
573 #endif
574 
575   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
576   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
577   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
582 {
583   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
584   PetscScalar       *y;
585   const PetscScalar *x;
586   const MatScalar   *aval=a->val;
587   const PetscInt    *acolidx=a->colidx;
588   PetscInt          i,j,r,row,nnz_in_row,totalslices=a->totalslices;
589   PetscErrorCode    ierr;
590 
591 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
592 #pragma disjoint(*x,*y,*aval)
593 #endif
594 
595   PetscFunctionBegin;
596   if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); }
597   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
598   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
599   for (i=0; i<a->totalslices; i++) { /* loop over slices */
600     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
601       for (r=0; r<(A->rmap->n & 0x07); ++r) {
602         row        = 8*i + r;
603         nnz_in_row = a->rlen[row];
604         for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
605       }
606       break;
607     }
608     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
609       y[acolidx[j]]   += aval[j] * x[8*i];
610       y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
611       y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
612       y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
613       y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
614       y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
615       y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
616       y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
617     }
618   }
619   ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr);
620   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
621   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
626 {
627   PetscErrorCode ierr;
628 
629   PetscFunctionBegin;
630   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
631   ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr);
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636      Checks for missing diagonals
637 */
638 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
639 {
640   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
641   PetscInt    *diag,i;
642 
643   PetscFunctionBegin;
644   *missing = PETSC_FALSE;
645   if (A->rmap->n > 0 && !(a->colidx)) {
646     *missing = PETSC_TRUE;
647     if (d) *d = 0;
648     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
649   } else {
650     diag = a->diag;
651     for (i=0; i<A->rmap->n; i++) {
652       if (diag[i] == -1) {
653         *missing = PETSC_TRUE;
654         if (d) *d = i;
655         PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
656         break;
657       }
658     }
659   }
660   PetscFunctionReturn(0);
661 }
662 
663 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
664 {
665   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
666   PetscInt       i,j,m=A->rmap->n,shift;
667   PetscErrorCode ierr;
668 
669   PetscFunctionBegin;
670   if (!a->diag) {
671     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
672     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
673     a->free_diag = PETSC_TRUE;
674   }
675   for (i=0; i<m; i++) { /* loop over rows */
676     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
677     a->diag[i] = -1;
678     for (j=0; j<a->rlen[i]; j++) {
679       if (a->colidx[shift+j*8] == i) {
680         a->diag[i] = shift+j*8;
681         break;
682       }
683     }
684   }
685   PetscFunctionReturn(0);
686 }
687 
688 /*
689   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
690 */
691 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
692 {
693   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
694   PetscInt       i,*diag,m = A->rmap->n;
695   MatScalar      *val = a->val;
696   PetscScalar    *idiag,*mdiag;
697   PetscErrorCode ierr;
698 
699   PetscFunctionBegin;
700   if (a->idiagvalid) PetscFunctionReturn(0);
701   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
702   diag = a->diag;
703   if (!a->idiag) {
704     ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr);
705     ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr);
706     val  = a->val;
707   }
708   mdiag = a->mdiag;
709   idiag = a->idiag;
710 
711   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
712     for (i=0; i<m; i++) {
713       mdiag[i] = val[diag[i]];
714       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
715         if (PetscRealPart(fshift)) {
716           ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr);
717           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
718           A->factorerror_zeropivot_value = 0.0;
719           A->factorerror_zeropivot_row   = i;
720         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
721       }
722       idiag[i] = 1.0/val[diag[i]];
723     }
724     ierr = PetscLogFlops(m);CHKERRQ(ierr);
725   } else {
726     for (i=0; i<m; i++) {
727       mdiag[i] = val[diag[i]];
728       idiag[i] = omega/(fshift + val[diag[i]]);
729     }
730     ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr);
731   }
732   a->idiagvalid = PETSC_TRUE;
733   PetscFunctionReturn(0);
734 }
735 
736 PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
737 {
738   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
739   PetscErrorCode ierr;
740 
741   PetscFunctionBegin;
742   ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr);
743   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
744   PetscFunctionReturn(0);
745 }
746 
747 PetscErrorCode MatDestroy_SeqSELL(Mat A)
748 {
749   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753 #if defined(PETSC_USE_LOG)
754   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
755 #endif
756   ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr);
757   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
758   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
759   ierr = PetscFree(a->diag);CHKERRQ(ierr);
760   ierr = PetscFree(a->ibdiag);CHKERRQ(ierr);
761   ierr = PetscFree(a->rlen);CHKERRQ(ierr);
762   ierr = PetscFree(a->sliidx);CHKERRQ(ierr);
763   ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr);
764   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
765   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
766   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
767 
768   ierr = PetscFree(A->data);CHKERRQ(ierr);
769 
770   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
771   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
772   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
773 #if defined(PETSC_HAVE_ELEMENTAL)
774 #endif
775   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr);
776   PetscFunctionReturn(0);
777 }
778 
779 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
780 {
781   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
782   PetscErrorCode ierr;
783 
784   PetscFunctionBegin;
785   switch (op) {
786   case MAT_ROW_ORIENTED:
787     a->roworiented = flg;
788     break;
789   case MAT_KEEP_NONZERO_PATTERN:
790     a->keepnonzeropattern = flg;
791     break;
792   case MAT_NEW_NONZERO_LOCATIONS:
793     a->nonew = (flg ? 0 : 1);
794     break;
795   case MAT_NEW_NONZERO_LOCATION_ERR:
796     a->nonew = (flg ? -1 : 0);
797     break;
798   case MAT_NEW_NONZERO_ALLOCATION_ERR:
799     a->nonew = (flg ? -2 : 0);
800     break;
801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
802     a->nounused = (flg ? -1 : 0);
803     break;
804   case MAT_NEW_DIAGONALS:
805   case MAT_IGNORE_OFF_PROC_ENTRIES:
806   case MAT_USE_HASH_TABLE:
807     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
808     break;
809   case MAT_SPD:
810   case MAT_SYMMETRIC:
811   case MAT_STRUCTURALLY_SYMMETRIC:
812   case MAT_HERMITIAN:
813   case MAT_SYMMETRY_ETERNAL:
814     /* These options are handled directly by MatSetOption() */
815     break;
816   default:
817     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
818   }
819   PetscFunctionReturn(0);
820 }
821 
822 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
823 {
824   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
825   PetscInt       i,j,n,shift;
826   PetscScalar    *x,zero=0.0;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
831   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
832 
833   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
834     PetscInt *diag=a->diag;
835     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
836     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
837     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
838     PetscFunctionReturn(0);
839   }
840 
841   ierr = VecSet(v,zero);CHKERRQ(ierr);
842   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
843   for (i=0; i<n; i++) { /* loop over rows */
844     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
845     x[i] = 0;
846     for (j=0; j<a->rlen[i]; j++) {
847       if (a->colidx[shift+j*8] == i) {
848         x[i] = a->val[shift+j*8];
849         break;
850       }
851     }
852   }
853   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
854   PetscFunctionReturn(0);
855 }
856 
857 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
858 {
859   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
860   const PetscScalar *l,*r;
861   PetscInt          i,j,m,n,row;
862   PetscErrorCode    ierr;
863 
864   PetscFunctionBegin;
865   if (ll) {
866     /* The local size is used so that VecMPI can be passed to this routine
867        by MatDiagonalScale_MPISELL */
868     ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr);
869     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
870     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
871     for (i=0; i<a->totalslices; i++) { /* loop over slices */
872       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
873         a->val[j] *= l[8*i+row];
874       }
875     }
876     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
877     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
878   }
879   if (rr) {
880     ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr);
881     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
882     ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
883     for (i=0; i<a->totalslices; i++) { /* loop over slices */
884       for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
885         a->val[j] *= r[a->colidx[j]];
886       }
887     }
888     ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
889     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
890   }
891   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
896 
897 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
898 {
899   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
900   PetscInt    *cp,i,k,low,high,t,row,col,l;
901   PetscInt    shift;
902   MatScalar   *vp;
903 
904   PetscFunctionBegin;
905   for (k=0; k<m; k++) { /* loop over added rows */
906     row = im[k];
907     if (row<0) continue;
908 #if defined(PETSC_USE_DEBUG)
909     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
910 #endif
911     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
912     cp = a->colidx+shift; /* pointer to the row */
913     vp = a->val+shift; /* pointer to the row */
914     for (l=0; l<n; l++) { /* loop over added rows */
915       col = in[l];
916       if (col<0) continue;
917 #if defined(PETSC_USE_DEBUG)
918       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
919 #endif
920       high = a->rlen[row]; low = 0; /* assume unsorted */
921       while (high-low > 5) {
922         t = (low+high)/2;
923         if (*(cp+t*8) > col) high = t;
924         else low = t;
925       }
926       for (i=low; i<high; i++) {
927         if (*(cp+8*i) > col) break;
928         if (*(cp+8*i) == col) {
929           *v++ = *(vp+8*i);
930           goto finished;
931         }
932       }
933       *v++ = 0.0;
934     finished:;
935     }
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
941 {
942   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
943   PetscInt          i,j,m=A->rmap->n,shift;
944   const char        *name;
945   PetscViewerFormat format;
946   PetscErrorCode    ierr;
947 
948   PetscFunctionBegin;
949   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
950   if (format == PETSC_VIEWER_ASCII_MATLAB) {
951     PetscInt nofinalvalue = 0;
952     /*
953     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
954       nofinalvalue = 1;
955     }
956     */
957     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
958     ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr);
959     ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr);
960 #if defined(PETSC_USE_COMPLEX)
961     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
962 #else
963     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
964 #endif
965     ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr);
966 
967     for (i=0; i<m; i++) {
968       shift = a->sliidx[i>>3]+(i&0x07);
969       for (j=0; j<a->rlen[i]; j++) {
970 #if defined(PETSC_USE_COMPLEX)
971         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
972 #else
973         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr);
974 #endif
975       }
976     }
977     /*
978     if (nofinalvalue) {
979 #if defined(PETSC_USE_COMPLEX)
980       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr);
981 #else
982       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr);
983 #endif
984     }
985     */
986     ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
987     ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr);
988     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
989   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
990     PetscFunctionReturn(0);
991   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
992     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
993     for (i=0; i<m; i++) {
994       ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
995       shift = a->sliidx[i>>3]+(i&0x07);
996       for (j=0; j<a->rlen[i]; j++) {
997 #if defined(PETSC_USE_COMPLEX)
998         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
999           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1000         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1001           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1002         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1003           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1004         }
1005 #else
1006         if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);}
1007 #endif
1008       }
1009       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1010     }
1011     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1012   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1013     PetscInt    cnt=0,jcnt;
1014     PetscScalar value;
1015 #if defined(PETSC_USE_COMPLEX)
1016     PetscBool   realonly=PETSC_TRUE;
1017     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1018       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1019         realonly = PETSC_FALSE;
1020         break;
1021       }
1022     }
1023 #endif
1024 
1025     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1026     for (i=0; i<m; i++) {
1027       jcnt = 0;
1028       shift = a->sliidx[i>>3]+(i&0x07);
1029       for (j=0; j<A->cmap->n; j++) {
1030         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1031           value = a->val[cnt++];
1032           jcnt++;
1033         } else {
1034           value = 0.0;
1035         }
1036 #if defined(PETSC_USE_COMPLEX)
1037         if (realonly) {
1038           ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr);
1039         } else {
1040           ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr);
1041         }
1042 #else
1043         ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr);
1044 #endif
1045       }
1046       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1047     }
1048     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1049   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1050     PetscInt fshift=1;
1051     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1052 #if defined(PETSC_USE_COMPLEX)
1053     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr);
1054 #else
1055     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr);
1056 #endif
1057     ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr);
1058     for (i=0; i<m; i++) {
1059       shift = a->sliidx[i>>3]+(i&0x07);
1060       for (j=0; j<a->rlen[i]; j++) {
1061 #if defined(PETSC_USE_COMPLEX)
1062         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1063 #else
1064         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1065 #endif
1066       }
1067     }
1068     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1069   } else {
1070     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1071     if (A->factortype) {
1072       for (i=0; i<m; i++) {
1073         shift = a->sliidx[i>>3]+(i&0x07);
1074         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1075         /* L part */
1076         for (j=shift; j<a->diag[i]; j+=8) {
1077 #if defined(PETSC_USE_COMPLEX)
1078           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1079             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1080           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1081             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1082           } else {
1083             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1084           }
1085 #else
1086           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1087 #endif
1088         }
1089         /* diagonal */
1090         j = a->diag[i];
1091 #if defined(PETSC_USE_COMPLEX)
1092         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1093           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr);
1094         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1095           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr);
1096         } else {
1097           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr);
1098         }
1099 #else
1100         ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr);
1101 #endif
1102 
1103         /* U part */
1104         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1105 #if defined(PETSC_USE_COMPLEX)
1106           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1107             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1108           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1109             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1110           } else {
1111             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1112           }
1113 #else
1114           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1115 #endif
1116         }
1117         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1118       }
1119     } else {
1120       for (i=0; i<m; i++) {
1121         shift = a->sliidx[i>>3]+(i&0x07);
1122         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1123         for (j=0; j<a->rlen[i]; j++) {
1124 #if defined(PETSC_USE_COMPLEX)
1125           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1126             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1127           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1128             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1129           } else {
1130             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1131           }
1132 #else
1133           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);
1134 #endif
1135         }
1136         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1137       }
1138     }
1139     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1140   }
1141   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #include <petscdraw.h>
1146 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1147 {
1148   Mat               A=(Mat)Aa;
1149   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1150   PetscInt          i,j,m=A->rmap->n,shift;
1151   int               color;
1152   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1153   PetscViewer       viewer;
1154   PetscViewerFormat format;
1155   PetscErrorCode    ierr;
1156 
1157   PetscFunctionBegin;
1158   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1159   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1160   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1161 
1162   /* loop over matrix elements drawing boxes */
1163 
1164   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1165     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1166     /* Blue for negative, Cyan for zero and  Red for positive */
1167     color = PETSC_DRAW_BLUE;
1168     for (i=0; i<m; i++) {
1169       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1170       y_l = m - i - 1.0; y_r = y_l + 1.0;
1171       for (j=0; j<a->rlen[i]; j++) {
1172         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1173         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1174         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1175       }
1176     }
1177     color = PETSC_DRAW_CYAN;
1178     for (i=0; i<m; i++) {
1179       shift = a->sliidx[i>>3]+(i&0x07);
1180       y_l = m - i - 1.0; y_r = y_l + 1.0;
1181       for (j=0; j<a->rlen[i]; j++) {
1182         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1183         if (a->val[shift+8*j] !=  0.) continue;
1184         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1185       }
1186     }
1187     color = PETSC_DRAW_RED;
1188     for (i=0; i<m; i++) {
1189       shift = a->sliidx[i>>3]+(i&0x07);
1190       y_l = m - i - 1.0; y_r = y_l + 1.0;
1191       for (j=0; j<a->rlen[i]; j++) {
1192         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1193         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1194         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1195       }
1196     }
1197     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1198   } else {
1199     /* use contour shading to indicate magnitude of values */
1200     /* first determine max of all nonzero values */
1201     PetscReal minv=0.0,maxv=0.0;
1202     PetscInt  count=0;
1203     PetscDraw popup;
1204     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1205       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1206     }
1207     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1208     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1209     ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr);
1210 
1211     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1212     for (i=0; i<m; i++) {
1213       shift = a->sliidx[i>>3]+(i&0x07);
1214       y_l = m - i - 1.0;
1215       y_r = y_l + 1.0;
1216       for (j=0; j<a->rlen[i]; j++) {
1217         x_l = a->colidx[shift+j*8];
1218         x_r = x_l + 1.0;
1219         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1220         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1221         count++;
1222       }
1223     }
1224     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1225   }
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #include <petscdraw.h>
1230 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1231 {
1232   PetscDraw      draw;
1233   PetscReal      xr,yr,xl,yl,h,w;
1234   PetscBool      isnull;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1239   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1240   if (isnull) PetscFunctionReturn(0);
1241 
1242   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1243   xr  += w;          yr += h;         xl = -w;     yl = -h;
1244   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1245   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1246   ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr);
1247   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1248   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1253 {
1254   PetscBool      iascii,isbinary,isdraw;
1255   PetscErrorCode ierr;
1256 
1257   PetscFunctionBegin;
1258   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1259   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1260   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1261   if (iascii) {
1262     ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr);
1263   } else if (isbinary) {
1264     /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */
1265   } else if (isdraw) {
1266     ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr);
1267   }
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1272 {
1273   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1274   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1275   MatScalar      *vp;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1280   /* To do: compress out the unused elements */
1281   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
1282   ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr);
1283   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr);
1284   ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr);
1285   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1286   for (i=0; i<a->totalslices; ++i) {
1287     shift = a->sliidx[i];    /* starting index of the slice */
1288     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1289     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1290     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1291       row  = 8*i + row_in_slice;
1292       nrow = a->rlen[row]; /* number of nonzeros in row */
1293       /*
1294         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1295         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1296       */
1297       lastcol = 0;
1298       if (nrow>0) { /* nonempty row */
1299         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1300       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1301         for (j=1;j<8;j++) {
1302           if (a->rlen[8*i+j]) {
1303             lastcol = cp[j];
1304             break;
1305           }
1306         }
1307       } else {
1308         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1309       }
1310 
1311       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1312         cp[8*k+row_in_slice] = lastcol;
1313         vp[8*k+row_in_slice] = (MatScalar)0;
1314       }
1315     }
1316   }
1317 
1318   A->info.mallocs += a->reallocs;
1319   a->reallocs      = 0;
1320 
1321   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1326 {
1327   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1328 
1329   PetscFunctionBegin;
1330   info->block_size   = 1.0;
1331   info->nz_allocated = (double)a->maxallocmat;
1332   info->nz_used      = (double)a->sliidx[a->totalslices]; /* include padding zeros */
1333   info->nz_unneeded  = (double)(a->maxallocmat-a->sliidx[a->totalslices]);
1334   info->assemblies   = (double)A->num_ass;
1335   info->mallocs      = (double)A->info.mallocs;
1336   info->memory       = ((PetscObject)A)->mem;
1337   if (A->factortype) {
1338     info->fill_ratio_given  = A->info.fill_ratio_given;
1339     info->fill_ratio_needed = A->info.fill_ratio_needed;
1340     info->factor_mallocs    = A->info.factor_mallocs;
1341   } else {
1342     info->fill_ratio_given  = 0;
1343     info->fill_ratio_needed = 0;
1344     info->factor_mallocs    = 0;
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1350 {
1351   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1352   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1353   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1354   MatScalar      *vp,value;
1355   PetscErrorCode ierr;
1356 
1357   PetscFunctionBegin;
1358   for (k=0; k<m; k++) { /* loop over added rows */
1359     row = im[k];
1360     if (row < 0) continue;
1361 #if defined(PETSC_USE_DEBUG)
1362     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1363 #endif
1364     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1365     cp    = a->colidx+shift; /* pointer to the row */
1366     vp    = a->val+shift; /* pointer to the row */
1367     nrow  = a->rlen[row];
1368     low   = 0;
1369     high  = nrow;
1370 
1371     for (l=0; l<n; l++) { /* loop over added columns */
1372       col = in[l];
1373       if (col<0) continue;
1374 #if defined(PETSC_USE_DEBUG)
1375       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1376 #endif
1377       if (a->roworiented) {
1378         value = v[l+k*n];
1379       } else {
1380         value = v[k+l*m];
1381       }
1382       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1383 
1384       /* search in this row for the specified colmun, i indicates the column to be set */
1385       if (col <= lastcol) low = 0;
1386       else high = nrow;
1387       lastcol = col;
1388       while (high-low > 5) {
1389         t = (low+high)/2;
1390         if (*(cp+t*8) > col) high = t;
1391         else low = t;
1392       }
1393       for (i=low; i<high; i++) {
1394         if (*(cp+i*8) > col) break;
1395         if (*(cp+i*8) == col) {
1396           if (is == ADD_VALUES) *(vp+i*8) += value;
1397           else *(vp+i*8) = value;
1398           low = i + 1;
1399           goto noinsert;
1400         }
1401       }
1402       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1403       if (nonew == 1) goto noinsert;
1404       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1405       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1406       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1407       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1408       for (ii=nrow-1; ii>=i; ii--) {
1409         *(cp+(ii+1)*8) = *(cp+ii*8);
1410         *(vp+(ii+1)*8) = *(vp+ii*8);
1411       }
1412       a->rlen[row]++;
1413       *(cp+i*8) = col;
1414       *(vp+i*8) = value;
1415       a->nz++;
1416       A->nonzerostate++;
1417       low = i+1; high++; nrow++;
1418 noinsert:;
1419     }
1420     a->rlen[row] = nrow;
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1426 {
1427   PetscErrorCode ierr;
1428 
1429   PetscFunctionBegin;
1430   /* If the two matrices have the same copy implementation, use fast copy. */
1431   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1432     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1433     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1434 
1435     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1436     ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1437   } else {
1438     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1439   }
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 PetscErrorCode MatSetUp_SeqSELL(Mat A)
1444 {
1445   PetscErrorCode ierr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1453 {
1454   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1455 
1456   PetscFunctionBegin;
1457   *array = a->val;
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1462 {
1463   PetscFunctionBegin;
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 PetscErrorCode MatRealPart_SeqSELL(Mat A)
1468 {
1469   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1470   PetscInt    i;
1471   MatScalar   *aval=a->val;
1472 
1473   PetscFunctionBegin;
1474   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1479 {
1480   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1481   PetscInt       i;
1482   MatScalar      *aval=a->val;
1483   PetscErrorCode ierr;
1484 
1485   PetscFunctionBegin;
1486   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1487   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1492 {
1493   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1494   MatScalar      *aval=a->val;
1495   PetscScalar    oalpha=alpha;
1496   PetscBLASInt   one=1,size;
1497   PetscErrorCode ierr;
1498 
1499   PetscFunctionBegin;
1500   ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr);
1501   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1502   ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1503   ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1508 {
1509   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;
1510   PetscErrorCode ierr;
1511 
1512   PetscFunctionBegin;
1513   if (!Y->preallocated || !y->nz) {
1514     ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr);
1515   }
1516   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1521 {
1522   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1523   PetscScalar       *x,sum,*t;
1524   const MatScalar   *idiag=0,*mdiag;
1525   const PetscScalar *b,*xb;
1526   PetscInt          n,m=A->rmap->n,i,j,shift;
1527   const PetscInt    *diag;
1528   PetscErrorCode    ierr;
1529 
1530   PetscFunctionBegin;
1531   its = its*lits;
1532 
1533   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1534   if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);}
1535   a->fshift = fshift;
1536   a->omega  = omega;
1537 
1538   diag  = a->diag;
1539   t     = a->ssor_work;
1540   idiag = a->idiag;
1541   mdiag = a->mdiag;
1542 
1543   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1544   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1545   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1546   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1547   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1548   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1549 
1550   if (flag & SOR_ZERO_INITIAL_GUESS) {
1551     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1552       for (i=0; i<m; i++) {
1553         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1554         sum   = b[i];
1555         n     = (diag[i]-shift)/8;
1556         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1557         t[i]  = sum;
1558         x[i]  = sum*idiag[i];
1559       }
1560       xb   = t;
1561       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1562     } else xb = b;
1563     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1564       for (i=m-1; i>=0; i--) {
1565         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1566         sum   = xb[i];
1567         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1568         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1569         if (xb == b) {
1570           x[i] = sum*idiag[i];
1571         } else {
1572           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1573         }
1574       }
1575       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1576     }
1577     its--;
1578   }
1579   while (its--) {
1580     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1581       for (i=0; i<m; i++) {
1582         /* lower */
1583         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1584         sum   = b[i];
1585         n     = (diag[i]-shift)/8;
1586         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1587         t[i]  = sum;             /* save application of the lower-triangular part */
1588         /* upper */
1589         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1590         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1591         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1592       }
1593       xb   = t;
1594       ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1595     } else xb = b;
1596     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1597       for (i=m-1; i>=0; i--) {
1598         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1599         sum = xb[i];
1600         if (xb == b) {
1601           /* whole matrix (no checkpointing available) */
1602           n     = a->rlen[i];
1603           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1604           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1605         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1606           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1607           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1608           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1609         }
1610       }
1611       if (xb == b) {
1612         ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1613       } else {
1614         ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1615       }
1616     }
1617   }
1618   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1619   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 /* -------------------------------------------------------------------*/
1624 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1625                                        0,
1626                                        0,
1627                                        MatMult_SeqSELL,
1628                                /* 4*/  MatMultAdd_SeqSELL,
1629                                        MatMultTranspose_SeqSELL,
1630                                        MatMultTransposeAdd_SeqSELL,
1631                                        0,
1632                                        0,
1633                                        0,
1634                                /* 10*/ 0,
1635                                        0,
1636                                        0,
1637                                        MatSOR_SeqSELL,
1638                                        0,
1639                                /* 15*/ MatGetInfo_SeqSELL,
1640                                        MatEqual_SeqSELL,
1641                                        MatGetDiagonal_SeqSELL,
1642                                        MatDiagonalScale_SeqSELL,
1643                                        0,
1644                                /* 20*/ 0,
1645                                        MatAssemblyEnd_SeqSELL,
1646                                        MatSetOption_SeqSELL,
1647                                        MatZeroEntries_SeqSELL,
1648                                /* 24*/ 0,
1649                                        0,
1650                                        0,
1651                                        0,
1652                                        0,
1653                                /* 29*/ MatSetUp_SeqSELL,
1654                                        0,
1655                                        0,
1656                                        0,
1657                                        0,
1658                                /* 34*/ MatDuplicate_SeqSELL,
1659                                        0,
1660                                        0,
1661                                        0,
1662                                        0,
1663                                /* 39*/ 0,
1664                                        0,
1665                                        0,
1666                                        MatGetValues_SeqSELL,
1667                                        MatCopy_SeqSELL,
1668                                /* 44*/ 0,
1669                                        MatScale_SeqSELL,
1670                                        MatShift_SeqSELL,
1671                                        0,
1672                                        0,
1673                                /* 49*/ 0,
1674                                        0,
1675                                        0,
1676                                        0,
1677                                        0,
1678                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1679                                        0,
1680                                        0,
1681                                        0,
1682                                        0,
1683                                /* 59*/ 0,
1684                                        MatDestroy_SeqSELL,
1685                                        MatView_SeqSELL,
1686                                        0,
1687                                        0,
1688                                /* 64*/ 0,
1689                                        0,
1690                                        0,
1691                                        0,
1692                                        0,
1693                                /* 69*/ 0,
1694                                        0,
1695                                        0,
1696                                        0,
1697                                        0,
1698                                /* 74*/ 0,
1699                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1700                                        0,
1701                                        0,
1702                                        0,
1703                                /* 79*/ 0,
1704                                        0,
1705                                        0,
1706                                        0,
1707                                        0,
1708                                /* 84*/ 0,
1709                                        0,
1710                                        0,
1711                                        0,
1712                                        0,
1713                                /* 89*/ 0,
1714                                        0,
1715                                        0,
1716                                        0,
1717                                        0,
1718                                /* 94*/ 0,
1719                                        0,
1720                                        0,
1721                                        0,
1722                                        0,
1723                                /* 99*/ 0,
1724                                        0,
1725                                        0,
1726                                        MatConjugate_SeqSELL,
1727                                        0,
1728                                /*104*/ 0,
1729                                        0,
1730                                        0,
1731                                        0,
1732                                        0,
1733                                /*109*/ 0,
1734                                        0,
1735                                        0,
1736                                        0,
1737                                        MatMissingDiagonal_SeqSELL,
1738                                /*114*/ 0,
1739                                        0,
1740                                        0,
1741                                        0,
1742                                        0,
1743                                /*119*/ 0,
1744                                        0,
1745                                        0,
1746                                        0,
1747                                        0,
1748                                /*124*/ 0,
1749                                        0,
1750                                        0,
1751                                        0,
1752                                        0,
1753                                /*129*/ 0,
1754                                        0,
1755                                        0,
1756                                        0,
1757                                        0,
1758                                /*134*/ 0,
1759                                        0,
1760                                        0,
1761                                        0,
1762                                        0,
1763                                /*139*/ 0,
1764                                        0,
1765                                        0,
1766                                        MatFDColoringSetUp_SeqXAIJ,
1767                                        0,
1768                                 /*144*/0
1769 };
1770 
1771 PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1772 {
1773   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1774   PetscErrorCode ierr;
1775 
1776   PetscFunctionBegin;
1777   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1778 
1779   /* allocate space for values if not already there */
1780   if (!a->saved_values) {
1781     ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr);
1782     ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1783   }
1784 
1785   /* copy values over */
1786   ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1791 {
1792   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1797   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1798   /* copy values over */
1799   ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 /*@C
1804  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1805 
1806  Not Collective
1807 
1808  Input Parameters:
1809  .  mat - a MATSEQSELL matrix
1810  .  array - pointer to the data
1811 
1812  Level: intermediate
1813 
1814  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1815  @*/
1816 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1817 {
1818   PetscErrorCode ierr;
1819 
1820   PetscFunctionBegin;
1821   ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1826 {
1827   Mat_SeqSELL    *b;
1828   PetscMPIInt    size;
1829   PetscErrorCode ierr;
1830 
1831   PetscFunctionBegin;
1832   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1833   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1834 
1835   ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
1836 
1837   B->data = (void*)b;
1838 
1839   ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1840 
1841   b->row                = 0;
1842   b->col                = 0;
1843   b->icol               = 0;
1844   b->reallocs           = 0;
1845   b->ignorezeroentries  = PETSC_FALSE;
1846   b->roworiented        = PETSC_TRUE;
1847   b->nonew              = 0;
1848   b->diag               = 0;
1849   b->solve_work         = 0;
1850   B->spptr              = 0;
1851   b->saved_values       = 0;
1852   b->idiag              = 0;
1853   b->mdiag              = 0;
1854   b->ssor_work          = 0;
1855   b->omega              = 1.0;
1856   b->fshift             = 0.0;
1857   b->idiagvalid         = PETSC_FALSE;
1858   b->ibdiagvalid        = PETSC_FALSE;
1859   b->keepnonzeropattern = PETSC_FALSE;
1860 
1861   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr);
1862   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr);
1863   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr);
1864   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr);
1865   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr);
1866   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr);
1867   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr);
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 /*
1872  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1873  */
1874 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1875 {
1876   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1877   PetscInt       i,m=A->rmap->n;
1878   PetscInt       totalslices=a->totalslices;
1879   PetscErrorCode ierr;
1880 
1881   PetscFunctionBegin;
1882   c = (Mat_SeqSELL*)C->data;
1883 
1884   C->factortype = A->factortype;
1885   c->row        = 0;
1886   c->col        = 0;
1887   c->icol       = 0;
1888   c->reallocs   = 0;
1889 
1890   C->assembled = PETSC_TRUE;
1891 
1892   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
1893   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
1894 
1895   ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr);
1896   ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1897   ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr);
1898   ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
1899 
1900   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
1901   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
1902 
1903   /* allocate the matrix space */
1904   if (mallocmatspace) {
1905     ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr);
1906     ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1907 
1908     c->singlemalloc = PETSC_TRUE;
1909 
1910     if (m > 0) {
1911       ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr);
1912       if (cpvalues == MAT_COPY_VALUES) {
1913         ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1914       } else {
1915         ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1916       }
1917     }
1918   }
1919 
1920   c->ignorezeroentries = a->ignorezeroentries;
1921   c->roworiented       = a->roworiented;
1922   c->nonew             = a->nonew;
1923   if (a->diag) {
1924     ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr);
1925     ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1926     for (i=0; i<m; i++) {
1927       c->diag[i] = a->diag[i];
1928     }
1929   } else c->diag = 0;
1930 
1931   c->solve_work         = 0;
1932   c->saved_values       = 0;
1933   c->idiag              = 0;
1934   c->ssor_work          = 0;
1935   c->keepnonzeropattern = a->keepnonzeropattern;
1936   c->free_val           = PETSC_TRUE;
1937   c->free_colidx        = PETSC_TRUE;
1938 
1939   c->maxallocmat  = a->maxallocmat;
1940   c->maxallocrow  = a->maxallocrow;
1941   c->rlenmax      = a->rlenmax;
1942   c->nz           = a->nz;
1943   C->preallocated = PETSC_TRUE;
1944 
1945   c->nonzerorowcnt = a->nonzerorowcnt;
1946   C->nonzerostate  = A->nonzerostate;
1947 
1948   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
1953 {
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
1958   ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
1959   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
1960     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
1961   }
1962   ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 /*@C
1968  MatCreateSeqSELL - Creates a sparse matrix in SELL format.
1969 
1970  Collective on MPI_Comm
1971 
1972  Input Parameters:
1973  +  comm - MPI communicator, set to PETSC_COMM_SELF
1974  .  m - number of rows
1975  .  n - number of columns
1976  .  rlenmax - maximum number of nonzeros in a row
1977  -  rlen - array containing the number of nonzeros in the various rows
1978  (possibly different for each row) or NULL
1979 
1980  Output Parameter:
1981  .  A - the matrix
1982 
1983  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1984  MatXXXXSetPreallocation() paradgm instead of this routine directly.
1985  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
1986 
1987  Notes:
1988  If nnz is given then nz is ignored
1989 
1990  Specify the preallocated storage with either rlenmax or rlen (not both).
1991  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
1992  allocation.  For large problems you MUST preallocate memory or you
1993  will get TERRIBLE performance, see the users' manual chapter on matrices.
1994 
1995  Level: intermediate
1996 
1997  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()
1998 
1999  @*/
2000 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2001 {
2002   PetscErrorCode ierr;
2003 
2004   PetscFunctionBegin;
2005   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2006   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2007   ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr);
2008   ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr);
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2013 {
2014   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2015   PetscInt       totalslices=a->totalslices;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   /* If the  matrix dimensions are not equal,or no of nonzeros */
2020   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2021     *flg = PETSC_FALSE;
2022     PetscFunctionReturn(0);
2023   }
2024   /* if the a->colidx are the same */
2025   ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr);
2026   if (!*flg) PetscFunctionReturn(0);
2027   /* if a->val are the same */
2028   ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr);
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2033 {
2034   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2035 
2036   PetscFunctionBegin;
2037   a->idiagvalid  = PETSC_FALSE;
2038   a->ibdiagvalid = PETSC_FALSE;
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 PetscErrorCode MatConjugate_SeqSELL(Mat A)
2043 {
2044 #if defined(PETSC_USE_COMPLEX)
2045   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2046   PetscInt    i;
2047   PetscScalar *val = a->val;
2048 
2049   PetscFunctionBegin;
2050   for (i=0; i<a->sliidx[a->totalslices]; i++) {
2051     val[i] = PetscConj(val[i]);
2052   }
2053 #else
2054   PetscFunctionBegin;
2055 #endif
2056   PetscFunctionReturn(0);
2057 }
2058