xref: /petsc/src/mat/impls/sell/seq/sell.c (revision 38efe8ef79557fb58b73d4ce033b31201f20abd0)
1 
2 /*
3   Defines the basic matrix operations for the SELL matrix storage format.
4 */
5 #include <../src/mat/impls/sell/seq/sell.h>  /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blocktranspose.h>
8 #if defined(PETSC_HAVE_IMMINTRIN_H)
9 #include <immintrin.h>
10 
11 #if !defined(_MM_SCALE_8)
12 #define _MM_SCALE_8    8
13 #endif
14 
15 #if defined(__AVX512F__)
16 /* these do not work
17  vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
18  vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
19 */
20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
22 vec_idx  = _mm256_load_si256((__m256i const*)acolidx); \
23 vec_vals = _mm512_load_pd(aval); \
24 vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
25 vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y);
26 
27 #endif
28 #endif  /* PETSC_HAVE_IMMINTRIN_H */
29 
30 /*@C
31  MatSeqSELLSetPreallocation - For good matrix assembly performance
32  the user should preallocate the matrix storage by setting the parameter nz
33  (or the array nnz).  By setting these parameters accurately, performance
34  during matrix assembly can be increased significantly.
35 
36  Collective on MPI_Comm
37 
38  Input Parameters:
39  +  B - The matrix
40  .  nz - number of nonzeros per row (same for all rows)
41  -  nnz - array containing the number of nonzeros in the various rows
42  (possibly different for each row) or NULL
43 
44  Notes:
45  If nnz is given then nz is ignored.
46 
47  Specify the preallocated storage with either nz or nnz (not both).
48  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
49  allocation.  For large problems you MUST preallocate memory or you
50  will get TERRIBLE performance, see the users' manual chapter on matrices.
51 
52  You can call MatGetInfo() to get information on how effective the preallocation was;
53  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
54  You can also run with the option -info and look for messages with the string
55  malloc in them to see if additional memory allocation was needed.
56 
57  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
58  entries or columns indices.
59 
60  The maximum number of nonzeos in any row should be as accuate as possible.
61  If it is underesitmated, you will get bad performance due to reallocation
62  (MatSeqXSELLReallocateSELL).
63 
64  Level: intermediate
65 
66  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
67 
68  @*/
69 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
70 {
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
75   PetscValidType(B,1);
76   ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
81 {
82   Mat_SeqSELL    *b;
83   PetscInt       i,j,totalslices;
84   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
89   if (maxallocrow == MAT_SKIP_ALLOCATION) {
90     skipallocation = PETSC_TRUE;
91     maxallocrow    = 0;
92   }
93 
94   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
95   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
96 
97   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
98   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
99   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
100   if (rlen) {
101     for (i=0; i<B->rmap->n; i++) {
102       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
103       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
104     }
105   }
106 
107   B->preallocated = PETSC_TRUE;
108 
109   b = (Mat_SeqSELL*)B->data;
110 
111   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
112   b->totalslices = totalslices;
113   if (!skipallocation) {
114     if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);
115 
116     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
117       ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr);
118       ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
119     }
120     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
121       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
122       else if (maxallocrow < 0) maxallocrow = 1;
123       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
124     } else {
125       maxallocrow = 0;
126       b->sliidx[0] = 0;
127       for (i=1; i<totalslices; i++) {
128         b->sliidx[i] = 0;
129         for (j=0;j<8;j++) {
130           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
131         }
132         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
133         b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i];
134       }
135       /* last slice */
136       b->sliidx[totalslices] = 0;
137       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
138       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
139       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
140     }
141 
142     /* allocate space for val, colidx, rlen */
143     /* FIXME: should B's old memory be unlogged? */
144     ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr);
145     /* FIXME: assuming an element of the bit array takes 8 bits */
146     ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr);
147     ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
148     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
149     ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr);
150     ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr);
151 
152     b->singlemalloc = PETSC_TRUE;
153     b->free_val     = PETSC_TRUE;
154     b->free_colidx  = PETSC_TRUE;
155   } else {
156     b->free_val    = PETSC_FALSE;
157     b->free_colidx = PETSC_FALSE;
158   }
159 
160   b->nz               = 0;
161   b->maxallocrow      = maxallocrow;
162   b->rlenmax          = maxallocrow;
163   b->maxallocmat      = b->sliidx[totalslices];
164   B->info.nz_unneeded = (double)b->maxallocmat;
165   if (realalloc) {
166     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
172 {
173   Mat            B;
174   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data;
175   PetscInt       i,j,row;
176   PetscBool      isnonzero;
177   PetscErrorCode ierr;
178 
179   PetscFunctionBegin;
180   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
181   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
182   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
183   ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr);
184   ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
185 
186   for (i=0; i<a->totalslices; i++) { /* loop over slices */
187     for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
188       isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]);
189       if (isnonzero) {
190         ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr);
191       }
192     }
193   }
194   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
195   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
196   B->rmap->bs = A->rmap->bs;
197 
198   if (reuse == MAT_INPLACE_MATRIX) {
199     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
200   } else {
201     *newmat = B;
202   }
203   PetscFunctionReturn(0);
204 }
205 
206 #include <../src/mat/impls/aij/seq/aij.h>
207 
208 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
209 {
210   Mat               B;
211   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
212   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
213   const PetscInt    *cols;
214   const PetscScalar *vals;
215   PetscErrorCode    ierr;
216 
217   PetscFunctionBegin;
218   if (A->rmap->bs > 1) {
219     ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr);
220     PetscFunctionReturn(0);
221   }
222   /* Can we just use ilen? */
223   ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr);
224   for (i=0; i<m; i++) {
225     rowlengths[i] = ai[i+1] - ai[i];
226   }
227 
228   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
229   ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr);
230   ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr);
231   ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr);
232   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
233 
234   ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
235 
236   for (row=0; row<m; row++) {
237     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
238     ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
239     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
240   }
241   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
242   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
243   B->rmap->bs = A->rmap->bs;
244 
245   if (reuse == MAT_INPLACE_MATRIX) {
246     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
247   } else {
248     *newmat = B;
249   }
250   PetscFunctionReturn(0);
251 }
252 
253 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
254 {
255   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
256   PetscScalar       *y;
257   const PetscScalar *x;
258   const MatScalar   *aval=a->val;
259   PetscInt          totalslices=a->totalslices;
260   const PetscInt    *acolidx=a->colidx;
261   PetscInt          i,j=0;
262   PetscErrorCode    ierr;
263 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
264   __m512d           vec_x,vec_y,vec_vals;
265   __m256i           vec_idx;
266   __mmask8          mask;
267   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
268   __m256i           vec_idx2,vec_idx3,vec_idx4;
269 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
270   __m128d           vec_x_tmp;
271   __m256d           vec_x,vec_y,vec_y2,vec_vals;
272   MatScalar         yval;
273   PetscInt          r,rows_left,row,nnz_in_row;
274 #else
275   PetscScalar       sum[8];
276 #endif
277 
278 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
279 #pragma disjoint(*x,*y,*aval)
280 #endif
281 
282   PetscFunctionBegin;
283   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
284   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
285 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
286   for (i=0; i<totalslices; i++) { /* loop over slices */
287     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
288     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
289 
290     vec_y  = _mm512_setzero_pd();
291     vec_y2 = _mm512_setzero_pd();
292     vec_y3 = _mm512_setzero_pd();
293     vec_y4 = _mm512_setzero_pd();
294 
295     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
296     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
297     case 3:
298       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
299       acolidx += 8; aval += 8;
300       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
301       acolidx += 8; aval += 8;
302       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
303       acolidx += 8; aval += 8;
304       j += 3;
305       break;
306     case 2:
307       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
308       acolidx += 8; aval += 8;
309       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
310       acolidx += 8; aval += 8;
311       j += 2;
312       break;
313     case 1:
314       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
315       acolidx += 8; aval += 8;
316       j += 1;
317       break;
318     }
319     #pragma novector
320     for (; j<(a->sliidx[i+1]>>3); j+=4) {
321       /*
322       vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
323       vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
324       */
325       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
326       acolidx += 8; aval += 8;
327       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
328       acolidx += 8; aval += 8;
329       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
330       acolidx += 8; aval += 8;
331       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
332       acolidx += 8; aval += 8;
333     }
334 
335     vec_y = _mm512_add_pd(vec_y,vec_y2);
336     vec_y = _mm512_add_pd(vec_y,vec_y3);
337     vec_y = _mm512_add_pd(vec_y,vec_y4);
338     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
339       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
340       _mm512_mask_store_pd(&y[8*i],mask,vec_y);
341     } else {
342       _mm512_store_pd(&y[8*i],vec_y);
343     }
344   }
345 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
346   for (i=0; i<totalslices; i++) { /* loop over full slices */
347     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
348     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
349 
350     vec_y  = _mm256_setzero_pd();
351     vec_y2 = _mm256_setzero_pd();
352 
353     /* last slice may have padding rows. Don't use vectorization. */
354     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
355       rows_left = A->rmap->n - 8*i;
356       for (r=0; r<rows_left; ++r) {
357         yval = (MatScalar)0;
358         row = 8*i + r;
359         nnz_in_row = a->rlen[row];
360         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
361         y[row] = yval;
362       }
363       break;
364     }
365 
366     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
367     for (; j<a->sliidx[i+1]; j+=8) {
368       vec_vals  = _mm256_loadu_pd(aval);
369       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
370       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
371       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
372       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
373       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
374       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
375       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
376       aval     += 4;
377 
378       vec_vals  = _mm256_loadu_pd(aval);
379       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
380       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
381       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
382       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
383       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
384       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
385       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
386       aval     += 4;
387     }
388 
389     _mm256_storeu_pd(y + i*8,     vec_y);
390     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
391   }
392 
393 #else
394   for (i=0; i<totalslices; i++) { /* loop over slices */
395     for (j=0; j<8; j++) sum[j] = 0.0;
396 
397     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
398       sum[0] += aval[j]*x[acolidx[j]];
399       sum[1] += aval[j+1]*x[acolidx[j+1]];
400       sum[2] += aval[j+2]*x[acolidx[j+2]];
401       sum[3] += aval[j+3]*x[acolidx[j+3]];
402       sum[4] += aval[j+4]*x[acolidx[j+4]];
403       sum[5] += aval[j+5]*x[acolidx[j+5]];
404       sum[6] += aval[j+6]*x[acolidx[j+6]];
405       sum[7] += aval[j+7]*x[acolidx[j+7]];
406     }
407     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
408       for(j=0;j<(A->rmap->n & 0x07);j++) y[8*i+j] = sum[j];
409     } else {
410       y[8*i]   = sum[0];
411       y[8*i+1] = sum[1];
412       y[8*i+2] = sum[2];
413       y[8*i+3] = sum[3];
414       y[8*i+4] = sum[4];
415       y[8*i+5] = sum[5];
416       y[8*i+6] = sum[6];
417       y[8*i+7] = sum[7];
418     }
419   }
420 #endif
421 
422   ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */
423   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
424   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
429 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
430 {
431   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
432   PetscScalar       *y,*z;
433   const PetscScalar *x;
434   const MatScalar   *aval=a->val;
435   PetscInt          totalslices=a->totalslices;
436   const PetscInt    *acolidx=a->colidx;
437   PetscInt          i,j;
438   PetscErrorCode    ierr;
439 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)  && !defined(PETSC_USE_64BIT_INDICES)
440   __m512d           vec_x,vec_y,vec_z,vec_vals;
441   __m256i           vec_idx;
442   __mmask8          mask;
443 #else
444   PetscScalar       sum[8];
445 #endif
446 
447 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
448 #pragma disjoint(*x,*y,*aval)
449 #endif
450 
451   PetscFunctionBegin;
452   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
453   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
454   for (i=0; i<totalslices; i++) { /* loop over slices */
455 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
456     vec_y = _mm512_load_pd(&y[8*i]);
457     #pragma novector
458     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
459       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
460       acolidx += 8; aval += 8;
461     }
462     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
463       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
464       _mm512_mask_store_pd(&z[8*i],mask,vec_y);
465     } else {
466       _mm512_store_pd(&z[8*i],vec_y);
467     }
468 #else
469     sum[0] = y[8*i];
470     sum[1] = y[8*i+1];
471     sum[2] = y[8*i+2];
472     sum[3] = y[8*i+3];
473     sum[4] = y[8*i+4];
474     sum[5] = y[8*i+5];
475     sum[6] = y[8*i+6];
476     sum[7] = y[8*i+7];
477     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
478       sum[0] += aval[j]*x[acolidx[j]];
479       sum[1] += aval[j+1]*x[acolidx[j+1]];
480       sum[2] += aval[j+2]*x[acolidx[j+2]];
481       sum[3] += aval[j+3]*x[acolidx[j+3]];
482       sum[4] += aval[j+4]*x[acolidx[j+4]];
483       sum[5] += aval[j+5]*x[acolidx[j+5]];
484       sum[6] += aval[j+6]*x[acolidx[j+6]];
485       sum[7] += aval[j+7]*x[acolidx[j+7]];
486     }
487     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
488       for(j=0;j<(A->rmap->n & 0x07);j++) z[8*i+j] = sum[j];
489     } else {
490       z[8*i]   = sum[0];
491       z[8*i+1] = sum[1];
492       z[8*i+2] = sum[2];
493       z[8*i+3] = sum[3];
494       z[8*i+4] = sum[4];
495       z[8*i+5] = sum[5];
496       z[8*i+6] = sum[6];
497       z[8*i+7] = sum[7];
498     }
499 #endif
500   }
501 
502   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
503   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
504   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
509 {
510   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
511   PetscScalar       *y;
512   const PetscScalar *x;
513   const MatScalar   *aval=a->val;
514   const PetscInt    *acolidx=a->colidx;
515   PetscInt          i,j,row;
516   PetscBool         isnonzero;
517   PetscErrorCode    ierr;
518 
519 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
520 #pragma disjoint(*x,*y,*aval)
521 #endif
522 
523   PetscFunctionBegin;
524   if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); }
525   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
526   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
527   for (i=0; i<a->totalslices; i++) { /* loop over slices */
528     for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
529       isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]);
530       if (isnonzero) y[acolidx[j]] += aval[j]*x[8*i+row];
531     }
532   }
533   ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr);
534   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
535   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
540 {
541   PetscErrorCode ierr;
542 
543   PetscFunctionBegin;
544   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
545   ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 /*
550      Checks for missing diagonals
551 */
552 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
553 {
554   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
555   PetscInt    *diag,i;
556 
557   PetscFunctionBegin;
558   *missing = PETSC_FALSE;
559   if (A->rmap->n > 0 && !(a->colidx)) {
560     *missing = PETSC_TRUE;
561     if (d) *d = 0;
562     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
563   } else {
564     diag = a->diag;
565     for (i=0; i<A->rmap->n; i++) {
566       if (diag[i] == -1) {
567         *missing = PETSC_TRUE;
568         if (d) *d = i;
569         PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
570         break;
571       }
572     }
573   }
574   PetscFunctionReturn(0);
575 }
576 
577 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
578 {
579   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
580   PetscInt       i,j,m=A->rmap->n,shift;
581   PetscErrorCode ierr;
582 
583   PetscFunctionBegin;
584   if (!a->diag) {
585     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
586     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
587     a->free_diag = PETSC_TRUE;
588   }
589   for (i=0; i<m; i++) { /* loop over rows */
590     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
591     a->diag[i] = -1;
592     for (j=0; j<a->rlen[i]; j++) {
593       if (a->colidx[shift+j*8] == i) {
594         a->diag[i] = shift+j*8;
595         break;
596       }
597     }
598   }
599   PetscFunctionReturn(0);
600 }
601 
602 /*
603   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
604 */
605 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
606 {
607   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
608   PetscInt       i,*diag,m = A->rmap->n;
609   MatScalar      *val = a->val;
610   PetscScalar    *idiag,*mdiag;
611   PetscErrorCode ierr;
612 
613   PetscFunctionBegin;
614   if (a->idiagvalid) PetscFunctionReturn(0);
615   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
616   diag = a->diag;
617   if (!a->idiag) {
618     ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr);
619     ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr);
620     val  = a->val;
621   }
622   mdiag = a->mdiag;
623   idiag = a->idiag;
624 
625   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
626     for (i=0; i<m; i++) {
627       mdiag[i] = val[diag[i]];
628       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
629         if (PetscRealPart(fshift)) {
630           ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr);
631           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
632           A->factorerror_zeropivot_value = 0.0;
633           A->factorerror_zeropivot_row   = i;
634         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
635       }
636       idiag[i] = 1.0/val[diag[i]];
637     }
638     ierr = PetscLogFlops(m);CHKERRQ(ierr);
639   } else {
640     for (i=0; i<m; i++) {
641       mdiag[i] = val[diag[i]];
642       idiag[i] = omega/(fshift + val[diag[i]]);
643     }
644     ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr);
645   }
646   a->idiagvalid = PETSC_TRUE;
647   PetscFunctionReturn(0);
648 }
649 
650 PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
651 {
652   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
653   PetscErrorCode ierr;
654 
655   PetscFunctionBegin;
656   ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr);
657   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 PetscErrorCode MatDestroy_SeqSELL(Mat A)
662 {
663   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
664   PetscErrorCode ierr;
665 
666   PetscFunctionBegin;
667 #if defined(PETSC_USE_LOG)
668   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
669 #endif
670   ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr);
671   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
672   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
673   ierr = PetscFree(a->diag);CHKERRQ(ierr);
674   ierr = PetscFree(a->ibdiag);CHKERRQ(ierr);
675   ierr = PetscFree(a->rlen);CHKERRQ(ierr);
676   ierr = PetscFree(a->sliidx);CHKERRQ(ierr);
677   ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr);
678   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
679   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
680   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
681 
682   ierr = PetscFree(A->data);CHKERRQ(ierr);
683 
684   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
685   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
686   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
687 #if defined(PETSC_HAVE_ELEMENTAL)
688 #endif
689   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr);
690   PetscFunctionReturn(0);
691 }
692 
693 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
694 {
695   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   switch (op) {
700   case MAT_ROW_ORIENTED:
701     a->roworiented = flg;
702     break;
703   case MAT_KEEP_NONZERO_PATTERN:
704     a->keepnonzeropattern = flg;
705     break;
706   case MAT_NEW_NONZERO_LOCATIONS:
707     a->nonew = (flg ? 0 : 1);
708     break;
709   case MAT_NEW_NONZERO_LOCATION_ERR:
710     a->nonew = (flg ? -1 : 0);
711     break;
712   case MAT_NEW_NONZERO_ALLOCATION_ERR:
713     a->nonew = (flg ? -2 : 0);
714     break;
715   case MAT_UNUSED_NONZERO_LOCATION_ERR:
716     a->nounused = (flg ? -1 : 0);
717     break;
718   case MAT_NEW_DIAGONALS:
719   case MAT_IGNORE_OFF_PROC_ENTRIES:
720   case MAT_USE_HASH_TABLE:
721     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
722     break;
723   case MAT_SPD:
724   case MAT_SYMMETRIC:
725   case MAT_STRUCTURALLY_SYMMETRIC:
726   case MAT_HERMITIAN:
727   case MAT_SYMMETRY_ETERNAL:
728     /* These options are handled directly by MatSetOption() */
729     break;
730   default:
731     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
732   }
733   PetscFunctionReturn(0);
734 }
735 
736 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
737 {
738   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
739   PetscInt       i,j,n,shift;
740   PetscScalar    *x,zero=0.0;
741   PetscErrorCode ierr;
742 
743   PetscFunctionBegin;
744   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
745   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
746 
747   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
748     PetscInt *diag=a->diag;
749     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
750     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
751     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
752     PetscFunctionReturn(0);
753   }
754 
755   ierr = VecSet(v,zero);CHKERRQ(ierr);
756   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
757   for (i=0; i<n; i++) { /* loop over rows */
758     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
759     x[i] = 0;
760     for (j=0; j<a->rlen[i]; j++) {
761       if (a->colidx[shift+j*8] == i) {
762         x[i] = a->val[shift+j*8];
763         break;
764       }
765     }
766   }
767   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
772 {
773   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
774   const PetscScalar *l,*r;
775   PetscInt          i,j,m,n,row;
776   PetscErrorCode    ierr;
777 
778   PetscFunctionBegin;
779   if (ll) {
780     /* The local size is used so that VecMPI can be passed to this routine
781        by MatDiagonalScale_MPISELL */
782     ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr);
783     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
784     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
785     for (i=0; i<a->totalslices; i++) { /* loop over slices */
786       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
787         a->val[j] *= l[8*i+row];
788       }
789     }
790     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
791     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
792   }
793   if (rr) {
794     ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr);
795     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
796     ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
797     for (i=0; i<a->totalslices; i++) { /* loop over slices */
798       for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
799         a->val[j] *= r[a->colidx[j]];
800       }
801     }
802     ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
803     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
804   }
805   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 
809 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
810 
811 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
812 {
813   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
814   PetscInt    *cp,i,k,low,high,t,row,col,l;
815   PetscInt    shift;
816   MatScalar   *vp;
817 
818   PetscFunctionBegin;
819   for (k=0; k<m; k++) { /* loop over added rows */
820     row = im[k];
821     if (row<0) continue;
822 #if defined(PETSC_USE_DEBUG)
823     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
824 #endif
825     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
826     cp = a->colidx+shift; /* pointer to the row */
827     vp = a->val+shift; /* pointer to the row */
828     for (l=0; l<n; l++) { /* loop over added rows */
829       col = in[l];
830       if (col<0) continue;
831 #if defined(PETSC_USE_DEBUG)
832       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
833 #endif
834       high = a->rlen[row]; low = 0; /* assume unsorted */
835       while (high-low > 5) {
836         t = (low+high)/2;
837         if (*(cp+t*8) > col) high = t;
838         else low = t;
839       }
840       for (i=low; i<high; i++) {
841         if (*(cp+8*i) > col) break;
842         if (*(cp+8*i) == col) {
843           *v++ = *(vp+8*i);
844           goto finished;
845         }
846       }
847       *v++ = 0.0;
848     finished:;
849     }
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
855 {
856   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
857   PetscInt          i,j,m=A->rmap->n,shift;
858   const char        *name;
859   PetscViewerFormat format;
860   PetscErrorCode    ierr;
861 
862   PetscFunctionBegin;
863   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
864   if (format == PETSC_VIEWER_ASCII_MATLAB) {
865     PetscInt nofinalvalue = 0;
866     /*
867     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
868       nofinalvalue = 1;
869     }
870     */
871     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
872     ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr);
873     ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr);
874 #if defined(PETSC_USE_COMPLEX)
875     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
876 #else
877     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
878 #endif
879     ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr);
880 
881     for (i=0; i<m; i++) {
882       shift = a->sliidx[i>>3]+(i&0x07);
883       for (j=0; j<a->rlen[i]; j++) {
884 #if defined(PETSC_USE_COMPLEX)
885         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
886 #else
887         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr);
888 #endif
889       }
890     }
891     /*
892     if (nofinalvalue) {
893 #if defined(PETSC_USE_COMPLEX)
894       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr);
895 #else
896       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr);
897 #endif
898     }
899     */
900     ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
901     ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr);
902     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
903   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
904     PetscFunctionReturn(0);
905   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
906     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
907     for (i=0; i<m; i++) {
908       ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
909       shift = a->sliidx[i>>3]+(i&0x07);
910       for (j=0; j<a->rlen[i]; j++) {
911 #if defined(PETSC_USE_COMPLEX)
912         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
913           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
914         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
915           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
916         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
917           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
918         }
919 #else
920         if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);}
921 #endif
922       }
923       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
924     }
925     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
926   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
927     PetscInt    cnt=0,jcnt;
928     PetscScalar value;
929 #if defined(PETSC_USE_COMPLEX)
930     PetscBool   realonly=PETSC_TRUE;
931     for (i=0; i<a->sliidx[a->totalslices]; i++) {
932       if (PetscImaginaryPart(a->val[i]) != 0.0) {
933         realonly = PETSC_FALSE;
934         break;
935       }
936     }
937 #endif
938 
939     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
940     for (i=0; i<m; i++) {
941       jcnt = 0;
942       shift = a->sliidx[i>>3]+(i&0x07);
943       for (j=0; j<A->cmap->n; j++) {
944         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
945           value = a->val[cnt++];
946           jcnt++;
947         } else {
948           value = 0.0;
949         }
950 #if defined(PETSC_USE_COMPLEX)
951         if (realonly) {
952           ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr);
953         } else {
954           ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr);
955         }
956 #else
957         ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr);
958 #endif
959       }
960       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
961     }
962     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
963   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
964     PetscInt fshift=1;
965     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
966 #if defined(PETSC_USE_COMPLEX)
967     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr);
968 #else
969     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr);
970 #endif
971     ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr);
972     for (i=0; i<m; i++) {
973       shift = a->sliidx[i>>3]+(i&0x07);
974       for (j=0; j<a->rlen[i]; j++) {
975 #if defined(PETSC_USE_COMPLEX)
976         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
977 #else
978         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr);
979 #endif
980       }
981     }
982     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
983   } else {
984     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
985     if (A->factortype) {
986       for (i=0; i<m; i++) {
987         shift = a->sliidx[i>>3]+(i&0x07);
988         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
989         /* L part */
990         for (j=shift; j<a->diag[i]; j+=8) {
991 #if defined(PETSC_USE_COMPLEX)
992           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
993             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
994           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
995             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
996           } else {
997             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
998           }
999 #else
1000           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1001 #endif
1002         }
1003         /* diagonal */
1004         j = a->diag[i];
1005 #if defined(PETSC_USE_COMPLEX)
1006         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1007           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr);
1008         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1009           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr);
1010         } else {
1011           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr);
1012         }
1013 #else
1014         ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr);
1015 #endif
1016 
1017         /* U part */
1018         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1019 #if defined(PETSC_USE_COMPLEX)
1020           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1021             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1022           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1023             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1024           } else {
1025             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1026           }
1027 #else
1028           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1029 #endif
1030         }
1031         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1032       }
1033     } else {
1034       for (i=0; i<m; i++) {
1035         shift = a->sliidx[i>>3]+(i&0x07);
1036         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1037         for (j=0; j<a->rlen[i]; j++) {
1038 #if defined(PETSC_USE_COMPLEX)
1039           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1040             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1041           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1042             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1043           } else {
1044             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1045           }
1046 #else
1047           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);
1048 #endif
1049         }
1050         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1051       }
1052     }
1053     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1054   }
1055   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 #include <petscdraw.h>
1060 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1061 {
1062   Mat               A=(Mat)Aa;
1063   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1064   PetscInt          i,j,m=A->rmap->n,shift;
1065   int               color;
1066   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1067   PetscViewer       viewer;
1068   PetscViewerFormat format;
1069   PetscErrorCode    ierr;
1070 
1071   PetscFunctionBegin;
1072   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1073   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1074   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1075 
1076   /* loop over matrix elements drawing boxes */
1077 
1078   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1079     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1080     /* Blue for negative, Cyan for zero and  Red for positive */
1081     color = PETSC_DRAW_BLUE;
1082     for (i=0; i<m; i++) {
1083       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1084       y_l = m - i - 1.0; y_r = y_l + 1.0;
1085       for (j=0; j<a->rlen[i]; j++) {
1086         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1087         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1088         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1089       }
1090     }
1091     color = PETSC_DRAW_CYAN;
1092     for (i=0; i<m; i++) {
1093       shift = a->sliidx[i>>3]+(i&0x07);
1094       y_l = m - i - 1.0; y_r = y_l + 1.0;
1095       for (j=0; j<a->rlen[i]; j++) {
1096         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1097         if (a->val[shift+8*j] !=  0.) continue;
1098         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1099       }
1100     }
1101     color = PETSC_DRAW_RED;
1102     for (i=0; i<m; i++) {
1103       shift = a->sliidx[i>>3]+(i&0x07);
1104       y_l = m - i - 1.0; y_r = y_l + 1.0;
1105       for (j=0; j<a->rlen[i]; j++) {
1106         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1107         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1108         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1109       }
1110     }
1111     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1112   } else {
1113     /* use contour shading to indicate magnitude of values */
1114     /* first determine max of all nonzero values */
1115     PetscReal minv=0.0,maxv=0.0;
1116     PetscInt  count=0;
1117     PetscDraw popup;
1118     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1119       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1120     }
1121     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1122     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1123     ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr);
1124 
1125     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1126     for (i=0; i<m; i++) {
1127       shift = a->sliidx[i>>3]+(i&0x07);
1128       y_l = m - i - 1.0;
1129       y_r = y_l + 1.0;
1130       for (j=0; j<a->rlen[i]; j++) {
1131         x_l = a->colidx[shift+j*8];
1132         x_r = x_l + 1.0;
1133         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1134         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1135         count++;
1136       }
1137     }
1138     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1139   }
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #include <petscdraw.h>
1144 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1145 {
1146   PetscDraw      draw;
1147   PetscReal      xr,yr,xl,yl,h,w;
1148   PetscBool      isnull;
1149   PetscErrorCode ierr;
1150 
1151   PetscFunctionBegin;
1152   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1153   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1154   if (isnull) PetscFunctionReturn(0);
1155 
1156   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1157   xr  += w;          yr += h;         xl = -w;     yl = -h;
1158   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1159   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1160   ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr);
1161   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1162   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1167 {
1168   PetscBool      iascii,isbinary,isdraw;
1169   PetscErrorCode ierr;
1170 
1171   PetscFunctionBegin;
1172   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1173   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1174   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1175   if (iascii) {
1176     ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr);
1177   } else if (isbinary) {
1178     /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */
1179   } else if (isdraw) {
1180     ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr);
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1186 {
1187   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1188   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1189   MatScalar      *vp;
1190   PetscErrorCode ierr;
1191 
1192   PetscFunctionBegin;
1193   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1194   /* To do: compress out the unused elements */
1195   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
1196   ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr);
1197   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr);
1198   ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr);
1199   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1200   for (i=0; i<a->totalslices; ++i) {
1201     shift = a->sliidx[i];    /* starting index of the slice */
1202     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1203     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1204     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1205       row  = 8*i + row_in_slice;
1206       nrow = a->rlen[row]; /* number of nonzeros in row */
1207       /*
1208         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1209         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1210       */
1211       lastcol = 0;
1212       if (nrow>0) { /* nonempty row */
1213         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1214       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1215         for (j=1;j<8;j++) {
1216           if (a->rlen[8*i+j]) {
1217             lastcol = cp[j];
1218             break;
1219           }
1220         }
1221       } else {
1222         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1223       }
1224 
1225       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1226         cp[8*k+row_in_slice] = lastcol;
1227         vp[8*k+row_in_slice] = (MatScalar)0;
1228       }
1229     }
1230   }
1231 
1232   A->info.mallocs += a->reallocs;
1233   a->reallocs      = 0;
1234 
1235   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1240 {
1241   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1242 
1243   PetscFunctionBegin;
1244   info->block_size   = 1.0;
1245   info->nz_allocated = (double)a->maxallocmat;
1246   info->nz_used      = (double)a->sliidx[a->totalslices]; /* include padding zeros */
1247   info->nz_unneeded  = (double)(a->maxallocmat-a->sliidx[a->totalslices]);
1248   info->assemblies   = (double)A->num_ass;
1249   info->mallocs      = (double)A->info.mallocs;
1250   info->memory       = ((PetscObject)A)->mem;
1251   if (A->factortype) {
1252     info->fill_ratio_given  = A->info.fill_ratio_given;
1253     info->fill_ratio_needed = A->info.fill_ratio_needed;
1254     info->factor_mallocs    = A->info.factor_mallocs;
1255   } else {
1256     info->fill_ratio_given  = 0;
1257     info->fill_ratio_needed = 0;
1258     info->factor_mallocs    = 0;
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1264 {
1265   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1266   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1267   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1268   MatScalar      *vp,value;
1269   PetscErrorCode ierr;
1270 
1271   PetscFunctionBegin;
1272   for (k=0; k<m; k++) { /* loop over added rows */
1273     row = im[k];
1274     if (row < 0) continue;
1275 #if defined(PETSC_USE_DEBUG)
1276     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1277 #endif
1278     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1279     cp    = a->colidx+shift; /* pointer to the row */
1280     vp    = a->val+shift; /* pointer to the row */
1281     nrow  = a->rlen[row];
1282     low   = 0;
1283     high  = nrow;
1284 
1285     for (l=0; l<n; l++) { /* loop over added columns */
1286       col = in[l];
1287       if (col<0) continue;
1288 #if defined(PETSC_USE_DEBUG)
1289       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1290 #endif
1291       if (a->roworiented) {
1292         value = v[l+k*n];
1293       } else {
1294         value = v[k+l*m];
1295       }
1296       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1297 
1298       /* search in this row for the specified colmun, i indicates the column to be set */
1299       if (col <= lastcol) low = 0;
1300       else high = nrow;
1301       lastcol = col;
1302       while (high-low > 5) {
1303         t = (low+high)/2;
1304         if (*(cp+t*8) > col) high = t;
1305         else low = t;
1306       }
1307       for (i=low; i<high; i++) {
1308         if (*(cp+i*8) > col) break;
1309         if (*(cp+i*8) == col) {
1310           if (is == ADD_VALUES) *(vp+i*8) += value;
1311           else *(vp+i*8) = value;
1312           low = i + 1;
1313           goto noinsert;
1314         }
1315       }
1316       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1317       if (nonew == 1) goto noinsert;
1318       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1319       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1320       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1321       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1322       for (ii=nrow-1; ii>=i; ii--) {
1323         *(cp+(ii+1)*8) = *(cp+ii*8);
1324         *(vp+(ii+1)*8) = *(vp+ii*8);
1325       }
1326       a->rlen[row]++;
1327       *(cp+i*8) = col;
1328       *(vp+i*8) = value;
1329       a->nz++;
1330       A->nonzerostate++;
1331       low = i+1; high++; nrow++;
1332 noinsert:;
1333     }
1334     a->rlen[row] = nrow;
1335   }
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1340 {
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   /* If the two matrices have the same copy implementation, use fast copy. */
1345   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1346     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1347     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1348 
1349     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1350     ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1351   } else {
1352     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 PetscErrorCode MatSetUp_SeqSELL(Mat A)
1358 {
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1367 {
1368   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1369 
1370   PetscFunctionBegin;
1371   *array = a->val;
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1376 {
1377   PetscFunctionBegin;
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 PetscErrorCode MatRealPart_SeqSELL(Mat A)
1382 {
1383   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1384   PetscInt    i;
1385   MatScalar   *aval=a->val;
1386 
1387   PetscFunctionBegin;
1388   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1393 {
1394   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1395   PetscInt       i;
1396   MatScalar      *aval=a->val;
1397   PetscErrorCode ierr;
1398 
1399   PetscFunctionBegin;
1400   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1401   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1406 {
1407   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1408   MatScalar      *aval=a->val;
1409   PetscScalar    oalpha=alpha;
1410   PetscBLASInt   one=1,size;
1411   PetscErrorCode ierr;
1412 
1413   PetscFunctionBegin;
1414   ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr);
1415   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1416   ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1417   ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1422 {
1423   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   if (!Y->preallocated || !y->nz) {
1428     ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr);
1429   }
1430   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1435 {
1436   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1437   PetscScalar       *x,sum,*t;
1438   const MatScalar   *idiag=0,*mdiag;
1439   const PetscScalar *b,*xb;
1440   PetscInt          n,m=A->rmap->n,i,j,shift;
1441   const PetscInt    *diag;
1442   PetscErrorCode    ierr;
1443 
1444   PetscFunctionBegin;
1445   its = its*lits;
1446 
1447   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1448   if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);}
1449   a->fshift = fshift;
1450   a->omega  = omega;
1451 
1452   diag  = a->diag;
1453   t     = a->ssor_work;
1454   idiag = a->idiag;
1455   mdiag = a->mdiag;
1456 
1457   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1458   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1459   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1460   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1461   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1462   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1463 
1464   if (flag & SOR_ZERO_INITIAL_GUESS) {
1465     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1466       for (i=0; i<m; i++) {
1467         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1468         sum   = b[i];
1469         n     = (diag[i]-shift)/8;
1470         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1471         t[i]  = sum;
1472         x[i]  = sum*idiag[i];
1473       }
1474       xb   = t;
1475       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1476     } else xb = b;
1477     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1478       for (i=m-1; i>=0; i--) {
1479         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1480         sum   = xb[i];
1481         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1482         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1483         if (xb == b) {
1484           x[i] = sum*idiag[i];
1485         } else {
1486           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1487         }
1488       }
1489       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1490     }
1491     its--;
1492   }
1493   while (its--) {
1494     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1495       for (i=0; i<m; i++) {
1496         /* lower */
1497         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1498         sum   = b[i];
1499         n     = (diag[i]-shift)/8;
1500         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1501         t[i]  = sum;             /* save application of the lower-triangular part */
1502         /* upper */
1503         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1504         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1505         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1506       }
1507       xb   = t;
1508       ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1509     } else xb = b;
1510     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1511       for (i=m-1; i>=0; i--) {
1512         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1513         sum = xb[i];
1514         if (xb == b) {
1515           /* whole matrix (no checkpointing available) */
1516           n     = a->rlen[i];
1517           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1518           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1519         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1520           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1521           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1522           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1523         }
1524       }
1525       if (xb == b) {
1526         ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1527       } else {
1528         ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1529       }
1530     }
1531   }
1532   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1533   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 /* -------------------------------------------------------------------*/
1538 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1539                                        0,
1540                                        0,
1541                                        MatMult_SeqSELL,
1542                                /* 4*/  MatMultAdd_SeqSELL,
1543                                        MatMultTranspose_SeqSELL,
1544                                        MatMultTransposeAdd_SeqSELL,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                /* 10*/ 0,
1549                                        0,
1550                                        0,
1551                                        MatSOR_SeqSELL,
1552                                        0,
1553                                /* 15*/ MatGetInfo_SeqSELL,
1554                                        MatEqual_SeqSELL,
1555                                        MatGetDiagonal_SeqSELL,
1556                                        MatDiagonalScale_SeqSELL,
1557                                        0,
1558                                /* 20*/ 0,
1559                                        MatAssemblyEnd_SeqSELL,
1560                                        MatSetOption_SeqSELL,
1561                                        MatZeroEntries_SeqSELL,
1562                                /* 24*/ 0,
1563                                        0,
1564                                        0,
1565                                        0,
1566                                        0,
1567                                /* 29*/ MatSetUp_SeqSELL,
1568                                        0,
1569                                        0,
1570                                        0,
1571                                        0,
1572                                /* 34*/ MatDuplicate_SeqSELL,
1573                                        0,
1574                                        0,
1575                                        0,
1576                                        0,
1577                                /* 39*/ 0,
1578                                        0,
1579                                        0,
1580                                        MatGetValues_SeqSELL,
1581                                        MatCopy_SeqSELL,
1582                                /* 44*/ 0,
1583                                        MatScale_SeqSELL,
1584                                        MatShift_SeqSELL,
1585                                        0,
1586                                        0,
1587                                /* 49*/ 0,
1588                                        0,
1589                                        0,
1590                                        0,
1591                                        0,
1592                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1593                                        0,
1594                                        0,
1595                                        0,
1596                                        0,
1597                                /* 59*/ 0,
1598                                        MatDestroy_SeqSELL,
1599                                        MatView_SeqSELL,
1600                                        0,
1601                                        0,
1602                                /* 64*/ 0,
1603                                        0,
1604                                        0,
1605                                        0,
1606                                        0,
1607                                /* 69*/ 0,
1608                                        0,
1609                                        0,
1610                                        0,
1611                                        0,
1612                                /* 74*/ 0,
1613                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1614                                        0,
1615                                        0,
1616                                        0,
1617                                /* 79*/ 0,
1618                                        0,
1619                                        0,
1620                                        0,
1621                                        0,
1622                                /* 84*/ 0,
1623                                        0,
1624                                        0,
1625                                        0,
1626                                        0,
1627                                /* 89*/ 0,
1628                                        0,
1629                                        0,
1630                                        0,
1631                                        0,
1632                                /* 94*/ 0,
1633                                        0,
1634                                        0,
1635                                        0,
1636                                        0,
1637                                /* 99*/ 0,
1638                                        0,
1639                                        0,
1640                                        MatConjugate_SeqSELL,
1641                                        0,
1642                                /*104*/ 0,
1643                                        0,
1644                                        0,
1645                                        0,
1646                                        0,
1647                                /*109*/ 0,
1648                                        0,
1649                                        0,
1650                                        0,
1651                                        MatMissingDiagonal_SeqSELL,
1652                                /*114*/ 0,
1653                                        0,
1654                                        0,
1655                                        0,
1656                                        0,
1657                                /*119*/ 0,
1658                                        0,
1659                                        0,
1660                                        0,
1661                                        0,
1662                                /*124*/ 0,
1663                                        0,
1664                                        0,
1665                                        0,
1666                                        0,
1667                                /*129*/ 0,
1668                                        0,
1669                                        0,
1670                                        0,
1671                                        0,
1672                                /*134*/ 0,
1673                                        0,
1674                                        0,
1675                                        0,
1676                                        0,
1677                                /*139*/ 0,
1678                                        0,
1679                                        0,
1680                                        MatFDColoringSetUp_SeqXAIJ,
1681                                        0,
1682                                 /*144*/0
1683 };
1684 
1685 PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1686 {
1687   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1688   PetscErrorCode ierr;
1689 
1690   PetscFunctionBegin;
1691   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1692 
1693   /* allocate space for values if not already there */
1694   if (!a->saved_values) {
1695     ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr);
1696     ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1697   }
1698 
1699   /* copy values over */
1700   ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1705 {
1706   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1707   PetscErrorCode ierr;
1708 
1709   PetscFunctionBegin;
1710   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1711   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1712   /* copy values over */
1713   ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1714   PetscFunctionReturn(0);
1715 }
1716 
1717 /*@C
1718  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1719 
1720  Not Collective
1721 
1722  Input Parameters:
1723  .  mat - a MATSEQSELL matrix
1724  .  array - pointer to the data
1725 
1726  Level: intermediate
1727 
1728  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1729  @*/
1730 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1731 {
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1740 {
1741   Mat_SeqSELL    *b;
1742   PetscMPIInt    size;
1743   PetscErrorCode ierr;
1744 
1745   PetscFunctionBegin;
1746   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1747   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1748 
1749   ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
1750 
1751   B->data = (void*)b;
1752 
1753   ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1754 
1755   b->row                = 0;
1756   b->col                = 0;
1757   b->icol               = 0;
1758   b->reallocs           = 0;
1759   b->ignorezeroentries  = PETSC_FALSE;
1760   b->roworiented        = PETSC_TRUE;
1761   b->nonew              = 0;
1762   b->diag               = 0;
1763   b->solve_work         = 0;
1764   B->spptr              = 0;
1765   b->saved_values       = 0;
1766   b->idiag              = 0;
1767   b->mdiag              = 0;
1768   b->ssor_work          = 0;
1769   b->omega              = 1.0;
1770   b->fshift             = 0.0;
1771   b->idiagvalid         = PETSC_FALSE;
1772   b->ibdiagvalid        = PETSC_FALSE;
1773   b->keepnonzeropattern = PETSC_FALSE;
1774 
1775   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr);
1776   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr);
1777   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr);
1779   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr);
1780   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr);
1781   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr);
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 /*
1786  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1787  */
1788 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1789 {
1790   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1791   PetscInt       i,m=A->rmap->n;
1792   PetscInt       totalslices=a->totalslices;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   c = (Mat_SeqSELL*)C->data;
1797 
1798   C->factortype = A->factortype;
1799   c->row        = 0;
1800   c->col        = 0;
1801   c->icol       = 0;
1802   c->reallocs   = 0;
1803 
1804   C->assembled = PETSC_TRUE;
1805 
1806   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
1807   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
1808 
1809   ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr);
1810   ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1811   ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr);
1812   ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
1813 
1814   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
1815   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
1816 
1817   /* allocate the matrix space */
1818   if (mallocmatspace) {
1819     ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr);
1820     ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1821 
1822     c->singlemalloc = PETSC_TRUE;
1823 
1824     if (m > 0) {
1825       ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr);
1826       if (cpvalues == MAT_COPY_VALUES) {
1827         ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1828       } else {
1829         ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1830       }
1831     }
1832   }
1833 
1834   c->ignorezeroentries = a->ignorezeroentries;
1835   c->roworiented       = a->roworiented;
1836   c->nonew             = a->nonew;
1837   if (a->diag) {
1838     ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr);
1839     ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1840     for (i=0; i<m; i++) {
1841       c->diag[i] = a->diag[i];
1842     }
1843   } else c->diag = 0;
1844 
1845   c->solve_work         = 0;
1846   c->saved_values       = 0;
1847   c->idiag              = 0;
1848   c->ssor_work          = 0;
1849   c->keepnonzeropattern = a->keepnonzeropattern;
1850   c->free_val           = PETSC_TRUE;
1851   c->free_colidx        = PETSC_TRUE;
1852 
1853   c->maxallocmat  = a->maxallocmat;
1854   c->maxallocrow  = a->maxallocrow;
1855   c->rlenmax      = a->rlenmax;
1856   c->nz           = a->nz;
1857   C->preallocated = PETSC_TRUE;
1858 
1859   c->nonzerorowcnt = a->nonzerorowcnt;
1860   C->nonzerostate  = A->nonzerostate;
1861 
1862   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
1867 {
1868   PetscErrorCode ierr;
1869 
1870   PetscFunctionBegin;
1871   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
1872   ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
1873   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
1874     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
1875   }
1876   ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1877   ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 /*@C
1882  MatCreateSeqSELL - Creates a sparse matrix in SELL format.
1883 
1884  Collective on MPI_Comm
1885 
1886  Input Parameters:
1887  +  comm - MPI communicator, set to PETSC_COMM_SELF
1888  .  m - number of rows
1889  .  n - number of columns
1890  .  rlenmax - maximum number of nonzeros in a row
1891  -  rlen - array containing the number of nonzeros in the various rows
1892  (possibly different for each row) or NULL
1893 
1894  Output Parameter:
1895  .  A - the matrix
1896 
1897  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1898  MatXXXXSetPreallocation() paradgm instead of this routine directly.
1899  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
1900 
1901  Notes:
1902  If nnz is given then nz is ignored
1903 
1904  Specify the preallocated storage with either rlenmax or rlen (not both).
1905  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
1906  allocation.  For large problems you MUST preallocate memory or you
1907  will get TERRIBLE performance, see the users' manual chapter on matrices.
1908 
1909  Level: intermediate
1910 
1911  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()
1912 
1913  @*/
1914 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
1915 {
1916   PetscErrorCode ierr;
1917 
1918   PetscFunctionBegin;
1919   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1920   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1921   ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr);
1922   ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr);
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
1927 {
1928   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
1929   PetscInt       totalslices=a->totalslices;
1930   PetscErrorCode ierr;
1931 
1932   PetscFunctionBegin;
1933   /* If the  matrix dimensions are not equal,or no of nonzeros */
1934   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
1935     *flg = PETSC_FALSE;
1936     PetscFunctionReturn(0);
1937   }
1938   /* if the a->colidx are the same */
1939   ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr);
1940   if (!*flg) PetscFunctionReturn(0);
1941   /* if a->val are the same */
1942   ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
1947 {
1948   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1949 
1950   PetscFunctionBegin;
1951   a->idiagvalid  = PETSC_FALSE;
1952   a->ibdiagvalid = PETSC_FALSE;
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 PetscErrorCode MatConjugate_SeqSELL(Mat A)
1957 {
1958 #if defined(PETSC_USE_COMPLEX)
1959   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1960   PetscInt    i;
1961   PetscScalar *val = a->val;
1962 
1963   PetscFunctionBegin;
1964   for (i=0; i<a->sliidx[a->totalslices]; i++) {
1965     val[i] = PetscConj(val[i]);
1966   }
1967 #else
1968   PetscFunctionBegin;
1969 #endif
1970   PetscFunctionReturn(0);
1971 }
1972