xref: /petsc/src/mat/impls/sell/seq/sell.c (revision 4e22ec7992ea0a5a8658409cbeedb7110d765e7d)
1 
2 /*
3   Defines the basic matrix operations for the SELL matrix storage format.
4 */
5 #include <../src/mat/impls/sell/seq/sell.h>  /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blocktranspose.h>
8 #if defined(PETSC_HAVE_IMMINTRIN_H)
9   #include <immintrin.h>
10 
11   #if !defined(_MM_SCALE_8)
12   #define _MM_SCALE_8    8
13   #endif
14 
15   #if defined(__AVX512F__)
16   /* these do not work
17    vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
18    vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
19   */
20     #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
21     /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
22     vec_idx  = _mm256_load_si256((__m256i const*)acolidx); \
23     vec_vals = _mm512_load_pd(aval); \
24     vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
25     vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y)
26   #elif defined(__AVX2__)
27     #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
28     vec_vals = _mm256_load_pd(aval); \
29     vec_idx  =  _mm_load_si128((__m128i const*)acolidx); /* SSE2 */ \
30     vec_x    = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \
31     vec_y    = _mm256_fmadd_pd(vec_x,vec_vals,vec_y)
32   #endif
33 #endif  /* PETSC_HAVE_IMMINTRIN_H */
34 
35 /*@C
36  MatSeqSELLSetPreallocation - For good matrix assembly performance
37  the user should preallocate the matrix storage by setting the parameter nz
38  (or the array nnz).  By setting these parameters accurately, performance
39  during matrix assembly can be increased significantly.
40 
41  Collective on MPI_Comm
42 
43  Input Parameters:
44  +  B - The matrix
45  .  nz - number of nonzeros per row (same for all rows)
46  -  nnz - array containing the number of nonzeros in the various rows
47  (possibly different for each row) or NULL
48 
49  Notes:
50  If nnz is given then nz is ignored.
51 
52  Specify the preallocated storage with either nz or nnz (not both).
53  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
54  allocation.  For large problems you MUST preallocate memory or you
55  will get TERRIBLE performance, see the users' manual chapter on matrices.
56 
57  You can call MatGetInfo() to get information on how effective the preallocation was;
58  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
59  You can also run with the option -info and look for messages with the string
60  malloc in them to see if additional memory allocation was needed.
61 
62  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
63  entries or columns indices.
64 
65  The maximum number of nonzeos in any row should be as accuate as possible.
66  If it is underesitmated, you will get bad performance due to reallocation
67  (MatSeqXSELLReallocateSELL).
68 
69  Level: intermediate
70 
71  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
72 
73  @*/
74 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
75 {
76   PetscErrorCode ierr;
77 
78   PetscFunctionBegin;
79   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
80   PetscValidType(B,1);
81   ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr);
82   PetscFunctionReturn(0);
83 }
84 
85 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
86 {
87   Mat_SeqSELL    *b;
88   PetscInt       i,j,totalslices;
89   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
94   if (maxallocrow == MAT_SKIP_ALLOCATION) {
95     skipallocation = PETSC_TRUE;
96     maxallocrow    = 0;
97   }
98 
99   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
100   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
101 
102   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
103   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
104   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
105   if (rlen) {
106     for (i=0; i<B->rmap->n; i++) {
107       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
108       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
109     }
110   }
111 
112   B->preallocated = PETSC_TRUE;
113 
114   b = (Mat_SeqSELL*)B->data;
115 
116   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
117   b->totalslices = totalslices;
118   if (!skipallocation) {
119     if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);
120 
121     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
122       ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr);
123       ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
124     }
125     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
126       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
127       else if (maxallocrow < 0) maxallocrow = 1;
128       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
129     } else {
130       maxallocrow = 0;
131       b->sliidx[0] = 0;
132       for (i=1; i<totalslices; i++) {
133         b->sliidx[i] = 0;
134         for (j=0;j<8;j++) {
135           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
136         }
137         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
138         b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i];
139       }
140       /* last slice */
141       b->sliidx[totalslices] = 0;
142       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
143       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
144       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
145     }
146 
147     /* allocate space for val, colidx, rlen */
148     /* FIXME: should B's old memory be unlogged? */
149     ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr);
150     /* FIXME: assuming an element of the bit array takes 8 bits */
151     ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr);
152     ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
153     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
154     ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr);
155     ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr);
156 
157     b->singlemalloc = PETSC_TRUE;
158     b->free_val     = PETSC_TRUE;
159     b->free_colidx  = PETSC_TRUE;
160   } else {
161     b->free_val    = PETSC_FALSE;
162     b->free_colidx = PETSC_FALSE;
163   }
164 
165   b->nz               = 0;
166   b->maxallocrow      = maxallocrow;
167   b->rlenmax          = maxallocrow;
168   b->maxallocmat      = b->sliidx[totalslices];
169   B->info.nz_unneeded = (double)b->maxallocmat;
170   if (realalloc) {
171     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
177 {
178   Mat            B;
179   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data;
180   PetscInt       i,j,row;
181   PetscBool      isnonzero;
182   PetscErrorCode ierr;
183 
184   PetscFunctionBegin;
185   if (reuse == MAT_REUSE_MATRIX) {
186     B = *newmat;
187   } else {
188     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
189     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
190     ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
191     ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr);
192     ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
193   }
194 
195   for (i=0; i<a->totalslices; i++) { /* loop over slices */
196     for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
197       isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]);
198       if (isnonzero) {
199         ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr);
200       }
201     }
202   }
203   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
204   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
205   B->rmap->bs = A->rmap->bs;
206 
207   if (reuse == MAT_INPLACE_MATRIX) {
208     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
209   } else {
210     *newmat = B;
211   }
212   PetscFunctionReturn(0);
213 }
214 
215 #include <../src/mat/impls/aij/seq/aij.h>
216 
217 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
218 {
219   Mat               B;
220   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
221   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
222   const PetscInt    *cols;
223   const PetscScalar *vals;
224   PetscErrorCode    ierr;
225 
226   PetscFunctionBegin;
227   if (A->rmap->bs > 1) {
228     ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr);
229     PetscFunctionReturn(0);
230   }
231 
232   if (reuse == MAT_REUSE_MATRIX) {
233     B = *newmat;
234   } else {
235     /* Can we just use ilen? */
236     ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr);
237     for (i=0; i<m; i++) {
238       rowlengths[i] = ai[i+1] - ai[i];
239     }
240 
241     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
242     ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr);
243     ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr);
244     ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr);
245     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
246   }
247 
248   ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
249 
250   for (row=0; row<m; row++) {
251     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
252     ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
253     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
254   }
255   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
256   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257   B->rmap->bs = A->rmap->bs;
258 
259   if (reuse == MAT_INPLACE_MATRIX) {
260     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
261   } else {
262     *newmat = B;
263   }
264   PetscFunctionReturn(0);
265 }
266 
267 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
268 {
269   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
270   PetscScalar       *y;
271   const PetscScalar *x;
272   const MatScalar   *aval=a->val;
273   PetscInt          totalslices=a->totalslices;
274   const PetscInt    *acolidx=a->colidx;
275   PetscInt          i,j;
276   PetscErrorCode    ierr;
277 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
278   __m512d           vec_x,vec_y,vec_vals;
279   __m256i           vec_idx;
280   __mmask8          mask;
281   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
282   __m256i           vec_idx2,vec_idx3,vec_idx4;
283 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
284   __m128i           vec_idx;
285   __m256d           vec_x,vec_y,vec_y2,vec_vals;
286   MatScalar         yval;
287   PetscInt          r,rows_left,row,nnz_in_row;
288 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
289   __m128d           vec_x_tmp;
290   __m256d           vec_x,vec_y,vec_y2,vec_vals;
291   MatScalar         yval;
292   PetscInt          r,rows_left,row,nnz_in_row;
293 #else
294   PetscScalar       sum[8];
295 #endif
296 
297 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
298 #pragma disjoint(*x,*y,*aval)
299 #endif
300 
301   PetscFunctionBegin;
302   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
303   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
304 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
305   for (i=0; i<totalslices; i++) { /* loop over slices */
306     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
307     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
308 
309     vec_y  = _mm512_setzero_pd();
310     vec_y2 = _mm512_setzero_pd();
311     vec_y3 = _mm512_setzero_pd();
312     vec_y4 = _mm512_setzero_pd();
313 
314     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
315     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
316     case 3:
317       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
318       acolidx += 8; aval += 8;
319       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
320       acolidx += 8; aval += 8;
321       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
322       acolidx += 8; aval += 8;
323       j += 3;
324       break;
325     case 2:
326       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
327       acolidx += 8; aval += 8;
328       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
329       acolidx += 8; aval += 8;
330       j += 2;
331       break;
332     case 1:
333       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
334       acolidx += 8; aval += 8;
335       j += 1;
336       break;
337     }
338     #pragma novector
339     for (; j<(a->sliidx[i+1]>>3); j+=4) {
340       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
341       acolidx += 8; aval += 8;
342       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
343       acolidx += 8; aval += 8;
344       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
345       acolidx += 8; aval += 8;
346       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
347       acolidx += 8; aval += 8;
348     }
349 
350     vec_y = _mm512_add_pd(vec_y,vec_y2);
351     vec_y = _mm512_add_pd(vec_y,vec_y3);
352     vec_y = _mm512_add_pd(vec_y,vec_y4);
353     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
354       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
355       _mm512_mask_store_pd(&y[8*i],mask,vec_y);
356     } else {
357       _mm512_store_pd(&y[8*i],vec_y);
358     }
359   }
360 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
361   for (i=0; i<totalslices; i++) { /* loop over full slices */
362     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
363     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
364 
365     /* last slice may have padding rows. Don't use vectorization. */
366     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
367       rows_left = A->rmap->n - 8*i;
368       for (r=0; r<rows_left; ++r) {
369         yval = (MatScalar)0;
370         row = 8*i + r;
371         nnz_in_row = a->rlen[row];
372         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
373         y[row] = yval;
374       }
375       break;
376     }
377 
378     vec_y  = _mm256_setzero_pd();
379     vec_y2 = _mm256_setzero_pd();
380 
381     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
382     #pragma novector
383     #pragma unroll(2)
384     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
385       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
386       aval += 4; acolidx += 4;
387       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2);
388       aval += 4; acolidx += 4;
389     }
390 
391     _mm256_store_pd(y+i*8,vec_y);
392     _mm256_store_pd(y+i*8+4,vec_y2);
393   }
394 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
395   for (i=0; i<totalslices; i++) { /* loop over full slices */
396     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
397     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
398 
399     vec_y  = _mm256_setzero_pd();
400     vec_y2 = _mm256_setzero_pd();
401 
402     /* last slice may have padding rows. Don't use vectorization. */
403     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
404       rows_left = A->rmap->n - 8*i;
405       for (r=0; r<rows_left; ++r) {
406         yval = (MatScalar)0;
407         row = 8*i + r;
408         nnz_in_row = a->rlen[row];
409         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
410         y[row] = yval;
411       }
412       break;
413     }
414 
415     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
416     #pragma novector
417     #pragma unroll(2)
418     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
419       vec_vals  = _mm256_loadu_pd(aval);
420       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
421       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
422       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
423       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
424       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
425       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
426       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
427       aval     += 4;
428 
429       vec_vals  = _mm256_loadu_pd(aval);
430       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
431       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
432       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
433       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
434       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
435       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
436       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
437       aval     += 4;
438     }
439 
440     _mm256_storeu_pd(y + i*8,     vec_y);
441     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
442   }
443 #else
444   for (i=0; i<totalslices; i++) { /* loop over slices */
445     for (j=0; j<8; j++) sum[j] = 0.0;
446     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
447       sum[0] += aval[j] * x[acolidx[j]];
448       sum[1] += aval[j+1] * x[acolidx[j+1]];
449       sum[2] += aval[j+2] * x[acolidx[j+2]];
450       sum[3] += aval[j+3] * x[acolidx[j+3]];
451       sum[4] += aval[j+4] * x[acolidx[j+4]];
452       sum[5] += aval[j+5] * x[acolidx[j+5]];
453       sum[6] += aval[j+6] * x[acolidx[j+6]];
454       sum[7] += aval[j+7] * x[acolidx[j+7]];
455     }
456     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
457       for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
458     } else {
459       for(j=0; j<8; j++) y[8*i+j] = sum[j];
460     }
461   }
462 #endif
463 
464   ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */
465   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
466   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
467   PetscFunctionReturn(0);
468 }
469 
470 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
471 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
472 {
473   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
474   PetscScalar       *y,*z;
475   const PetscScalar *x;
476   const MatScalar   *aval=a->val;
477   PetscInt          totalslices=a->totalslices;
478   const PetscInt    *acolidx=a->colidx;
479   PetscInt          i,j;
480   PetscErrorCode    ierr;
481 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
482   __m512d           vec_x,vec_y,vec_vals;
483   __m256i           vec_idx;
484   __mmask8          mask;
485   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
486   __m256i           vec_idx2,vec_idx3,vec_idx4;
487 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
488   __m128d           vec_x_tmp;
489   __m256d           vec_x,vec_y,vec_y2,vec_vals;
490   MatScalar         yval;
491   PetscInt          r,row,nnz_in_row;
492 #else
493   PetscScalar       sum[8];
494 #endif
495 
496 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
497 #pragma disjoint(*x,*y,*aval)
498 #endif
499 
500   PetscFunctionBegin;
501   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
502   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
503 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
504   for (i=0; i<totalslices; i++) { /* loop over slices */
505     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
506     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
507 
508     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
509       mask   = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
510       vec_y  = _mm512_mask_load_pd(vec_y,mask,&y[8*i]);
511     } else {
512       vec_y  = _mm512_load_pd(&y[8*i]);
513     }
514     vec_y2 = _mm512_setzero_pd();
515     vec_y3 = _mm512_setzero_pd();
516     vec_y4 = _mm512_setzero_pd();
517 
518     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
519     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
520     case 3:
521       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
522       acolidx += 8; aval += 8;
523       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
524       acolidx += 8; aval += 8;
525       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
526       acolidx += 8; aval += 8;
527       j += 3;
528       break;
529     case 2:
530       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
531       acolidx += 8; aval += 8;
532       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
533       acolidx += 8; aval += 8;
534       j += 2;
535       break;
536     case 1:
537       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
538       acolidx += 8; aval += 8;
539       j += 1;
540       break;
541     }
542     #pragma novector
543     for (; j<(a->sliidx[i+1]>>3); j+=4) {
544       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
545       acolidx += 8; aval += 8;
546       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
547       acolidx += 8; aval += 8;
548       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
549       acolidx += 8; aval += 8;
550       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
551       acolidx += 8; aval += 8;
552     }
553 
554     vec_y = _mm512_add_pd(vec_y,vec_y2);
555     vec_y = _mm512_add_pd(vec_y,vec_y3);
556     vec_y = _mm512_add_pd(vec_y,vec_y4);
557     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
558       _mm512_mask_store_pd(&z[8*i],mask,vec_y);
559     } else {
560       _mm512_store_pd(&z[8*i],vec_y);
561     }
562   }
563 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
564   for (i=0; i<totalslices; i++) { /* loop over full slices */
565     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
566     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
567 
568     /* last slice may have padding rows. Don't use vectorization. */
569     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
570       for (r=0; r<(A->rmap->n & 0x07); ++r) {
571         row        = 8*i + r;
572         yval       = (MatScalar)0.0;
573         nnz_in_row = a->rlen[row];
574         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
575         z[row] = y[row] + yval;
576       }
577       break;
578     }
579 
580     vec_y  = _mm256_loadu_pd(y+8*i);
581     vec_y2 = _mm256_loadu_pd(y+8*i+4);
582 
583     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
584     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
585       vec_vals  = _mm256_loadu_pd(aval);
586       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
587       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
588       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
589       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
590       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
591       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
592       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
593       aval     += 4;
594 
595       vec_vals  = _mm256_loadu_pd(aval);
596       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
597       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
598       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
599       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
600       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
601       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
602       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
603       aval     += 4;
604     }
605 
606     _mm256_storeu_pd(z+i*8,vec_y);
607     _mm256_storeu_pd(z+i*8+4,vec_y2);
608   }
609 #else
610   for (i=0; i<totalslices; i++) { /* loop over slices */
611     for (j=0; j<8; j++) sum[j] = 0.0;
612     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
613       sum[0] += aval[j] * x[acolidx[j]];
614       sum[1] += aval[j+1] * x[acolidx[j+1]];
615       sum[2] += aval[j+2] * x[acolidx[j+2]];
616       sum[3] += aval[j+3] * x[acolidx[j+3]];
617       sum[4] += aval[j+4] * x[acolidx[j+4]];
618       sum[5] += aval[j+5] * x[acolidx[j+5]];
619       sum[6] += aval[j+6] * x[acolidx[j+6]];
620       sum[7] += aval[j+7] * x[acolidx[j+7]];
621     }
622     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
623       for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
624     } else {
625       for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
626     }
627   }
628 #endif
629 
630   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
631   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
632   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
633   PetscFunctionReturn(0);
634 }
635 
636 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
637 {
638   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
639   PetscScalar       *y;
640   const PetscScalar *x;
641   const MatScalar   *aval=a->val;
642   const PetscInt    *acolidx=a->colidx;
643   PetscInt          i,j,r,row,nnz_in_row,totalslices=a->totalslices;
644   PetscErrorCode    ierr;
645 
646 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
647 #pragma disjoint(*x,*y,*aval)
648 #endif
649 
650   PetscFunctionBegin;
651   if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); }
652   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
653   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
654   for (i=0; i<a->totalslices; i++) { /* loop over slices */
655     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
656       for (r=0; r<(A->rmap->n & 0x07); ++r) {
657         row        = 8*i + r;
658         nnz_in_row = a->rlen[row];
659         for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
660       }
661       break;
662     }
663     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
664       y[acolidx[j]]   += aval[j] * x[8*i];
665       y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
666       y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
667       y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
668       y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
669       y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
670       y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
671       y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
672     }
673   }
674   ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr);
675   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
676   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
681 {
682   PetscErrorCode ierr;
683 
684   PetscFunctionBegin;
685   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
686   ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 /*
691      Checks for missing diagonals
692 */
693 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
694 {
695   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
696   PetscInt    *diag,i;
697 
698   PetscFunctionBegin;
699   *missing = PETSC_FALSE;
700   if (A->rmap->n > 0 && !(a->colidx)) {
701     *missing = PETSC_TRUE;
702     if (d) *d = 0;
703     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
704   } else {
705     diag = a->diag;
706     for (i=0; i<A->rmap->n; i++) {
707       if (diag[i] == -1) {
708         *missing = PETSC_TRUE;
709         if (d) *d = i;
710         PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
711         break;
712       }
713     }
714   }
715   PetscFunctionReturn(0);
716 }
717 
718 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
719 {
720   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
721   PetscInt       i,j,m=A->rmap->n,shift;
722   PetscErrorCode ierr;
723 
724   PetscFunctionBegin;
725   if (!a->diag) {
726     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
727     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
728     a->free_diag = PETSC_TRUE;
729   }
730   for (i=0; i<m; i++) { /* loop over rows */
731     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
732     a->diag[i] = -1;
733     for (j=0; j<a->rlen[i]; j++) {
734       if (a->colidx[shift+j*8] == i) {
735         a->diag[i] = shift+j*8;
736         break;
737       }
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 /*
744   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
745 */
746 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
747 {
748   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
749   PetscInt       i,*diag,m = A->rmap->n;
750   MatScalar      *val = a->val;
751   PetscScalar    *idiag,*mdiag;
752   PetscErrorCode ierr;
753 
754   PetscFunctionBegin;
755   if (a->idiagvalid) PetscFunctionReturn(0);
756   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
757   diag = a->diag;
758   if (!a->idiag) {
759     ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr);
760     ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr);
761     val  = a->val;
762   }
763   mdiag = a->mdiag;
764   idiag = a->idiag;
765 
766   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
767     for (i=0; i<m; i++) {
768       mdiag[i] = val[diag[i]];
769       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
770         if (PetscRealPart(fshift)) {
771           ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr);
772           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
773           A->factorerror_zeropivot_value = 0.0;
774           A->factorerror_zeropivot_row   = i;
775         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
776       }
777       idiag[i] = 1.0/val[diag[i]];
778     }
779     ierr = PetscLogFlops(m);CHKERRQ(ierr);
780   } else {
781     for (i=0; i<m; i++) {
782       mdiag[i] = val[diag[i]];
783       idiag[i] = omega/(fshift + val[diag[i]]);
784     }
785     ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr);
786   }
787   a->idiagvalid = PETSC_TRUE;
788   PetscFunctionReturn(0);
789 }
790 
791 PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
792 {
793   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
794   PetscErrorCode ierr;
795 
796   PetscFunctionBegin;
797   ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr);
798   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
799   PetscFunctionReturn(0);
800 }
801 
802 PetscErrorCode MatDestroy_SeqSELL(Mat A)
803 {
804   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
805   PetscErrorCode ierr;
806 
807   PetscFunctionBegin;
808 #if defined(PETSC_USE_LOG)
809   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
810 #endif
811   ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr);
812   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
813   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
814   ierr = PetscFree(a->diag);CHKERRQ(ierr);
815   ierr = PetscFree(a->ibdiag);CHKERRQ(ierr);
816   ierr = PetscFree(a->rlen);CHKERRQ(ierr);
817   ierr = PetscFree(a->sliidx);CHKERRQ(ierr);
818   ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr);
819   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
820   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
821   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
822 
823   ierr = PetscFree(A->data);CHKERRQ(ierr);
824 
825   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
826   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
827   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
828 #if defined(PETSC_HAVE_ELEMENTAL)
829 #endif
830   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr);
831   PetscFunctionReturn(0);
832 }
833 
834 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
835 {
836   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
837   PetscErrorCode ierr;
838 
839   PetscFunctionBegin;
840   switch (op) {
841   case MAT_ROW_ORIENTED:
842     a->roworiented = flg;
843     break;
844   case MAT_KEEP_NONZERO_PATTERN:
845     a->keepnonzeropattern = flg;
846     break;
847   case MAT_NEW_NONZERO_LOCATIONS:
848     a->nonew = (flg ? 0 : 1);
849     break;
850   case MAT_NEW_NONZERO_LOCATION_ERR:
851     a->nonew = (flg ? -1 : 0);
852     break;
853   case MAT_NEW_NONZERO_ALLOCATION_ERR:
854     a->nonew = (flg ? -2 : 0);
855     break;
856   case MAT_UNUSED_NONZERO_LOCATION_ERR:
857     a->nounused = (flg ? -1 : 0);
858     break;
859   case MAT_NEW_DIAGONALS:
860   case MAT_IGNORE_OFF_PROC_ENTRIES:
861   case MAT_USE_HASH_TABLE:
862     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
863     break;
864   case MAT_SPD:
865   case MAT_SYMMETRIC:
866   case MAT_STRUCTURALLY_SYMMETRIC:
867   case MAT_HERMITIAN:
868   case MAT_SYMMETRY_ETERNAL:
869     /* These options are handled directly by MatSetOption() */
870     break;
871   default:
872     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
873   }
874   PetscFunctionReturn(0);
875 }
876 
877 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
878 {
879   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
880   PetscInt       i,j,n,shift;
881   PetscScalar    *x,zero=0.0;
882   PetscErrorCode ierr;
883 
884   PetscFunctionBegin;
885   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
886   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
887 
888   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
889     PetscInt *diag=a->diag;
890     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
891     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
892     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
893     PetscFunctionReturn(0);
894   }
895 
896   ierr = VecSet(v,zero);CHKERRQ(ierr);
897   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
898   for (i=0; i<n; i++) { /* loop over rows */
899     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
900     x[i] = 0;
901     for (j=0; j<a->rlen[i]; j++) {
902       if (a->colidx[shift+j*8] == i) {
903         x[i] = a->val[shift+j*8];
904         break;
905       }
906     }
907   }
908   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
911 
912 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
913 {
914   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
915   const PetscScalar *l,*r;
916   PetscInt          i,j,m,n,row;
917   PetscErrorCode    ierr;
918 
919   PetscFunctionBegin;
920   if (ll) {
921     /* The local size is used so that VecMPI can be passed to this routine
922        by MatDiagonalScale_MPISELL */
923     ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr);
924     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
925     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
926     for (i=0; i<a->totalslices; i++) { /* loop over slices */
927       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
928         a->val[j] *= l[8*i+row];
929       }
930     }
931     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
932     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
933   }
934   if (rr) {
935     ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr);
936     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
937     ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
938     for (i=0; i<a->totalslices; i++) { /* loop over slices */
939       for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
940         a->val[j] *= r[a->colidx[j]];
941       }
942     }
943     ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
944     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
945   }
946   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
947   PetscFunctionReturn(0);
948 }
949 
950 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
951 
952 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
953 {
954   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
955   PetscInt    *cp,i,k,low,high,t,row,col,l;
956   PetscInt    shift;
957   MatScalar   *vp;
958 
959   PetscFunctionBegin;
960   for (k=0; k<m; k++) { /* loop over added rows */
961     row = im[k];
962     if (row<0) continue;
963 #if defined(PETSC_USE_DEBUG)
964     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
965 #endif
966     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
967     cp = a->colidx+shift; /* pointer to the row */
968     vp = a->val+shift; /* pointer to the row */
969     for (l=0; l<n; l++) { /* loop over added rows */
970       col = in[l];
971       if (col<0) continue;
972 #if defined(PETSC_USE_DEBUG)
973       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
974 #endif
975       high = a->rlen[row]; low = 0; /* assume unsorted */
976       while (high-low > 5) {
977         t = (low+high)/2;
978         if (*(cp+t*8) > col) high = t;
979         else low = t;
980       }
981       for (i=low; i<high; i++) {
982         if (*(cp+8*i) > col) break;
983         if (*(cp+8*i) == col) {
984           *v++ = *(vp+8*i);
985           goto finished;
986         }
987       }
988       *v++ = 0.0;
989     finished:;
990     }
991   }
992   PetscFunctionReturn(0);
993 }
994 
995 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
996 {
997   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
998   PetscInt          i,j,m=A->rmap->n,shift;
999   const char        *name;
1000   PetscViewerFormat format;
1001   PetscErrorCode    ierr;
1002 
1003   PetscFunctionBegin;
1004   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1005   if (format == PETSC_VIEWER_ASCII_MATLAB) {
1006     PetscInt nofinalvalue = 0;
1007     /*
1008     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1009       nofinalvalue = 1;
1010     }
1011     */
1012     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1013     ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr);
1014     ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr);
1015 #if defined(PETSC_USE_COMPLEX)
1016     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1017 #else
1018     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1019 #endif
1020     ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr);
1021 
1022     for (i=0; i<m; i++) {
1023       shift = a->sliidx[i>>3]+(i&0x07);
1024       for (j=0; j<a->rlen[i]; j++) {
1025 #if defined(PETSC_USE_COMPLEX)
1026         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1027 #else
1028         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1029 #endif
1030       }
1031     }
1032     /*
1033     if (nofinalvalue) {
1034 #if defined(PETSC_USE_COMPLEX)
1035       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr);
1036 #else
1037       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr);
1038 #endif
1039     }
1040     */
1041     ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
1042     ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr);
1043     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1044   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1045     PetscFunctionReturn(0);
1046   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1047     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1048     for (i=0; i<m; i++) {
1049       ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1050       shift = a->sliidx[i>>3]+(i&0x07);
1051       for (j=0; j<a->rlen[i]; j++) {
1052 #if defined(PETSC_USE_COMPLEX)
1053         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1054           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1055         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1056           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1057         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1058           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1059         }
1060 #else
1061         if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);}
1062 #endif
1063       }
1064       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1065     }
1066     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1067   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1068     PetscInt    cnt=0,jcnt;
1069     PetscScalar value;
1070 #if defined(PETSC_USE_COMPLEX)
1071     PetscBool   realonly=PETSC_TRUE;
1072     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1073       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1074         realonly = PETSC_FALSE;
1075         break;
1076       }
1077     }
1078 #endif
1079 
1080     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1081     for (i=0; i<m; i++) {
1082       jcnt = 0;
1083       shift = a->sliidx[i>>3]+(i&0x07);
1084       for (j=0; j<A->cmap->n; j++) {
1085         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1086           value = a->val[cnt++];
1087           jcnt++;
1088         } else {
1089           value = 0.0;
1090         }
1091 #if defined(PETSC_USE_COMPLEX)
1092         if (realonly) {
1093           ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr);
1094         } else {
1095           ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr);
1096         }
1097 #else
1098         ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr);
1099 #endif
1100       }
1101       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1102     }
1103     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1104   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1105     PetscInt fshift=1;
1106     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1107 #if defined(PETSC_USE_COMPLEX)
1108     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr);
1109 #else
1110     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr);
1111 #endif
1112     ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr);
1113     for (i=0; i<m; i++) {
1114       shift = a->sliidx[i>>3]+(i&0x07);
1115       for (j=0; j<a->rlen[i]; j++) {
1116 #if defined(PETSC_USE_COMPLEX)
1117         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1118 #else
1119         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1120 #endif
1121       }
1122     }
1123     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1124   } else {
1125     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1126     if (A->factortype) {
1127       for (i=0; i<m; i++) {
1128         shift = a->sliidx[i>>3]+(i&0x07);
1129         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1130         /* L part */
1131         for (j=shift; j<a->diag[i]; j+=8) {
1132 #if defined(PETSC_USE_COMPLEX)
1133           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1134             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1135           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1136             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1137           } else {
1138             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1139           }
1140 #else
1141           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1142 #endif
1143         }
1144         /* diagonal */
1145         j = a->diag[i];
1146 #if defined(PETSC_USE_COMPLEX)
1147         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1148           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr);
1149         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1150           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr);
1151         } else {
1152           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr);
1153         }
1154 #else
1155         ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr);
1156 #endif
1157 
1158         /* U part */
1159         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1160 #if defined(PETSC_USE_COMPLEX)
1161           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1162             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1163           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1164             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1165           } else {
1166             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1167           }
1168 #else
1169           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1170 #endif
1171         }
1172         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1173       }
1174     } else {
1175       for (i=0; i<m; i++) {
1176         shift = a->sliidx[i>>3]+(i&0x07);
1177         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1178         for (j=0; j<a->rlen[i]; j++) {
1179 #if defined(PETSC_USE_COMPLEX)
1180           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1181             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1182           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1183             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1184           } else {
1185             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1186           }
1187 #else
1188           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);
1189 #endif
1190         }
1191         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1192       }
1193     }
1194     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1195   }
1196   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #include <petscdraw.h>
1201 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1202 {
1203   Mat               A=(Mat)Aa;
1204   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1205   PetscInt          i,j,m=A->rmap->n,shift;
1206   int               color;
1207   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1208   PetscViewer       viewer;
1209   PetscViewerFormat format;
1210   PetscErrorCode    ierr;
1211 
1212   PetscFunctionBegin;
1213   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1214   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1215   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1216 
1217   /* loop over matrix elements drawing boxes */
1218 
1219   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1220     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1221     /* Blue for negative, Cyan for zero and  Red for positive */
1222     color = PETSC_DRAW_BLUE;
1223     for (i=0; i<m; i++) {
1224       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1225       y_l = m - i - 1.0; y_r = y_l + 1.0;
1226       for (j=0; j<a->rlen[i]; j++) {
1227         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1228         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1229         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1230       }
1231     }
1232     color = PETSC_DRAW_CYAN;
1233     for (i=0; i<m; i++) {
1234       shift = a->sliidx[i>>3]+(i&0x07);
1235       y_l = m - i - 1.0; y_r = y_l + 1.0;
1236       for (j=0; j<a->rlen[i]; j++) {
1237         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1238         if (a->val[shift+8*j] !=  0.) continue;
1239         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1240       }
1241     }
1242     color = PETSC_DRAW_RED;
1243     for (i=0; i<m; i++) {
1244       shift = a->sliidx[i>>3]+(i&0x07);
1245       y_l = m - i - 1.0; y_r = y_l + 1.0;
1246       for (j=0; j<a->rlen[i]; j++) {
1247         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1248         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1249         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1250       }
1251     }
1252     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1253   } else {
1254     /* use contour shading to indicate magnitude of values */
1255     /* first determine max of all nonzero values */
1256     PetscReal minv=0.0,maxv=0.0;
1257     PetscInt  count=0;
1258     PetscDraw popup;
1259     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1260       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1261     }
1262     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1263     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1264     ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr);
1265 
1266     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1267     for (i=0; i<m; i++) {
1268       shift = a->sliidx[i>>3]+(i&0x07);
1269       y_l = m - i - 1.0;
1270       y_r = y_l + 1.0;
1271       for (j=0; j<a->rlen[i]; j++) {
1272         x_l = a->colidx[shift+j*8];
1273         x_r = x_l + 1.0;
1274         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1275         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1276         count++;
1277       }
1278     }
1279     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1280   }
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #include <petscdraw.h>
1285 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1286 {
1287   PetscDraw      draw;
1288   PetscReal      xr,yr,xl,yl,h,w;
1289   PetscBool      isnull;
1290   PetscErrorCode ierr;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1294   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1295   if (isnull) PetscFunctionReturn(0);
1296 
1297   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1298   xr  += w;          yr += h;         xl = -w;     yl = -h;
1299   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1300   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1301   ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr);
1302   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1303   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1308 {
1309   PetscBool      iascii,isbinary,isdraw;
1310   PetscErrorCode ierr;
1311 
1312   PetscFunctionBegin;
1313   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1314   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1315   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1316   if (iascii) {
1317     ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr);
1318   } else if (isbinary) {
1319     /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */
1320   } else if (isdraw) {
1321     ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr);
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1327 {
1328   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1329   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1330   MatScalar      *vp;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1335   /* To do: compress out the unused elements */
1336   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
1337   ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr);
1338   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr);
1339   ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr);
1340   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1341   for (i=0; i<a->totalslices; ++i) {
1342     shift = a->sliidx[i];    /* starting index of the slice */
1343     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1344     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1345     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1346       row  = 8*i + row_in_slice;
1347       nrow = a->rlen[row]; /* number of nonzeros in row */
1348       /*
1349         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1350         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1351       */
1352       lastcol = 0;
1353       if (nrow>0) { /* nonempty row */
1354         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1355       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1356         for (j=1;j<8;j++) {
1357           if (a->rlen[8*i+j]) {
1358             lastcol = cp[j];
1359             break;
1360           }
1361         }
1362       } else {
1363         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1364       }
1365 
1366       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1367         cp[8*k+row_in_slice] = lastcol;
1368         vp[8*k+row_in_slice] = (MatScalar)0;
1369       }
1370     }
1371   }
1372 
1373   A->info.mallocs += a->reallocs;
1374   a->reallocs      = 0;
1375 
1376   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1381 {
1382   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1383 
1384   PetscFunctionBegin;
1385   info->block_size   = 1.0;
1386   info->nz_allocated = (double)a->maxallocmat;
1387   info->nz_used      = (double)a->sliidx[a->totalslices]; /* include padding zeros */
1388   info->nz_unneeded  = (double)(a->maxallocmat-a->sliidx[a->totalslices]);
1389   info->assemblies   = (double)A->num_ass;
1390   info->mallocs      = (double)A->info.mallocs;
1391   info->memory       = ((PetscObject)A)->mem;
1392   if (A->factortype) {
1393     info->fill_ratio_given  = A->info.fill_ratio_given;
1394     info->fill_ratio_needed = A->info.fill_ratio_needed;
1395     info->factor_mallocs    = A->info.factor_mallocs;
1396   } else {
1397     info->fill_ratio_given  = 0;
1398     info->fill_ratio_needed = 0;
1399     info->factor_mallocs    = 0;
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1405 {
1406   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1407   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1408   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1409   MatScalar      *vp,value;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413   for (k=0; k<m; k++) { /* loop over added rows */
1414     row = im[k];
1415     if (row < 0) continue;
1416 #if defined(PETSC_USE_DEBUG)
1417     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1418 #endif
1419     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1420     cp    = a->colidx+shift; /* pointer to the row */
1421     vp    = a->val+shift; /* pointer to the row */
1422     nrow  = a->rlen[row];
1423     low   = 0;
1424     high  = nrow;
1425 
1426     for (l=0; l<n; l++) { /* loop over added columns */
1427       col = in[l];
1428       if (col<0) continue;
1429 #if defined(PETSC_USE_DEBUG)
1430       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1431 #endif
1432       if (a->roworiented) {
1433         value = v[l+k*n];
1434       } else {
1435         value = v[k+l*m];
1436       }
1437       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1438 
1439       /* search in this row for the specified colmun, i indicates the column to be set */
1440       if (col <= lastcol) low = 0;
1441       else high = nrow;
1442       lastcol = col;
1443       while (high-low > 5) {
1444         t = (low+high)/2;
1445         if (*(cp+t*8) > col) high = t;
1446         else low = t;
1447       }
1448       for (i=low; i<high; i++) {
1449         if (*(cp+i*8) > col) break;
1450         if (*(cp+i*8) == col) {
1451           if (is == ADD_VALUES) *(vp+i*8) += value;
1452           else *(vp+i*8) = value;
1453           low = i + 1;
1454           goto noinsert;
1455         }
1456       }
1457       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1458       if (nonew == 1) goto noinsert;
1459       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1460       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1461       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1462       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1463       for (ii=nrow-1; ii>=i; ii--) {
1464         *(cp+(ii+1)*8) = *(cp+ii*8);
1465         *(vp+(ii+1)*8) = *(vp+ii*8);
1466       }
1467       a->rlen[row]++;
1468       *(cp+i*8) = col;
1469       *(vp+i*8) = value;
1470       a->nz++;
1471       A->nonzerostate++;
1472       low = i+1; high++; nrow++;
1473 noinsert:;
1474     }
1475     a->rlen[row] = nrow;
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1481 {
1482   PetscErrorCode ierr;
1483 
1484   PetscFunctionBegin;
1485   /* If the two matrices have the same copy implementation, use fast copy. */
1486   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1487     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1488     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1489 
1490     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1491     ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1492   } else {
1493     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1494   }
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 PetscErrorCode MatSetUp_SeqSELL(Mat A)
1499 {
1500   PetscErrorCode ierr;
1501 
1502   PetscFunctionBegin;
1503   ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1508 {
1509   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1510 
1511   PetscFunctionBegin;
1512   *array = a->val;
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1517 {
1518   PetscFunctionBegin;
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 PetscErrorCode MatRealPart_SeqSELL(Mat A)
1523 {
1524   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1525   PetscInt    i;
1526   MatScalar   *aval=a->val;
1527 
1528   PetscFunctionBegin;
1529   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1534 {
1535   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1536   PetscInt       i;
1537   MatScalar      *aval=a->val;
1538   PetscErrorCode ierr;
1539 
1540   PetscFunctionBegin;
1541   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1542   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1547 {
1548   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1549   MatScalar      *aval=a->val;
1550   PetscScalar    oalpha=alpha;
1551   PetscBLASInt   one=1,size;
1552   PetscErrorCode ierr;
1553 
1554   PetscFunctionBegin;
1555   ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr);
1556   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1557   ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1558   ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1563 {
1564   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;
1565   PetscErrorCode ierr;
1566 
1567   PetscFunctionBegin;
1568   if (!Y->preallocated || !y->nz) {
1569     ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr);
1570   }
1571   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1576 {
1577   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1578   PetscScalar       *x,sum,*t;
1579   const MatScalar   *idiag=0,*mdiag;
1580   const PetscScalar *b,*xb;
1581   PetscInt          n,m=A->rmap->n,i,j,shift;
1582   const PetscInt    *diag;
1583   PetscErrorCode    ierr;
1584 
1585   PetscFunctionBegin;
1586   its = its*lits;
1587 
1588   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1589   if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);}
1590   a->fshift = fshift;
1591   a->omega  = omega;
1592 
1593   diag  = a->diag;
1594   t     = a->ssor_work;
1595   idiag = a->idiag;
1596   mdiag = a->mdiag;
1597 
1598   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1599   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1600   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1601   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1602   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1603   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1604 
1605   if (flag & SOR_ZERO_INITIAL_GUESS) {
1606     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1607       for (i=0; i<m; i++) {
1608         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1609         sum   = b[i];
1610         n     = (diag[i]-shift)/8;
1611         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1612         t[i]  = sum;
1613         x[i]  = sum*idiag[i];
1614       }
1615       xb   = t;
1616       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1617     } else xb = b;
1618     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1619       for (i=m-1; i>=0; i--) {
1620         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1621         sum   = xb[i];
1622         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1623         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1624         if (xb == b) {
1625           x[i] = sum*idiag[i];
1626         } else {
1627           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1628         }
1629       }
1630       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1631     }
1632     its--;
1633   }
1634   while (its--) {
1635     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1636       for (i=0; i<m; i++) {
1637         /* lower */
1638         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1639         sum   = b[i];
1640         n     = (diag[i]-shift)/8;
1641         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1642         t[i]  = sum;             /* save application of the lower-triangular part */
1643         /* upper */
1644         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1645         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1646         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1647       }
1648       xb   = t;
1649       ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1650     } else xb = b;
1651     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1652       for (i=m-1; i>=0; i--) {
1653         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1654         sum = xb[i];
1655         if (xb == b) {
1656           /* whole matrix (no checkpointing available) */
1657           n     = a->rlen[i];
1658           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1659           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1660         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1661           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1662           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1663           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1664         }
1665       }
1666       if (xb == b) {
1667         ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1668       } else {
1669         ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1670       }
1671     }
1672   }
1673   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1674   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 /* -------------------------------------------------------------------*/
1679 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1680                                        0,
1681                                        0,
1682                                        MatMult_SeqSELL,
1683                                /* 4*/  MatMultAdd_SeqSELL,
1684                                        MatMultTranspose_SeqSELL,
1685                                        MatMultTransposeAdd_SeqSELL,
1686                                        0,
1687                                        0,
1688                                        0,
1689                                /* 10*/ 0,
1690                                        0,
1691                                        0,
1692                                        MatSOR_SeqSELL,
1693                                        0,
1694                                /* 15*/ MatGetInfo_SeqSELL,
1695                                        MatEqual_SeqSELL,
1696                                        MatGetDiagonal_SeqSELL,
1697                                        MatDiagonalScale_SeqSELL,
1698                                        0,
1699                                /* 20*/ 0,
1700                                        MatAssemblyEnd_SeqSELL,
1701                                        MatSetOption_SeqSELL,
1702                                        MatZeroEntries_SeqSELL,
1703                                /* 24*/ 0,
1704                                        0,
1705                                        0,
1706                                        0,
1707                                        0,
1708                                /* 29*/ MatSetUp_SeqSELL,
1709                                        0,
1710                                        0,
1711                                        0,
1712                                        0,
1713                                /* 34*/ MatDuplicate_SeqSELL,
1714                                        0,
1715                                        0,
1716                                        0,
1717                                        0,
1718                                /* 39*/ 0,
1719                                        0,
1720                                        0,
1721                                        MatGetValues_SeqSELL,
1722                                        MatCopy_SeqSELL,
1723                                /* 44*/ 0,
1724                                        MatScale_SeqSELL,
1725                                        MatShift_SeqSELL,
1726                                        0,
1727                                        0,
1728                                /* 49*/ 0,
1729                                        0,
1730                                        0,
1731                                        0,
1732                                        0,
1733                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1734                                        0,
1735                                        0,
1736                                        0,
1737                                        0,
1738                                /* 59*/ 0,
1739                                        MatDestroy_SeqSELL,
1740                                        MatView_SeqSELL,
1741                                        0,
1742                                        0,
1743                                /* 64*/ 0,
1744                                        0,
1745                                        0,
1746                                        0,
1747                                        0,
1748                                /* 69*/ 0,
1749                                        0,
1750                                        0,
1751                                        0,
1752                                        0,
1753                                /* 74*/ 0,
1754                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1755                                        0,
1756                                        0,
1757                                        0,
1758                                /* 79*/ 0,
1759                                        0,
1760                                        0,
1761                                        0,
1762                                        0,
1763                                /* 84*/ 0,
1764                                        0,
1765                                        0,
1766                                        0,
1767                                        0,
1768                                /* 89*/ 0,
1769                                        0,
1770                                        0,
1771                                        0,
1772                                        0,
1773                                /* 94*/ 0,
1774                                        0,
1775                                        0,
1776                                        0,
1777                                        0,
1778                                /* 99*/ 0,
1779                                        0,
1780                                        0,
1781                                        MatConjugate_SeqSELL,
1782                                        0,
1783                                /*104*/ 0,
1784                                        0,
1785                                        0,
1786                                        0,
1787                                        0,
1788                                /*109*/ 0,
1789                                        0,
1790                                        0,
1791                                        0,
1792                                        MatMissingDiagonal_SeqSELL,
1793                                /*114*/ 0,
1794                                        0,
1795                                        0,
1796                                        0,
1797                                        0,
1798                                /*119*/ 0,
1799                                        0,
1800                                        0,
1801                                        0,
1802                                        0,
1803                                /*124*/ 0,
1804                                        0,
1805                                        0,
1806                                        0,
1807                                        0,
1808                                /*129*/ 0,
1809                                        0,
1810                                        0,
1811                                        0,
1812                                        0,
1813                                /*134*/ 0,
1814                                        0,
1815                                        0,
1816                                        0,
1817                                        0,
1818                                /*139*/ 0,
1819                                        0,
1820                                        0,
1821                                        MatFDColoringSetUp_SeqXAIJ,
1822                                        0,
1823                                 /*144*/0
1824 };
1825 
1826 PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1827 {
1828   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1829   PetscErrorCode ierr;
1830 
1831   PetscFunctionBegin;
1832   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1833 
1834   /* allocate space for values if not already there */
1835   if (!a->saved_values) {
1836     ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr);
1837     ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1838   }
1839 
1840   /* copy values over */
1841   ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1846 {
1847   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1848   PetscErrorCode ierr;
1849 
1850   PetscFunctionBegin;
1851   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1852   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1853   /* copy values over */
1854   ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 /*@C
1859  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1860 
1861  Not Collective
1862 
1863  Input Parameters:
1864  .  mat - a MATSEQSELL matrix
1865  .  array - pointer to the data
1866 
1867  Level: intermediate
1868 
1869  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1870  @*/
1871 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1872 {
1873   PetscErrorCode ierr;
1874 
1875   PetscFunctionBegin;
1876   ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1881 {
1882   Mat_SeqSELL    *b;
1883   PetscMPIInt    size;
1884   PetscErrorCode ierr;
1885 
1886   PetscFunctionBegin;
1887   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1888   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1889 
1890   ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
1891 
1892   B->data = (void*)b;
1893 
1894   ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1895 
1896   b->row                = 0;
1897   b->col                = 0;
1898   b->icol               = 0;
1899   b->reallocs           = 0;
1900   b->ignorezeroentries  = PETSC_FALSE;
1901   b->roworiented        = PETSC_TRUE;
1902   b->nonew              = 0;
1903   b->diag               = 0;
1904   b->solve_work         = 0;
1905   B->spptr              = 0;
1906   b->saved_values       = 0;
1907   b->idiag              = 0;
1908   b->mdiag              = 0;
1909   b->ssor_work          = 0;
1910   b->omega              = 1.0;
1911   b->fshift             = 0.0;
1912   b->idiagvalid         = PETSC_FALSE;
1913   b->ibdiagvalid        = PETSC_FALSE;
1914   b->keepnonzeropattern = PETSC_FALSE;
1915 
1916   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr);
1917   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr);
1918   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr);
1919   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr);
1920   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr);
1921   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr);
1922   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr);
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 /*
1927  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1928  */
1929 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1930 {
1931   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1932   PetscInt       i,m=A->rmap->n;
1933   PetscInt       totalslices=a->totalslices;
1934   PetscErrorCode ierr;
1935 
1936   PetscFunctionBegin;
1937   c = (Mat_SeqSELL*)C->data;
1938 
1939   C->factortype = A->factortype;
1940   c->row        = 0;
1941   c->col        = 0;
1942   c->icol       = 0;
1943   c->reallocs   = 0;
1944 
1945   C->assembled = PETSC_TRUE;
1946 
1947   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
1948   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
1949 
1950   ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr);
1951   ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1952   ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr);
1953   ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
1954 
1955   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
1956   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
1957 
1958   /* allocate the matrix space */
1959   if (mallocmatspace) {
1960     ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr);
1961     ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1962 
1963     c->singlemalloc = PETSC_TRUE;
1964 
1965     if (m > 0) {
1966       ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr);
1967       if (cpvalues == MAT_COPY_VALUES) {
1968         ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1969       } else {
1970         ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
1971       }
1972     }
1973   }
1974 
1975   c->ignorezeroentries = a->ignorezeroentries;
1976   c->roworiented       = a->roworiented;
1977   c->nonew             = a->nonew;
1978   if (a->diag) {
1979     ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr);
1980     ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1981     for (i=0; i<m; i++) {
1982       c->diag[i] = a->diag[i];
1983     }
1984   } else c->diag = 0;
1985 
1986   c->solve_work         = 0;
1987   c->saved_values       = 0;
1988   c->idiag              = 0;
1989   c->ssor_work          = 0;
1990   c->keepnonzeropattern = a->keepnonzeropattern;
1991   c->free_val           = PETSC_TRUE;
1992   c->free_colidx        = PETSC_TRUE;
1993 
1994   c->maxallocmat  = a->maxallocmat;
1995   c->maxallocrow  = a->maxallocrow;
1996   c->rlenmax      = a->rlenmax;
1997   c->nz           = a->nz;
1998   C->preallocated = PETSC_TRUE;
1999 
2000   c->nonzerorowcnt = a->nonzerorowcnt;
2001   C->nonzerostate  = A->nonzerostate;
2002 
2003   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
2008 {
2009   PetscErrorCode ierr;
2010 
2011   PetscFunctionBegin;
2012   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
2013   ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
2014   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
2015     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
2016   }
2017   ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2018   ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 /*@C
2023  MatCreateSeqSELL - Creates a sparse matrix in SELL format.
2024 
2025  Collective on MPI_Comm
2026 
2027  Input Parameters:
2028  +  comm - MPI communicator, set to PETSC_COMM_SELF
2029  .  m - number of rows
2030  .  n - number of columns
2031  .  rlenmax - maximum number of nonzeros in a row
2032  -  rlen - array containing the number of nonzeros in the various rows
2033  (possibly different for each row) or NULL
2034 
2035  Output Parameter:
2036  .  A - the matrix
2037 
2038  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2039  MatXXXXSetPreallocation() paradgm instead of this routine directly.
2040  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
2041 
2042  Notes:
2043  If nnz is given then nz is ignored
2044 
2045  Specify the preallocated storage with either rlenmax or rlen (not both).
2046  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2047  allocation.  For large problems you MUST preallocate memory or you
2048  will get TERRIBLE performance, see the users' manual chapter on matrices.
2049 
2050  Level: intermediate
2051 
2052  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()
2053 
2054  @*/
2055 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2056 {
2057   PetscErrorCode ierr;
2058 
2059   PetscFunctionBegin;
2060   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2061   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2062   ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr);
2063   ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr);
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2068 {
2069   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2070   PetscInt       totalslices=a->totalslices;
2071   PetscErrorCode ierr;
2072 
2073   PetscFunctionBegin;
2074   /* If the  matrix dimensions are not equal,or no of nonzeros */
2075   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2076     *flg = PETSC_FALSE;
2077     PetscFunctionReturn(0);
2078   }
2079   /* if the a->colidx are the same */
2080   ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr);
2081   if (!*flg) PetscFunctionReturn(0);
2082   /* if a->val are the same */
2083   ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2088 {
2089   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2090 
2091   PetscFunctionBegin;
2092   a->idiagvalid  = PETSC_FALSE;
2093   a->ibdiagvalid = PETSC_FALSE;
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 PetscErrorCode MatConjugate_SeqSELL(Mat A)
2098 {
2099 #if defined(PETSC_USE_COMPLEX)
2100   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2101   PetscInt    i;
2102   PetscScalar *val = a->val;
2103 
2104   PetscFunctionBegin;
2105   for (i=0; i<a->sliidx[a->totalslices]; i++) {
2106     val[i] = PetscConj(val[i]);
2107   }
2108 #else
2109   PetscFunctionBegin;
2110 #endif
2111   PetscFunctionReturn(0);
2112 }
2113