xref: /petsc/src/mat/impls/sell/seq/sell.c (revision f4e9c46abafb7f28a0c51bff70fa1e3ed26b85c5)
1 
2 /*
3   Defines the basic matrix operations for the SELL matrix storage format.
4 */
5 #include <../src/mat/impls/sell/seq/sell.h>  /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blocktranspose.h>
8 #if defined(PETSC_HAVE_IMMINTRIN_H)
9   #include <immintrin.h>
10 
11   #if !defined(_MM_SCALE_8)
12   #define _MM_SCALE_8    8
13   #endif
14 
15   #if defined(__AVX512F__)
16   /* these do not work
17    vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
18    vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
19   */
20     #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
21     /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
22     vec_idx  = _mm256_load_si256((__m256i const*)acolidx); \
23     vec_vals = _mm512_load_pd(aval); \
24     vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
25     vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y)
26   #elif defined(__AVX2__)
27     #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
28     vec_vals = _mm256_load_pd(aval); \
29     vec_idx  =  _mm_load_si128((__m128i const*)acolidx); /* SSE2 */ \
30     vec_x    = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \
31     vec_y    = _mm256_fmadd_pd(vec_x,vec_vals,vec_y)
32   #endif
33 #endif  /* PETSC_HAVE_IMMINTRIN_H */
34 
35 /*@C
36  MatSeqSELLSetPreallocation - For good matrix assembly performance
37  the user should preallocate the matrix storage by setting the parameter nz
38  (or the array nnz).  By setting these parameters accurately, performance
39  during matrix assembly can be increased significantly.
40 
41  Collective on MPI_Comm
42 
43  Input Parameters:
44  +  B - The matrix
45  .  nz - number of nonzeros per row (same for all rows)
46  -  nnz - array containing the number of nonzeros in the various rows
47  (possibly different for each row) or NULL
48 
49  Notes:
50  If nnz is given then nz is ignored.
51 
52  Specify the preallocated storage with either nz or nnz (not both).
53  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
54  allocation.  For large problems you MUST preallocate memory or you
55  will get TERRIBLE performance, see the users' manual chapter on matrices.
56 
57  You can call MatGetInfo() to get information on how effective the preallocation was;
58  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
59  You can also run with the option -info and look for messages with the string
60  malloc in them to see if additional memory allocation was needed.
61 
62  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
63  entries or columns indices.
64 
65  The maximum number of nonzeos in any row should be as accuate as possible.
66  If it is underesitmated, you will get bad performance due to reallocation
67  (MatSeqXSELLReallocateSELL).
68 
69  Level: intermediate
70 
71  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
72 
73  @*/
74 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
75 {
76   PetscErrorCode ierr;
77 
78   PetscFunctionBegin;
79   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
80   PetscValidType(B,1);
81   ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr);
82   PetscFunctionReturn(0);
83 }
84 
85 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
86 {
87   Mat_SeqSELL    *b;
88   PetscInt       i,j,totalslices;
89   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
94   if (maxallocrow == MAT_SKIP_ALLOCATION) {
95     skipallocation = PETSC_TRUE;
96     maxallocrow    = 0;
97   }
98 
99   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
100   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
101 
102   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
103   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
104   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
105   if (rlen) {
106     for (i=0; i<B->rmap->n; i++) {
107       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
108       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
109     }
110   }
111 
112   B->preallocated = PETSC_TRUE;
113 
114   b = (Mat_SeqSELL*)B->data;
115 
116   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
117   b->totalslices = totalslices;
118   if (!skipallocation) {
119     if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);
120 
121     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
122       ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr);
123       ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
124     }
125     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
126       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
127       else if (maxallocrow < 0) maxallocrow = 1;
128       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
129     } else {
130       maxallocrow = 0;
131       b->sliidx[0] = 0;
132       for (i=1; i<totalslices; i++) {
133         b->sliidx[i] = 0;
134         for (j=0;j<8;j++) {
135           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
136         }
137         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
138         b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i];
139       }
140       /* last slice */
141       b->sliidx[totalslices] = 0;
142       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
143       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
144       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
145     }
146 
147     /* allocate space for val, colidx, rlen */
148     /* FIXME: should B's old memory be unlogged? */
149     ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr);
150     /* FIXME: assuming an element of the bit array takes 8 bits */
151     ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr);
152     ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
153     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
154     ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr);
155     ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr);
156 
157     b->singlemalloc = PETSC_TRUE;
158     b->free_val     = PETSC_TRUE;
159     b->free_colidx  = PETSC_TRUE;
160   } else {
161     b->free_val    = PETSC_FALSE;
162     b->free_colidx = PETSC_FALSE;
163   }
164 
165   b->nz               = 0;
166   b->maxallocrow      = maxallocrow;
167   b->rlenmax          = maxallocrow;
168   b->maxallocmat      = b->sliidx[totalslices];
169   B->info.nz_unneeded = (double)b->maxallocmat;
170   if (realalloc) {
171     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
177 {
178   Mat_SeqSELL *a = (Mat_SeqSELL*)A->data;
179   PetscInt    shift;
180 
181   PetscFunctionBegin;
182   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
183   if (nz) *nz = a->rlen[row];
184   shift = a->sliidx[row>>3]+(row&0x07);
185   if (!a->getrowcols) {
186     PetscErrorCode ierr;
187 
188     ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr);
189   }
190   if (idx) {
191     PetscInt j;
192     for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j];
193     *idx = a->getrowcols;
194   }
195   if (v) {
196     PetscInt j;
197     for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j];
198     *v = a->getrowvals;
199   }
200   PetscFunctionReturn(0);
201 }
202 
203 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
204 {
205   PetscFunctionBegin;
206   PetscFunctionReturn(0);
207 }
208 
209 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
210 {
211   Mat            B;
212   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data;
213   PetscInt       i,j,row;
214   PetscBool      isnonzero;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (reuse == MAT_REUSE_MATRIX) {
219     B = *newmat;
220   } else {
221     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
222     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
223     ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
224     ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr);
225     ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
226   }
227 
228   for (i=0; i<a->totalslices; i++) { /* loop over slices */
229     for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
230       isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]);
231       if (isnonzero) {
232         ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr);
233       }
234     }
235   }
236   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
237   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
238   B->rmap->bs = A->rmap->bs;
239 
240   if (reuse == MAT_INPLACE_MATRIX) {
241     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
242   } else {
243     *newmat = B;
244   }
245   PetscFunctionReturn(0);
246 }
247 
248 #include <../src/mat/impls/aij/seq/aij.h>
249 
250 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
251 {
252   Mat               B;
253   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
254   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
255   const PetscInt    *cols;
256   const PetscScalar *vals;
257   PetscErrorCode    ierr;
258 
259   PetscFunctionBegin;
260   if (A->rmap->bs > 1) {
261     ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr);
262     PetscFunctionReturn(0);
263   }
264 
265   if (reuse == MAT_REUSE_MATRIX) {
266     B = *newmat;
267   } else {
268     /* Can we just use ilen? */
269     ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr);
270     for (i=0; i<m; i++) {
271       rowlengths[i] = ai[i+1] - ai[i];
272     }
273 
274     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
275     ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr);
276     ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr);
277     ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr);
278     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
279   }
280 
281   ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
282 
283   for (row=0; row<m; row++) {
284     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
285     ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
286     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
287   }
288   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
289   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
290   B->rmap->bs = A->rmap->bs;
291 
292   if (reuse == MAT_INPLACE_MATRIX) {
293     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
294   } else {
295     *newmat = B;
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
301 {
302   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
303   PetscScalar       *y;
304   const PetscScalar *x;
305   const MatScalar   *aval=a->val;
306   PetscInt          totalslices=a->totalslices;
307   const PetscInt    *acolidx=a->colidx;
308   PetscInt          i,j;
309   PetscErrorCode    ierr;
310 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
311   __m512d           vec_x,vec_y,vec_vals;
312   __m256i           vec_idx;
313   __mmask8          mask;
314   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
315   __m256i           vec_idx2,vec_idx3,vec_idx4;
316 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
317   __m128i           vec_idx;
318   __m256d           vec_x,vec_y,vec_y2,vec_vals;
319   MatScalar         yval;
320   PetscInt          r,rows_left,row,nnz_in_row;
321 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
322   __m128d           vec_x_tmp;
323   __m256d           vec_x,vec_y,vec_y2,vec_vals;
324   MatScalar         yval;
325   PetscInt          r,rows_left,row,nnz_in_row;
326 #else
327   PetscScalar       sum[8];
328 #endif
329 
330 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
331 #pragma disjoint(*x,*y,*aval)
332 #endif
333 
334   PetscFunctionBegin;
335   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
336   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
337 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
338   for (i=0; i<totalslices; i++) { /* loop over slices */
339     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
340     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
341 
342     vec_y  = _mm512_setzero_pd();
343     vec_y2 = _mm512_setzero_pd();
344     vec_y3 = _mm512_setzero_pd();
345     vec_y4 = _mm512_setzero_pd();
346 
347     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
348     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
349     case 3:
350       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
351       acolidx += 8; aval += 8;
352       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
353       acolidx += 8; aval += 8;
354       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
355       acolidx += 8; aval += 8;
356       j += 3;
357       break;
358     case 2:
359       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
360       acolidx += 8; aval += 8;
361       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
362       acolidx += 8; aval += 8;
363       j += 2;
364       break;
365     case 1:
366       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
367       acolidx += 8; aval += 8;
368       j += 1;
369       break;
370     }
371     #pragma novector
372     for (; j<(a->sliidx[i+1]>>3); j+=4) {
373       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
374       acolidx += 8; aval += 8;
375       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
376       acolidx += 8; aval += 8;
377       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
378       acolidx += 8; aval += 8;
379       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
380       acolidx += 8; aval += 8;
381     }
382 
383     vec_y = _mm512_add_pd(vec_y,vec_y2);
384     vec_y = _mm512_add_pd(vec_y,vec_y3);
385     vec_y = _mm512_add_pd(vec_y,vec_y4);
386     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
387       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
388       _mm512_mask_store_pd(&y[8*i],mask,vec_y);
389     } else {
390       _mm512_store_pd(&y[8*i],vec_y);
391     }
392   }
393 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
394   for (i=0; i<totalslices; i++) { /* loop over full slices */
395     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
396     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
397 
398     /* last slice may have padding rows. Don't use vectorization. */
399     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
400       rows_left = A->rmap->n - 8*i;
401       for (r=0; r<rows_left; ++r) {
402         yval = (MatScalar)0;
403         row = 8*i + r;
404         nnz_in_row = a->rlen[row];
405         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
406         y[row] = yval;
407       }
408       break;
409     }
410 
411     vec_y  = _mm256_setzero_pd();
412     vec_y2 = _mm256_setzero_pd();
413 
414     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
415     #pragma novector
416     #pragma unroll(2)
417     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
418       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
419       aval += 4; acolidx += 4;
420       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2);
421       aval += 4; acolidx += 4;
422     }
423 
424     _mm256_store_pd(y+i*8,vec_y);
425     _mm256_store_pd(y+i*8+4,vec_y2);
426   }
427 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
428   for (i=0; i<totalslices; i++) { /* loop over full slices */
429     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
430     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
431 
432     vec_y  = _mm256_setzero_pd();
433     vec_y2 = _mm256_setzero_pd();
434 
435     /* last slice may have padding rows. Don't use vectorization. */
436     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
437       rows_left = A->rmap->n - 8*i;
438       for (r=0; r<rows_left; ++r) {
439         yval = (MatScalar)0;
440         row = 8*i + r;
441         nnz_in_row = a->rlen[row];
442         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
443         y[row] = yval;
444       }
445       break;
446     }
447 
448     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
449     #pragma novector
450     #pragma unroll(2)
451     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
452       vec_vals  = _mm256_loadu_pd(aval);
453       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
454       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
455       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
456       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
457       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
458       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
459       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
460       aval     += 4;
461 
462       vec_vals  = _mm256_loadu_pd(aval);
463       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
464       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
465       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
466       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
467       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
468       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
469       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
470       aval     += 4;
471     }
472 
473     _mm256_storeu_pd(y + i*8,     vec_y);
474     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
475   }
476 #else
477   for (i=0; i<totalslices; i++) { /* loop over slices */
478     for (j=0; j<8; j++) sum[j] = 0.0;
479     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
480       sum[0] += aval[j] * x[acolidx[j]];
481       sum[1] += aval[j+1] * x[acolidx[j+1]];
482       sum[2] += aval[j+2] * x[acolidx[j+2]];
483       sum[3] += aval[j+3] * x[acolidx[j+3]];
484       sum[4] += aval[j+4] * x[acolidx[j+4]];
485       sum[5] += aval[j+5] * x[acolidx[j+5]];
486       sum[6] += aval[j+6] * x[acolidx[j+6]];
487       sum[7] += aval[j+7] * x[acolidx[j+7]];
488     }
489     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
490       for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
491     } else {
492       for(j=0; j<8; j++) y[8*i+j] = sum[j];
493     }
494   }
495 #endif
496 
497   ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */
498   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
499   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
503 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
504 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
505 {
506   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
507   PetscScalar       *y,*z;
508   const PetscScalar *x;
509   const MatScalar   *aval=a->val;
510   PetscInt          totalslices=a->totalslices;
511   const PetscInt    *acolidx=a->colidx;
512   PetscInt          i,j;
513   PetscErrorCode    ierr;
514 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
515   __m512d           vec_x,vec_y,vec_vals;
516   __m256i           vec_idx;
517   __mmask8          mask;
518   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
519   __m256i           vec_idx2,vec_idx3,vec_idx4;
520 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
521   __m128d           vec_x_tmp;
522   __m256d           vec_x,vec_y,vec_y2,vec_vals;
523   MatScalar         yval;
524   PetscInt          r,row,nnz_in_row;
525 #else
526   PetscScalar       sum[8];
527 #endif
528 
529 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
530 #pragma disjoint(*x,*y,*aval)
531 #endif
532 
533   PetscFunctionBegin;
534   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
535   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
536 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
537   for (i=0; i<totalslices; i++) { /* loop over slices */
538     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
539     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
540 
541     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
542       mask   = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
543       vec_y  = _mm512_mask_load_pd(vec_y,mask,&y[8*i]);
544     } else {
545       vec_y  = _mm512_load_pd(&y[8*i]);
546     }
547     vec_y2 = _mm512_setzero_pd();
548     vec_y3 = _mm512_setzero_pd();
549     vec_y4 = _mm512_setzero_pd();
550 
551     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
552     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
553     case 3:
554       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
555       acolidx += 8; aval += 8;
556       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
557       acolidx += 8; aval += 8;
558       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
559       acolidx += 8; aval += 8;
560       j += 3;
561       break;
562     case 2:
563       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
564       acolidx += 8; aval += 8;
565       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
566       acolidx += 8; aval += 8;
567       j += 2;
568       break;
569     case 1:
570       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
571       acolidx += 8; aval += 8;
572       j += 1;
573       break;
574     }
575     #pragma novector
576     for (; j<(a->sliidx[i+1]>>3); j+=4) {
577       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
578       acolidx += 8; aval += 8;
579       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
580       acolidx += 8; aval += 8;
581       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
582       acolidx += 8; aval += 8;
583       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
584       acolidx += 8; aval += 8;
585     }
586 
587     vec_y = _mm512_add_pd(vec_y,vec_y2);
588     vec_y = _mm512_add_pd(vec_y,vec_y3);
589     vec_y = _mm512_add_pd(vec_y,vec_y4);
590     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
591       _mm512_mask_store_pd(&z[8*i],mask,vec_y);
592     } else {
593       _mm512_store_pd(&z[8*i],vec_y);
594     }
595   }
596 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
597   for (i=0; i<totalslices; i++) { /* loop over full slices */
598     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
599     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
600 
601     /* last slice may have padding rows. Don't use vectorization. */
602     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
603       for (r=0; r<(A->rmap->n & 0x07); ++r) {
604         row        = 8*i + r;
605         yval       = (MatScalar)0.0;
606         nnz_in_row = a->rlen[row];
607         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
608         z[row] = y[row] + yval;
609       }
610       break;
611     }
612 
613     vec_y  = _mm256_loadu_pd(y+8*i);
614     vec_y2 = _mm256_loadu_pd(y+8*i+4);
615 
616     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
617     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
618       vec_vals  = _mm256_loadu_pd(aval);
619       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
620       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
621       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
622       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
623       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
624       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
625       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
626       aval     += 4;
627 
628       vec_vals  = _mm256_loadu_pd(aval);
629       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
630       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
631       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
632       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
633       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
634       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
635       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
636       aval     += 4;
637     }
638 
639     _mm256_storeu_pd(z+i*8,vec_y);
640     _mm256_storeu_pd(z+i*8+4,vec_y2);
641   }
642 #else
643   for (i=0; i<totalslices; i++) { /* loop over slices */
644     for (j=0; j<8; j++) sum[j] = 0.0;
645     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
646       sum[0] += aval[j] * x[acolidx[j]];
647       sum[1] += aval[j+1] * x[acolidx[j+1]];
648       sum[2] += aval[j+2] * x[acolidx[j+2]];
649       sum[3] += aval[j+3] * x[acolidx[j+3]];
650       sum[4] += aval[j+4] * x[acolidx[j+4]];
651       sum[5] += aval[j+5] * x[acolidx[j+5]];
652       sum[6] += aval[j+6] * x[acolidx[j+6]];
653       sum[7] += aval[j+7] * x[acolidx[j+7]];
654     }
655     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
656       for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
657     } else {
658       for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
659     }
660   }
661 #endif
662 
663   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
664   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
665   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
670 {
671   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
672   PetscScalar       *y;
673   const PetscScalar *x;
674   const MatScalar   *aval=a->val;
675   const PetscInt    *acolidx=a->colidx;
676   PetscInt          i,j,r,row,nnz_in_row,totalslices=a->totalslices;
677   PetscErrorCode    ierr;
678 
679 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
680 #pragma disjoint(*x,*y,*aval)
681 #endif
682 
683   PetscFunctionBegin;
684   if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); }
685   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
686   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
687   for (i=0; i<a->totalslices; i++) { /* loop over slices */
688     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
689       for (r=0; r<(A->rmap->n & 0x07); ++r) {
690         row        = 8*i + r;
691         nnz_in_row = a->rlen[row];
692         for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
693       }
694       break;
695     }
696     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
697       y[acolidx[j]]   += aval[j] * x[8*i];
698       y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
699       y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
700       y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
701       y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
702       y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
703       y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
704       y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
705     }
706   }
707   ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr);
708   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
709   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
714 {
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
719   ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 /*
724      Checks for missing diagonals
725 */
726 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
727 {
728   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
729   PetscInt    *diag,i;
730 
731   PetscFunctionBegin;
732   *missing = PETSC_FALSE;
733   if (A->rmap->n > 0 && !(a->colidx)) {
734     *missing = PETSC_TRUE;
735     if (d) *d = 0;
736     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
737   } else {
738     diag = a->diag;
739     for (i=0; i<A->rmap->n; i++) {
740       if (diag[i] == -1) {
741         *missing = PETSC_TRUE;
742         if (d) *d = i;
743         PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
744         break;
745       }
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
752 {
753   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
754   PetscInt       i,j,m=A->rmap->n,shift;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   if (!a->diag) {
759     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
760     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
761     a->free_diag = PETSC_TRUE;
762   }
763   for (i=0; i<m; i++) { /* loop over rows */
764     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
765     a->diag[i] = -1;
766     for (j=0; j<a->rlen[i]; j++) {
767       if (a->colidx[shift+j*8] == i) {
768         a->diag[i] = shift+j*8;
769         break;
770       }
771     }
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 /*
777   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
778 */
779 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
780 {
781   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
782   PetscInt       i,*diag,m = A->rmap->n;
783   MatScalar      *val = a->val;
784   PetscScalar    *idiag,*mdiag;
785   PetscErrorCode ierr;
786 
787   PetscFunctionBegin;
788   if (a->idiagvalid) PetscFunctionReturn(0);
789   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
790   diag = a->diag;
791   if (!a->idiag) {
792     ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr);
793     ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr);
794     val  = a->val;
795   }
796   mdiag = a->mdiag;
797   idiag = a->idiag;
798 
799   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
800     for (i=0; i<m; i++) {
801       mdiag[i] = val[diag[i]];
802       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
803         if (PetscRealPart(fshift)) {
804           ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr);
805           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
806           A->factorerror_zeropivot_value = 0.0;
807           A->factorerror_zeropivot_row   = i;
808         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
809       }
810       idiag[i] = 1.0/val[diag[i]];
811     }
812     ierr = PetscLogFlops(m);CHKERRQ(ierr);
813   } else {
814     for (i=0; i<m; i++) {
815       mdiag[i] = val[diag[i]];
816       idiag[i] = omega/(fshift + val[diag[i]]);
817     }
818     ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr);
819   }
820   a->idiagvalid = PETSC_TRUE;
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
825 {
826   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr);
831   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
832   PetscFunctionReturn(0);
833 }
834 
835 PetscErrorCode MatDestroy_SeqSELL(Mat A)
836 {
837   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
838   PetscErrorCode ierr;
839 
840   PetscFunctionBegin;
841 #if defined(PETSC_USE_LOG)
842   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
843 #endif
844   ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr);
845   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
846   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
847   ierr = PetscFree(a->diag);CHKERRQ(ierr);
848   ierr = PetscFree(a->ibdiag);CHKERRQ(ierr);
849   ierr = PetscFree(a->rlen);CHKERRQ(ierr);
850   ierr = PetscFree(a->sliidx);CHKERRQ(ierr);
851   ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr);
852   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
853   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
854   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
855   ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr);
856 
857   ierr = PetscFree(A->data);CHKERRQ(ierr);
858 
859   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
862 #if defined(PETSC_HAVE_ELEMENTAL)
863 #endif
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
869 {
870   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
871   PetscErrorCode ierr;
872 
873   PetscFunctionBegin;
874   switch (op) {
875   case MAT_ROW_ORIENTED:
876     a->roworiented = flg;
877     break;
878   case MAT_KEEP_NONZERO_PATTERN:
879     a->keepnonzeropattern = flg;
880     break;
881   case MAT_NEW_NONZERO_LOCATIONS:
882     a->nonew = (flg ? 0 : 1);
883     break;
884   case MAT_NEW_NONZERO_LOCATION_ERR:
885     a->nonew = (flg ? -1 : 0);
886     break;
887   case MAT_NEW_NONZERO_ALLOCATION_ERR:
888     a->nonew = (flg ? -2 : 0);
889     break;
890   case MAT_UNUSED_NONZERO_LOCATION_ERR:
891     a->nounused = (flg ? -1 : 0);
892     break;
893   case MAT_NEW_DIAGONALS:
894   case MAT_IGNORE_OFF_PROC_ENTRIES:
895   case MAT_USE_HASH_TABLE:
896     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
897     break;
898   case MAT_SPD:
899   case MAT_SYMMETRIC:
900   case MAT_STRUCTURALLY_SYMMETRIC:
901   case MAT_HERMITIAN:
902   case MAT_SYMMETRY_ETERNAL:
903     /* These options are handled directly by MatSetOption() */
904     break;
905   default:
906     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
912 {
913   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
914   PetscInt       i,j,n,shift;
915   PetscScalar    *x,zero=0.0;
916   PetscErrorCode ierr;
917 
918   PetscFunctionBegin;
919   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
920   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
921 
922   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
923     PetscInt *diag=a->diag;
924     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
925     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
926     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
927     PetscFunctionReturn(0);
928   }
929 
930   ierr = VecSet(v,zero);CHKERRQ(ierr);
931   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
932   for (i=0; i<n; i++) { /* loop over rows */
933     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
934     x[i] = 0;
935     for (j=0; j<a->rlen[i]; j++) {
936       if (a->colidx[shift+j*8] == i) {
937         x[i] = a->val[shift+j*8];
938         break;
939       }
940     }
941   }
942   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
947 {
948   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
949   const PetscScalar *l,*r;
950   PetscInt          i,j,m,n,row;
951   PetscErrorCode    ierr;
952 
953   PetscFunctionBegin;
954   if (ll) {
955     /* The local size is used so that VecMPI can be passed to this routine
956        by MatDiagonalScale_MPISELL */
957     ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr);
958     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
959     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
960     for (i=0; i<a->totalslices; i++) { /* loop over slices */
961       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
962         a->val[j] *= l[8*i+row];
963       }
964     }
965     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
966     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
967   }
968   if (rr) {
969     ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr);
970     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
971     ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
972     for (i=0; i<a->totalslices; i++) { /* loop over slices */
973       for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
974         a->val[j] *= r[a->colidx[j]];
975       }
976     }
977     ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
978     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
979   }
980   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
985 
986 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
987 {
988   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
989   PetscInt    *cp,i,k,low,high,t,row,col,l;
990   PetscInt    shift;
991   MatScalar   *vp;
992 
993   PetscFunctionBegin;
994   for (k=0; k<m; k++) { /* loop over added rows */
995     row = im[k];
996     if (row<0) continue;
997 #if defined(PETSC_USE_DEBUG)
998     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
999 #endif
1000     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1001     cp = a->colidx+shift; /* pointer to the row */
1002     vp = a->val+shift; /* pointer to the row */
1003     for (l=0; l<n; l++) { /* loop over added rows */
1004       col = in[l];
1005       if (col<0) continue;
1006 #if defined(PETSC_USE_DEBUG)
1007       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
1008 #endif
1009       high = a->rlen[row]; low = 0; /* assume unsorted */
1010       while (high-low > 5) {
1011         t = (low+high)/2;
1012         if (*(cp+t*8) > col) high = t;
1013         else low = t;
1014       }
1015       for (i=low; i<high; i++) {
1016         if (*(cp+8*i) > col) break;
1017         if (*(cp+8*i) == col) {
1018           *v++ = *(vp+8*i);
1019           goto finished;
1020         }
1021       }
1022       *v++ = 0.0;
1023     finished:;
1024     }
1025   }
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
1030 {
1031   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1032   PetscInt          i,j,m=A->rmap->n,shift;
1033   const char        *name;
1034   PetscViewerFormat format;
1035   PetscErrorCode    ierr;
1036 
1037   PetscFunctionBegin;
1038   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1039   if (format == PETSC_VIEWER_ASCII_MATLAB) {
1040     PetscInt nofinalvalue = 0;
1041     /*
1042     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1043       nofinalvalue = 1;
1044     }
1045     */
1046     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1047     ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr);
1048     ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr);
1049 #if defined(PETSC_USE_COMPLEX)
1050     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1051 #else
1052     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1053 #endif
1054     ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr);
1055 
1056     for (i=0; i<m; i++) {
1057       shift = a->sliidx[i>>3]+(i&0x07);
1058       for (j=0; j<a->rlen[i]; j++) {
1059 #if defined(PETSC_USE_COMPLEX)
1060         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1061 #else
1062         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1063 #endif
1064       }
1065     }
1066     /*
1067     if (nofinalvalue) {
1068 #if defined(PETSC_USE_COMPLEX)
1069       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr);
1070 #else
1071       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr);
1072 #endif
1073     }
1074     */
1075     ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
1076     ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr);
1077     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1078   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1079     PetscFunctionReturn(0);
1080   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1081     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1082     for (i=0; i<m; i++) {
1083       ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1084       shift = a->sliidx[i>>3]+(i&0x07);
1085       for (j=0; j<a->rlen[i]; j++) {
1086 #if defined(PETSC_USE_COMPLEX)
1087         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1088           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1089         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1090           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1091         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1092           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1093         }
1094 #else
1095         if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);}
1096 #endif
1097       }
1098       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1099     }
1100     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1101   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1102     PetscInt    cnt=0,jcnt;
1103     PetscScalar value;
1104 #if defined(PETSC_USE_COMPLEX)
1105     PetscBool   realonly=PETSC_TRUE;
1106     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1107       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1108         realonly = PETSC_FALSE;
1109         break;
1110       }
1111     }
1112 #endif
1113 
1114     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1115     for (i=0; i<m; i++) {
1116       jcnt = 0;
1117       shift = a->sliidx[i>>3]+(i&0x07);
1118       for (j=0; j<A->cmap->n; j++) {
1119         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1120           value = a->val[cnt++];
1121           jcnt++;
1122         } else {
1123           value = 0.0;
1124         }
1125 #if defined(PETSC_USE_COMPLEX)
1126         if (realonly) {
1127           ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr);
1128         } else {
1129           ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr);
1130         }
1131 #else
1132         ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr);
1133 #endif
1134       }
1135       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1136     }
1137     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1138   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1139     PetscInt fshift=1;
1140     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1141 #if defined(PETSC_USE_COMPLEX)
1142     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr);
1143 #else
1144     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr);
1145 #endif
1146     ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr);
1147     for (i=0; i<m; i++) {
1148       shift = a->sliidx[i>>3]+(i&0x07);
1149       for (j=0; j<a->rlen[i]; j++) {
1150 #if defined(PETSC_USE_COMPLEX)
1151         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1152 #else
1153         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1154 #endif
1155       }
1156     }
1157     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1158   } else {
1159     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1160     if (A->factortype) {
1161       for (i=0; i<m; i++) {
1162         shift = a->sliidx[i>>3]+(i&0x07);
1163         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1164         /* L part */
1165         for (j=shift; j<a->diag[i]; j+=8) {
1166 #if defined(PETSC_USE_COMPLEX)
1167           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1168             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1169           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1170             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1171           } else {
1172             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1173           }
1174 #else
1175           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1176 #endif
1177         }
1178         /* diagonal */
1179         j = a->diag[i];
1180 #if defined(PETSC_USE_COMPLEX)
1181         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1182           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr);
1183         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1184           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr);
1185         } else {
1186           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr);
1187         }
1188 #else
1189         ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr);
1190 #endif
1191 
1192         /* U part */
1193         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1194 #if defined(PETSC_USE_COMPLEX)
1195           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1196             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1197           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1198             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1199           } else {
1200             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1201           }
1202 #else
1203           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1204 #endif
1205         }
1206         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1207       }
1208     } else {
1209       for (i=0; i<m; i++) {
1210         shift = a->sliidx[i>>3]+(i&0x07);
1211         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1212         for (j=0; j<a->rlen[i]; j++) {
1213 #if defined(PETSC_USE_COMPLEX)
1214           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1215             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1216           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1217             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1218           } else {
1219             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1220           }
1221 #else
1222           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);
1223 #endif
1224         }
1225         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1226       }
1227     }
1228     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1229   }
1230   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #include <petscdraw.h>
1235 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1236 {
1237   Mat               A=(Mat)Aa;
1238   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1239   PetscInt          i,j,m=A->rmap->n,shift;
1240   int               color;
1241   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1242   PetscViewer       viewer;
1243   PetscViewerFormat format;
1244   PetscErrorCode    ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1248   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1249   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1250 
1251   /* loop over matrix elements drawing boxes */
1252 
1253   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1254     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1255     /* Blue for negative, Cyan for zero and  Red for positive */
1256     color = PETSC_DRAW_BLUE;
1257     for (i=0; i<m; i++) {
1258       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1259       y_l = m - i - 1.0; y_r = y_l + 1.0;
1260       for (j=0; j<a->rlen[i]; j++) {
1261         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1262         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1263         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1264       }
1265     }
1266     color = PETSC_DRAW_CYAN;
1267     for (i=0; i<m; i++) {
1268       shift = a->sliidx[i>>3]+(i&0x07);
1269       y_l = m - i - 1.0; y_r = y_l + 1.0;
1270       for (j=0; j<a->rlen[i]; j++) {
1271         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1272         if (a->val[shift+8*j] !=  0.) continue;
1273         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1274       }
1275     }
1276     color = PETSC_DRAW_RED;
1277     for (i=0; i<m; i++) {
1278       shift = a->sliidx[i>>3]+(i&0x07);
1279       y_l = m - i - 1.0; y_r = y_l + 1.0;
1280       for (j=0; j<a->rlen[i]; j++) {
1281         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1282         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1283         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1284       }
1285     }
1286     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1287   } else {
1288     /* use contour shading to indicate magnitude of values */
1289     /* first determine max of all nonzero values */
1290     PetscReal minv=0.0,maxv=0.0;
1291     PetscInt  count=0;
1292     PetscDraw popup;
1293     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1294       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1295     }
1296     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1297     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1298     ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr);
1299 
1300     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1301     for (i=0; i<m; i++) {
1302       shift = a->sliidx[i>>3]+(i&0x07);
1303       y_l = m - i - 1.0;
1304       y_r = y_l + 1.0;
1305       for (j=0; j<a->rlen[i]; j++) {
1306         x_l = a->colidx[shift+j*8];
1307         x_r = x_l + 1.0;
1308         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1309         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1310         count++;
1311       }
1312     }
1313     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1314   }
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #include <petscdraw.h>
1319 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1320 {
1321   PetscDraw      draw;
1322   PetscReal      xr,yr,xl,yl,h,w;
1323   PetscBool      isnull;
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1328   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1329   if (isnull) PetscFunctionReturn(0);
1330 
1331   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1332   xr  += w;          yr += h;         xl = -w;     yl = -h;
1333   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1334   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1335   ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr);
1336   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1337   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1342 {
1343   PetscBool      iascii,isbinary,isdraw;
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1348   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1349   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1350   if (iascii) {
1351     ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr);
1352   } else if (isbinary) {
1353     /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */
1354   } else if (isdraw) {
1355     ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr);
1356   }
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1361 {
1362   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1363   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1364   MatScalar      *vp;
1365   PetscErrorCode ierr;
1366 
1367   PetscFunctionBegin;
1368   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1369   /* To do: compress out the unused elements */
1370   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
1371   ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr);
1372   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr);
1373   ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr);
1374   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1375   for (i=0; i<a->totalslices; ++i) {
1376     shift = a->sliidx[i];    /* starting index of the slice */
1377     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1378     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1379     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1380       row  = 8*i + row_in_slice;
1381       nrow = a->rlen[row]; /* number of nonzeros in row */
1382       /*
1383         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1384         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1385       */
1386       lastcol = 0;
1387       if (nrow>0) { /* nonempty row */
1388         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1389       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1390         for (j=1;j<8;j++) {
1391           if (a->rlen[8*i+j]) {
1392             lastcol = cp[j];
1393             break;
1394           }
1395         }
1396       } else {
1397         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1398       }
1399 
1400       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1401         cp[8*k+row_in_slice] = lastcol;
1402         vp[8*k+row_in_slice] = (MatScalar)0;
1403       }
1404     }
1405   }
1406 
1407   A->info.mallocs += a->reallocs;
1408   a->reallocs      = 0;
1409 
1410   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1415 {
1416   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1417 
1418   PetscFunctionBegin;
1419   info->block_size   = 1.0;
1420   info->nz_allocated = (double)a->maxallocmat;
1421   info->nz_used      = (double)a->sliidx[a->totalslices]; /* include padding zeros */
1422   info->nz_unneeded  = (double)(a->maxallocmat-a->sliidx[a->totalslices]);
1423   info->assemblies   = (double)A->num_ass;
1424   info->mallocs      = (double)A->info.mallocs;
1425   info->memory       = ((PetscObject)A)->mem;
1426   if (A->factortype) {
1427     info->fill_ratio_given  = A->info.fill_ratio_given;
1428     info->fill_ratio_needed = A->info.fill_ratio_needed;
1429     info->factor_mallocs    = A->info.factor_mallocs;
1430   } else {
1431     info->fill_ratio_given  = 0;
1432     info->fill_ratio_needed = 0;
1433     info->factor_mallocs    = 0;
1434   }
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1439 {
1440   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1441   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1442   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1443   MatScalar      *vp,value;
1444   PetscErrorCode ierr;
1445 
1446   PetscFunctionBegin;
1447   for (k=0; k<m; k++) { /* loop over added rows */
1448     row = im[k];
1449     if (row < 0) continue;
1450 #if defined(PETSC_USE_DEBUG)
1451     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1452 #endif
1453     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1454     cp    = a->colidx+shift; /* pointer to the row */
1455     vp    = a->val+shift; /* pointer to the row */
1456     nrow  = a->rlen[row];
1457     low   = 0;
1458     high  = nrow;
1459 
1460     for (l=0; l<n; l++) { /* loop over added columns */
1461       col = in[l];
1462       if (col<0) continue;
1463 #if defined(PETSC_USE_DEBUG)
1464       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1465 #endif
1466       if (a->roworiented) {
1467         value = v[l+k*n];
1468       } else {
1469         value = v[k+l*m];
1470       }
1471       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1472 
1473       /* search in this row for the specified colmun, i indicates the column to be set */
1474       if (col <= lastcol) low = 0;
1475       else high = nrow;
1476       lastcol = col;
1477       while (high-low > 5) {
1478         t = (low+high)/2;
1479         if (*(cp+t*8) > col) high = t;
1480         else low = t;
1481       }
1482       for (i=low; i<high; i++) {
1483         if (*(cp+i*8) > col) break;
1484         if (*(cp+i*8) == col) {
1485           if (is == ADD_VALUES) *(vp+i*8) += value;
1486           else *(vp+i*8) = value;
1487           low = i + 1;
1488           goto noinsert;
1489         }
1490       }
1491       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1492       if (nonew == 1) goto noinsert;
1493       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1494       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1495       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1496       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1497       for (ii=nrow-1; ii>=i; ii--) {
1498         *(cp+(ii+1)*8) = *(cp+ii*8);
1499         *(vp+(ii+1)*8) = *(vp+ii*8);
1500       }
1501       a->rlen[row]++;
1502       *(cp+i*8) = col;
1503       *(vp+i*8) = value;
1504       a->nz++;
1505       A->nonzerostate++;
1506       low = i+1; high++; nrow++;
1507 noinsert:;
1508     }
1509     a->rlen[row] = nrow;
1510   }
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1515 {
1516   PetscErrorCode ierr;
1517 
1518   PetscFunctionBegin;
1519   /* If the two matrices have the same copy implementation, use fast copy. */
1520   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1521     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1522     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1523 
1524     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1525     ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1526   } else {
1527     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 PetscErrorCode MatSetUp_SeqSELL(Mat A)
1533 {
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1542 {
1543   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1544 
1545   PetscFunctionBegin;
1546   *array = a->val;
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1551 {
1552   PetscFunctionBegin;
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 PetscErrorCode MatRealPart_SeqSELL(Mat A)
1557 {
1558   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1559   PetscInt    i;
1560   MatScalar   *aval=a->val;
1561 
1562   PetscFunctionBegin;
1563   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1568 {
1569   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1570   PetscInt       i;
1571   MatScalar      *aval=a->val;
1572   PetscErrorCode ierr;
1573 
1574   PetscFunctionBegin;
1575   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1576   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1581 {
1582   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1583   MatScalar      *aval=a->val;
1584   PetscScalar    oalpha=alpha;
1585   PetscBLASInt   one=1,size;
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr);
1590   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1591   ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1592   ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1597 {
1598   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;
1599   PetscErrorCode ierr;
1600 
1601   PetscFunctionBegin;
1602   if (!Y->preallocated || !y->nz) {
1603     ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr);
1604   }
1605   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1610 {
1611   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1612   PetscScalar       *x,sum,*t;
1613   const MatScalar   *idiag=0,*mdiag;
1614   const PetscScalar *b,*xb;
1615   PetscInt          n,m=A->rmap->n,i,j,shift;
1616   const PetscInt    *diag;
1617   PetscErrorCode    ierr;
1618 
1619   PetscFunctionBegin;
1620   its = its*lits;
1621 
1622   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1623   if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);}
1624   a->fshift = fshift;
1625   a->omega  = omega;
1626 
1627   diag  = a->diag;
1628   t     = a->ssor_work;
1629   idiag = a->idiag;
1630   mdiag = a->mdiag;
1631 
1632   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1633   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1634   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1635   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1636   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1637   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1638 
1639   if (flag & SOR_ZERO_INITIAL_GUESS) {
1640     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1641       for (i=0; i<m; i++) {
1642         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1643         sum   = b[i];
1644         n     = (diag[i]-shift)/8;
1645         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1646         t[i]  = sum;
1647         x[i]  = sum*idiag[i];
1648       }
1649       xb   = t;
1650       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1651     } else xb = b;
1652     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1653       for (i=m-1; i>=0; i--) {
1654         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1655         sum   = xb[i];
1656         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1657         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1658         if (xb == b) {
1659           x[i] = sum*idiag[i];
1660         } else {
1661           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1662         }
1663       }
1664       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1665     }
1666     its--;
1667   }
1668   while (its--) {
1669     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1670       for (i=0; i<m; i++) {
1671         /* lower */
1672         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1673         sum   = b[i];
1674         n     = (diag[i]-shift)/8;
1675         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1676         t[i]  = sum;             /* save application of the lower-triangular part */
1677         /* upper */
1678         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1679         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1680         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1681       }
1682       xb   = t;
1683       ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1684     } else xb = b;
1685     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1686       for (i=m-1; i>=0; i--) {
1687         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1688         sum = xb[i];
1689         if (xb == b) {
1690           /* whole matrix (no checkpointing available) */
1691           n     = a->rlen[i];
1692           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1693           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1694         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1695           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1696           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1697           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1698         }
1699       }
1700       if (xb == b) {
1701         ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1702       } else {
1703         ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1704       }
1705     }
1706   }
1707   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1708   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 /* -------------------------------------------------------------------*/
1713 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1714                                        MatGetRow_SeqSELL,
1715                                        MatRestoreRow_SeqSELL,
1716                                        MatMult_SeqSELL,
1717                                /* 4*/  MatMultAdd_SeqSELL,
1718                                        MatMultTranspose_SeqSELL,
1719                                        MatMultTransposeAdd_SeqSELL,
1720                                        0,
1721                                        0,
1722                                        0,
1723                                /* 10*/ 0,
1724                                        0,
1725                                        0,
1726                                        MatSOR_SeqSELL,
1727                                        0,
1728                                /* 15*/ MatGetInfo_SeqSELL,
1729                                        MatEqual_SeqSELL,
1730                                        MatGetDiagonal_SeqSELL,
1731                                        MatDiagonalScale_SeqSELL,
1732                                        0,
1733                                /* 20*/ 0,
1734                                        MatAssemblyEnd_SeqSELL,
1735                                        MatSetOption_SeqSELL,
1736                                        MatZeroEntries_SeqSELL,
1737                                /* 24*/ 0,
1738                                        0,
1739                                        0,
1740                                        0,
1741                                        0,
1742                                /* 29*/ MatSetUp_SeqSELL,
1743                                        0,
1744                                        0,
1745                                        0,
1746                                        0,
1747                                /* 34*/ MatDuplicate_SeqSELL,
1748                                        0,
1749                                        0,
1750                                        0,
1751                                        0,
1752                                /* 39*/ 0,
1753                                        0,
1754                                        0,
1755                                        MatGetValues_SeqSELL,
1756                                        MatCopy_SeqSELL,
1757                                /* 44*/ 0,
1758                                        MatScale_SeqSELL,
1759                                        MatShift_SeqSELL,
1760                                        0,
1761                                        0,
1762                                /* 49*/ 0,
1763                                        0,
1764                                        0,
1765                                        0,
1766                                        0,
1767                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1768                                        0,
1769                                        0,
1770                                        0,
1771                                        0,
1772                                /* 59*/ 0,
1773                                        MatDestroy_SeqSELL,
1774                                        MatView_SeqSELL,
1775                                        0,
1776                                        0,
1777                                /* 64*/ 0,
1778                                        0,
1779                                        0,
1780                                        0,
1781                                        0,
1782                                /* 69*/ 0,
1783                                        0,
1784                                        0,
1785                                        0,
1786                                        0,
1787                                /* 74*/ 0,
1788                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1789                                        0,
1790                                        0,
1791                                        0,
1792                                /* 79*/ 0,
1793                                        0,
1794                                        0,
1795                                        0,
1796                                        0,
1797                                /* 84*/ 0,
1798                                        0,
1799                                        0,
1800                                        0,
1801                                        0,
1802                                /* 89*/ 0,
1803                                        0,
1804                                        0,
1805                                        0,
1806                                        0,
1807                                /* 94*/ 0,
1808                                        0,
1809                                        0,
1810                                        0,
1811                                        0,
1812                                /* 99*/ 0,
1813                                        0,
1814                                        0,
1815                                        MatConjugate_SeqSELL,
1816                                        0,
1817                                /*104*/ 0,
1818                                        0,
1819                                        0,
1820                                        0,
1821                                        0,
1822                                /*109*/ 0,
1823                                        0,
1824                                        0,
1825                                        0,
1826                                        MatMissingDiagonal_SeqSELL,
1827                                /*114*/ 0,
1828                                        0,
1829                                        0,
1830                                        0,
1831                                        0,
1832                                /*119*/ 0,
1833                                        0,
1834                                        0,
1835                                        0,
1836                                        0,
1837                                /*124*/ 0,
1838                                        0,
1839                                        0,
1840                                        0,
1841                                        0,
1842                                /*129*/ 0,
1843                                        0,
1844                                        0,
1845                                        0,
1846                                        0,
1847                                /*134*/ 0,
1848                                        0,
1849                                        0,
1850                                        0,
1851                                        0,
1852                                /*139*/ 0,
1853                                        0,
1854                                        0,
1855                                        MatFDColoringSetUp_SeqXAIJ,
1856                                        0,
1857                                 /*144*/0
1858 };
1859 
1860 PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1861 {
1862   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1863   PetscErrorCode ierr;
1864 
1865   PetscFunctionBegin;
1866   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1867 
1868   /* allocate space for values if not already there */
1869   if (!a->saved_values) {
1870     ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr);
1871     ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1872   }
1873 
1874   /* copy values over */
1875   ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1880 {
1881   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1882   PetscErrorCode ierr;
1883 
1884   PetscFunctionBegin;
1885   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1886   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1887   /* copy values over */
1888   ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 /*@C
1893  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1894 
1895  Not Collective
1896 
1897  Input Parameters:
1898  .  mat - a MATSEQSELL matrix
1899  .  array - pointer to the data
1900 
1901  Level: intermediate
1902 
1903  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1904  @*/
1905 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1906 {
1907   PetscErrorCode ierr;
1908 
1909   PetscFunctionBegin;
1910   ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1915 {
1916   Mat_SeqSELL    *b;
1917   PetscMPIInt    size;
1918   PetscErrorCode ierr;
1919 
1920   PetscFunctionBegin;
1921   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1922   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1923 
1924   ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
1925 
1926   B->data = (void*)b;
1927 
1928   ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1929 
1930   b->row                = 0;
1931   b->col                = 0;
1932   b->icol               = 0;
1933   b->reallocs           = 0;
1934   b->ignorezeroentries  = PETSC_FALSE;
1935   b->roworiented        = PETSC_TRUE;
1936   b->nonew              = 0;
1937   b->diag               = 0;
1938   b->solve_work         = 0;
1939   B->spptr              = 0;
1940   b->saved_values       = 0;
1941   b->idiag              = 0;
1942   b->mdiag              = 0;
1943   b->ssor_work          = 0;
1944   b->omega              = 1.0;
1945   b->fshift             = 0.0;
1946   b->idiagvalid         = PETSC_FALSE;
1947   b->ibdiagvalid        = PETSC_FALSE;
1948   b->keepnonzeropattern = PETSC_FALSE;
1949 
1950   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr);
1951   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr);
1952   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr);
1953   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr);
1954   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr);
1955   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr);
1956   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 /*
1961  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1962  */
1963 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1964 {
1965   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1966   PetscInt       i,m=A->rmap->n;
1967   PetscInt       totalslices=a->totalslices;
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   c = (Mat_SeqSELL*)C->data;
1972 
1973   C->factortype = A->factortype;
1974   c->row        = 0;
1975   c->col        = 0;
1976   c->icol       = 0;
1977   c->reallocs   = 0;
1978 
1979   C->assembled = PETSC_TRUE;
1980 
1981   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
1982   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
1983 
1984   ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr);
1985   ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
1986   ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr);
1987   ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
1988 
1989   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
1990   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
1991 
1992   /* allocate the matrix space */
1993   if (mallocmatspace) {
1994     ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr);
1995     ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1996 
1997     c->singlemalloc = PETSC_TRUE;
1998 
1999     if (m > 0) {
2000       ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr);
2001       if (cpvalues == MAT_COPY_VALUES) {
2002         ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
2003       } else {
2004         ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr);
2005       }
2006     }
2007   }
2008 
2009   c->ignorezeroentries = a->ignorezeroentries;
2010   c->roworiented       = a->roworiented;
2011   c->nonew             = a->nonew;
2012   if (a->diag) {
2013     ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr);
2014     ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
2015     for (i=0; i<m; i++) {
2016       c->diag[i] = a->diag[i];
2017     }
2018   } else c->diag = 0;
2019 
2020   c->solve_work         = 0;
2021   c->saved_values       = 0;
2022   c->idiag              = 0;
2023   c->ssor_work          = 0;
2024   c->keepnonzeropattern = a->keepnonzeropattern;
2025   c->free_val           = PETSC_TRUE;
2026   c->free_colidx        = PETSC_TRUE;
2027 
2028   c->maxallocmat  = a->maxallocmat;
2029   c->maxallocrow  = a->maxallocrow;
2030   c->rlenmax      = a->rlenmax;
2031   c->nz           = a->nz;
2032   C->preallocated = PETSC_TRUE;
2033 
2034   c->nonzerorowcnt = a->nonzerorowcnt;
2035   C->nonzerostate  = A->nonzerostate;
2036 
2037   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
2042 {
2043   PetscErrorCode ierr;
2044 
2045   PetscFunctionBegin;
2046   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
2047   ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
2048   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
2049     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
2050   }
2051   ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2052   ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 /*@C
2057  MatCreateSeqSELL - Creates a sparse matrix in SELL format.
2058 
2059  Collective on MPI_Comm
2060 
2061  Input Parameters:
2062  +  comm - MPI communicator, set to PETSC_COMM_SELF
2063  .  m - number of rows
2064  .  n - number of columns
2065  .  rlenmax - maximum number of nonzeros in a row
2066  -  rlen - array containing the number of nonzeros in the various rows
2067  (possibly different for each row) or NULL
2068 
2069  Output Parameter:
2070  .  A - the matrix
2071 
2072  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2073  MatXXXXSetPreallocation() paradgm instead of this routine directly.
2074  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
2075 
2076  Notes:
2077  If nnz is given then nz is ignored
2078 
2079  Specify the preallocated storage with either rlenmax or rlen (not both).
2080  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2081  allocation.  For large problems you MUST preallocate memory or you
2082  will get TERRIBLE performance, see the users' manual chapter on matrices.
2083 
2084  Level: intermediate
2085 
2086  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()
2087 
2088  @*/
2089 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2090 {
2091   PetscErrorCode ierr;
2092 
2093   PetscFunctionBegin;
2094   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2095   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2096   ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr);
2097   ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr);
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2102 {
2103   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2104   PetscInt       totalslices=a->totalslices;
2105   PetscErrorCode ierr;
2106 
2107   PetscFunctionBegin;
2108   /* If the  matrix dimensions are not equal,or no of nonzeros */
2109   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2110     *flg = PETSC_FALSE;
2111     PetscFunctionReturn(0);
2112   }
2113   /* if the a->colidx are the same */
2114   ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr);
2115   if (!*flg) PetscFunctionReturn(0);
2116   /* if a->val are the same */
2117   ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr);
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2122 {
2123   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2124 
2125   PetscFunctionBegin;
2126   a->idiagvalid  = PETSC_FALSE;
2127   a->ibdiagvalid = PETSC_FALSE;
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 PetscErrorCode MatConjugate_SeqSELL(Mat A)
2132 {
2133 #if defined(PETSC_USE_COMPLEX)
2134   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2135   PetscInt    i;
2136   PetscScalar *val = a->val;
2137 
2138   PetscFunctionBegin;
2139   for (i=0; i<a->sliidx[a->totalslices]; i++) {
2140     val[i] = PetscConj(val[i]);
2141   }
2142 #else
2143   PetscFunctionBegin;
2144 #endif
2145   PetscFunctionReturn(0);
2146 }
2147