xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision d543d7c6650afe38e02d4e42c97e77acf4b0e00a)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/seq/baij.h"
4 #include "../src/mat/blockinvert.h"
5 #include "petscbt.h"
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
9 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
10 {
11   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
12   PetscErrorCode ierr;
13   PetscInt       row,i,j,k,l,m,n,*nidx,isz,val,ival;
14   const PetscInt *idx;
15   PetscInt       start,end,*ai,*aj,bs,*nidx2;
16   PetscBT        table;
17 
18   PetscFunctionBegin;
19   m     = a->mbs;
20   ai    = a->i;
21   aj    = a->j;
22   bs    = A->rmap->bs;
23 
24   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
25 
26   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
27   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
28   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
29 
30   for (i=0; i<is_max; i++) {
31     /* Initialise the two local arrays */
32     isz  = 0;
33     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
34 
35     /* Extract the indices, assume there can be duplicate entries */
36     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
37     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
38 
39     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
40     for (j=0; j<n ; ++j){
41       ival = idx[j]/bs; /* convert the indices into block indices */
42       if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
44     }
45     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
46     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
47 
48     k = 0;
49     for (j=0; j<ov; j++){ /* for each overlap*/
50       n = isz;
51       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
52         row   = nidx[k];
53         start = ai[row];
54         end   = ai[row+1];
55         for (l = start; l<end ; l++){
56           val = aj[l];
57           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
58         }
59       }
60     }
61     /* expand the Index Set */
62     for (j=0; j<isz; j++) {
63       for (k=0; k<bs; k++)
64         nidx2[j*bs+k] = nidx[j]*bs+k;
65     }
66     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
67   }
68   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
69   ierr = PetscFree(nidx);CHKERRQ(ierr);
70   ierr = PetscFree(nidx2);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
77 {
78   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*c;
79   PetscErrorCode ierr;
80   PetscInt       *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
81   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
82   const PetscInt *irow,*icol;
83   PetscInt       nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
84   PetscInt       *aj = a->j,*ai = a->i;
85   MatScalar      *mat_a;
86   Mat            C;
87   PetscTruth     flag,sorted;
88 
89   PetscFunctionBegin;
90   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
91   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
92 
93   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
94   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
95   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
96   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
97 
98   ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
99   ssmap = smap;
100   ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
101   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
103   /* determine lens of each row */
104   for (i=0; i<nrows; i++) {
105     kstart  = ai[irow[i]];
106     kend    = kstart + a->ilen[irow[i]];
107     lens[i] = 0;
108       for (k=kstart; k<kend; k++) {
109         if (ssmap[aj[k]]) {
110           lens[i]++;
111         }
112       }
113     }
114   /* Create and fill new matrix */
115   if (scall == MAT_REUSE_MATRIX) {
116     c = (Mat_SeqBAIJ *)((*B)->data);
117 
118     if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
119     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
120     if (!flag) {
121       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
122     }
123     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
124     C = *B;
125   } else {
126     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
127     ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
128     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
129     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr);
130   }
131   c = (Mat_SeqBAIJ *)(C->data);
132   for (i=0; i<nrows; i++) {
133     row    = irow[i];
134     kstart = ai[row];
135     kend   = kstart + a->ilen[row];
136     mat_i  = c->i[i];
137     mat_j  = c->j + mat_i;
138     mat_a  = c->a + mat_i*bs2;
139     mat_ilen = c->ilen + i;
140     for (k=kstart; k<kend; k++) {
141       if ((tcol=ssmap[a->j[k]])) {
142         *mat_j++ = tcol - 1;
143         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
144         mat_a   += bs2;
145         (*mat_ilen)++;
146       }
147     }
148   }
149 
150   /* Free work space */
151   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
152   ierr = PetscFree(smap);CHKERRQ(ierr);
153   ierr = PetscFree(lens);CHKERRQ(ierr);
154   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
155   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
156 
157   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
158   *B = C;
159   PetscFunctionReturn(0);
160 }
161 
162 #undef __FUNCT__
163 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
164 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
165 {
166   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
167   IS             is1,is2;
168   PetscErrorCode ierr;
169   PetscInt       *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count;
170   const PetscInt *irow,*icol;
171 
172   PetscFunctionBegin;
173   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
174   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
175   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
176   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
177 
178   /* Verify if the indices corespond to each element in a block
179    and form the IS with compressed IS */
180   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
181   iary = vary + a->mbs;
182   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
183   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
184   count = 0;
185   for (i=0; i<a->mbs; i++) {
186     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
187     if (vary[i]==bs) iary[count++] = i;
188   }
189   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
190 
191   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
192   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
193   count = 0;
194   for (i=0; i<a->mbs; i++) {
195     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc");
196     if (vary[i]==bs) iary[count++] = i;
197   }
198   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
199   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
200   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
201   ierr = PetscFree(vary);CHKERRQ(ierr);
202 
203   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,scall,B);CHKERRQ(ierr);
204   ISDestroy(is1);
205   ISDestroy(is2);
206   PetscFunctionReturn(0);
207 }
208 
209 #undef __FUNCT__
210 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
211 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
212 {
213   PetscErrorCode ierr;
214   PetscInt       i;
215 
216   PetscFunctionBegin;
217   if (scall == MAT_INITIAL_MATRIX) {
218     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
219   }
220 
221   for (i=0; i<n; i++) {
222     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
223   }
224   PetscFunctionReturn(0);
225 }
226 
227 
228 /* -------------------------------------------------------*/
229 /* Should check that shapes of vectors and matrices match */
230 /* -------------------------------------------------------*/
231 #include "petscblaslapack.h"
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatMult_SeqBAIJ_1"
235 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
236 {
237   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
238   PetscScalar       *z,sum;
239   const PetscScalar *x;
240   const MatScalar   *v;
241   PetscErrorCode    ierr;
242   PetscInt          mbs,i,n,nonzerorow=0;
243   const PetscInt    *idx,*ii,*ridx=PETSC_NULL;
244   PetscTruth        usecprow=a->compressedrow.use;
245 
246   PetscFunctionBegin;
247   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
248   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
249 
250   if (usecprow){
251     mbs  = a->compressedrow.nrows;
252     ii   = a->compressedrow.i;
253     ridx = a->compressedrow.rindex;
254     ierr = PetscMemzero(z,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
255   } else {
256     mbs = a->mbs;
257     ii  = a->i;
258   }
259 
260   for (i=0; i<mbs; i++) {
261     n    = ii[1] - ii[0];
262     v    = a->a + ii[0];
263     idx  = a->j + ii[0];
264     ii++;
265     sum  = 0.0;
266     nonzerorow += (n>0);
267     PetscSparseDensePlusDot(sum,x,v,idx,n);
268     if (usecprow){
269       z[ridx[i]] = sum;
270     } else {
271       z[i] = sum;
272     }
273   }
274   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
275   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
276   ierr = PetscLogFlops(2.0*a->nz - nonzerorow);CHKERRQ(ierr);
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatMult_SeqBAIJ_2"
282 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
283 {
284   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
285   PetscScalar       *z = 0,sum1,sum2,*zarray;
286   const PetscScalar *x,*xb;
287   PetscScalar       x1,x2;
288   const MatScalar   *v;
289   PetscErrorCode    ierr;
290   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
291   PetscTruth        usecprow=a->compressedrow.use;
292 
293   PetscFunctionBegin;
294   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
295   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
296 
297   idx = a->j;
298   v   = a->a;
299   if (usecprow){
300     mbs  = a->compressedrow.nrows;
301     ii   = a->compressedrow.i;
302     ridx = a->compressedrow.rindex;
303   } else {
304     mbs = a->mbs;
305     ii  = a->i;
306     z   = zarray;
307   }
308 
309   for (i=0; i<mbs; i++) {
310     n  = ii[1] - ii[0]; ii++;
311     sum1 = 0.0; sum2 = 0.0;
312     nonzerorow += (n>0);
313     for (j=0; j<n; j++) {
314       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
315       sum1 += v[0]*x1 + v[2]*x2;
316       sum2 += v[1]*x1 + v[3]*x2;
317       v += 4;
318     }
319     if (usecprow) z = zarray + 2*ridx[i];
320     z[0] = sum1; z[1] = sum2;
321     if (!usecprow) z += 2;
322   }
323   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
325   ierr = PetscLogFlops(8.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatMult_SeqBAIJ_3"
331 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
332 {
333   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
334   PetscScalar       *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray;
335   const PetscScalar *x,*xb;
336   const MatScalar   *v;
337   PetscErrorCode    ierr;
338   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
339   PetscTruth        usecprow=a->compressedrow.use;
340 
341 
342 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
343 #pragma disjoint(*v,*z,*xb)
344 #endif
345 
346   PetscFunctionBegin;
347   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
348   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
349 
350   idx = a->j;
351   v   = a->a;
352   if (usecprow){
353     mbs  = a->compressedrow.nrows;
354     ii   = a->compressedrow.i;
355     ridx = a->compressedrow.rindex;
356   } else {
357     mbs = a->mbs;
358     ii  = a->i;
359     z   = zarray;
360   }
361 
362   for (i=0; i<mbs; i++) {
363     n  = ii[1] - ii[0]; ii++;
364     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
365     nonzerorow += (n>0);
366     for (j=0; j<n; j++) {
367       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
368       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
369       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
370       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
371       v += 9;
372     }
373     if (usecprow) z = zarray + 3*ridx[i];
374     z[0] = sum1; z[1] = sum2; z[2] = sum3;
375     if (!usecprow) z += 3;
376   }
377   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
378   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
379   ierr = PetscLogFlops(18.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
380   PetscFunctionReturn(0);
381 }
382 
383 #undef __FUNCT__
384 #define __FUNCT__ "MatMult_SeqBAIJ_4"
385 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
386 {
387   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
388   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
389   const PetscScalar *x,*xb;
390   const MatScalar   *v;
391   PetscErrorCode    ierr;
392   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
393   PetscTruth        usecprow=a->compressedrow.use;
394 
395   PetscFunctionBegin;
396   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
397   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
398 
399   idx = a->j;
400   v   = a->a;
401   if (usecprow){
402     mbs  = a->compressedrow.nrows;
403     ii   = a->compressedrow.i;
404     ridx = a->compressedrow.rindex;
405   } else {
406     mbs = a->mbs;
407     ii  = a->i;
408     z   = zarray;
409   }
410 
411   for (i=0; i<mbs; i++) {
412     n  = ii[1] - ii[0]; ii++;
413     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
414     nonzerorow += (n>0);
415     for (j=0; j<n; j++) {
416       xb = x + 4*(*idx++);
417       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
418       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
419       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
420       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
421       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
422       v += 16;
423     }
424     if (usecprow) z = zarray + 4*ridx[i];
425     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
426     if (!usecprow) z += 4;
427   }
428   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
429   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
430   ierr = PetscLogFlops(32.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatMult_SeqBAIJ_5"
436 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
437 {
438   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
439   PetscScalar       sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray;
440   const PetscScalar *xb,*x;
441   const MatScalar   *v;
442   PetscErrorCode    ierr;
443   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
444   PetscTruth        usecprow=a->compressedrow.use;
445 
446   PetscFunctionBegin;
447   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
448   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
449 
450   idx = a->j;
451   v   = a->a;
452   if (usecprow){
453     mbs  = a->compressedrow.nrows;
454     ii   = a->compressedrow.i;
455     ridx = a->compressedrow.rindex;
456   } else {
457     mbs = a->mbs;
458     ii  = a->i;
459     z   = zarray;
460   }
461 
462   for (i=0; i<mbs; i++) {
463     n  = ii[1] - ii[0]; ii++;
464     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
465     nonzerorow += (n>0);
466     for (j=0; j<n; j++) {
467       xb = x + 5*(*idx++);
468       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
469       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
470       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
471       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
472       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
473       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
474       v += 25;
475     }
476     if (usecprow) z = zarray + 5*ridx[i];
477     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
478     if (!usecprow) z += 5;
479   }
480   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
481   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
482   ierr = PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
483   PetscFunctionReturn(0);
484 }
485 
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatMult_SeqBAIJ_6"
489 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
490 {
491   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
492   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6;
493   const PetscScalar *x,*xb;
494   PetscScalar       x1,x2,x3,x4,x5,x6,*zarray;
495   const MatScalar   *v;
496   PetscErrorCode    ierr;
497   PetscInt          mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
498   PetscTruth        usecprow=a->compressedrow.use;
499 
500   PetscFunctionBegin;
501   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
502   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
503 
504   idx = a->j;
505   v   = a->a;
506   if (usecprow){
507     mbs  = a->compressedrow.nrows;
508     ii   = a->compressedrow.i;
509     ridx = a->compressedrow.rindex;
510   } else {
511     mbs = a->mbs;
512     ii  = a->i;
513     z   = zarray;
514   }
515 
516   for (i=0; i<mbs; i++) {
517     n  = ii[1] - ii[0]; ii++;
518     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
519     nonzerorow += (n>0);
520     for (j=0; j<n; j++) {
521       xb = x + 6*(*idx++);
522       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
523       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
524       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
525       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
526       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
527       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
528       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
529       v += 36;
530     }
531     if (usecprow) z = zarray + 6*ridx[i];
532     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
533     if (!usecprow) z += 6;
534   }
535 
536   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
537   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
538   ierr = PetscLogFlops(72.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_SeqBAIJ_7"
543 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
544 {
545   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
546   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
547   const PetscScalar *x,*xb;
548   PetscScalar       x1,x2,x3,x4,x5,x6,x7,*zarray;
549   const MatScalar   *v;
550   PetscErrorCode    ierr;
551   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
552   PetscTruth        usecprow=a->compressedrow.use;
553 
554   PetscFunctionBegin;
555   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
556   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
557 
558   idx = a->j;
559   v   = a->a;
560   if (usecprow){
561     mbs    = a->compressedrow.nrows;
562     ii     = a->compressedrow.i;
563     ridx = a->compressedrow.rindex;
564   } else {
565     mbs = a->mbs;
566     ii  = a->i;
567     z   = zarray;
568   }
569 
570   for (i=0; i<mbs; i++) {
571     n  = ii[1] - ii[0]; ii++;
572     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
573     nonzerorow += (n>0);
574     for (j=0; j<n; j++) {
575       xb = x + 7*(*idx++);
576       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
577       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
578       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
579       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
580       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
581       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
582       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
583       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
584       v += 49;
585     }
586     if (usecprow) z = zarray + 7*ridx[i];
587     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
588     if (!usecprow) z += 7;
589   }
590 
591   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
592   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
593   ierr = PetscLogFlops(98.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 /*
598     This will not work with MatScalar == float because it calls the BLAS
599 */
600 #undef __FUNCT__
601 #define __FUNCT__ "MatMult_SeqBAIJ_N"
602 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
603 {
604   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
605   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
606   MatScalar      *v;
607   PetscErrorCode ierr;
608   PetscInt       mbs=a->mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
609   PetscInt       ncols,k,*ridx=PETSC_NULL,nonzerorow=0;
610   PetscTruth     usecprow=a->compressedrow.use;
611 
612   PetscFunctionBegin;
613   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
614   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
615 
616   idx = a->j;
617   v   = a->a;
618   if (usecprow){
619     mbs  = a->compressedrow.nrows;
620     ii   = a->compressedrow.i;
621     ridx = a->compressedrow.rindex;
622   } else {
623     mbs = a->mbs;
624     ii  = a->i;
625     z   = zarray;
626   }
627 
628   if (!a->mult_work) {
629     k    = PetscMax(A->rmap->n,A->cmap->n);
630     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
631   }
632   work = a->mult_work;
633   for (i=0; i<mbs; i++) {
634     n     = ii[1] - ii[0]; ii++;
635     ncols = n*bs;
636     workt = work;
637     nonzerorow += (n>0);
638     for (j=0; j<n; j++) {
639       xb = x + bs*(*idx++);
640       for (k=0; k<bs; k++) workt[k] = xb[k];
641       workt += bs;
642     }
643     if (usecprow) z = zarray + bs*ridx[i];
644     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
645     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
646     v += n*bs2;
647     if (!usecprow) z += bs;
648   }
649   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
650   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
651   ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 extern PetscErrorCode VecCopy_Seq(Vec,Vec);
656 #undef __FUNCT__
657 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
658 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
659 {
660   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
661   PetscScalar    *x,*y = 0,*z = 0,sum,*yarray,*zarray;
662   MatScalar      *v;
663   PetscErrorCode ierr;
664   PetscInt       mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL;
665   PetscTruth     usecprow=a->compressedrow.use;
666 
667   PetscFunctionBegin;
668   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
669   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
670   if (zz != yy) {
671     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
672   } else {
673     zarray = yarray;
674   }
675 
676   idx = a->j;
677   v   = a->a;
678   if (usecprow){
679     if (zz != yy){
680       ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
681     }
682     mbs  = a->compressedrow.nrows;
683     ii   = a->compressedrow.i;
684     ridx = a->compressedrow.rindex;
685   } else {
686     ii  = a->i;
687     y   = yarray;
688     z   = zarray;
689   }
690 
691   for (i=0; i<mbs; i++) {
692     n    = ii[1] - ii[0]; ii++;
693     if (usecprow){
694       z = zarray + ridx[i];
695       y = yarray + ridx[i];
696     }
697     sum = y[0];
698     while (n--) sum += *v++ * x[*idx++];
699     z[0] = sum;
700     if (!usecprow){
701       z++; y++;
702     }
703   }
704   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
705   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
706   if (zz != yy) {
707     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
708   }
709   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
715 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
716 {
717   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
718   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2;
719   PetscScalar    x1,x2,*yarray,*zarray;
720   MatScalar      *v;
721   PetscErrorCode ierr;
722   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
723   PetscTruth     usecprow=a->compressedrow.use;
724 
725   PetscFunctionBegin;
726   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
727   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
728   if (zz != yy) {
729     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
730   } else {
731     zarray = yarray;
732   }
733 
734   idx = a->j;
735   v   = a->a;
736   if (usecprow){
737     if (zz != yy){
738       ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
739     }
740     mbs  = a->compressedrow.nrows;
741     ii   = a->compressedrow.i;
742     ridx = a->compressedrow.rindex;
743     if (zz != yy){
744       ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr);
745     }
746   } else {
747     ii  = a->i;
748     y   = yarray;
749     z   = zarray;
750   }
751 
752   for (i=0; i<mbs; i++) {
753     n  = ii[1] - ii[0]; ii++;
754     if (usecprow){
755       z = zarray + 2*ridx[i];
756       y = yarray + 2*ridx[i];
757     }
758     sum1 = y[0]; sum2 = y[1];
759     for (j=0; j<n; j++) {
760       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
761       sum1 += v[0]*x1 + v[2]*x2;
762       sum2 += v[1]*x1 + v[3]*x2;
763       v += 4;
764     }
765     z[0] = sum1; z[1] = sum2;
766     if (!usecprow){
767       z += 2; y += 2;
768     }
769   }
770   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
771   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
772   if (zz != yy) {
773     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
774   }
775   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
776   PetscFunctionReturn(0);
777 }
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
781 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
782 {
783   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
784   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray;
785   MatScalar      *v;
786   PetscErrorCode ierr;
787   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
788   PetscTruth     usecprow=a->compressedrow.use;
789 
790   PetscFunctionBegin;
791   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
792   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
793   if (zz != yy) {
794     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
795   } else {
796     zarray = yarray;
797   }
798 
799   idx = a->j;
800   v   = a->a;
801   if (usecprow){
802     if (zz != yy){
803       ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
804     }
805     mbs  = a->compressedrow.nrows;
806     ii   = a->compressedrow.i;
807     ridx = a->compressedrow.rindex;
808   } else {
809     ii  = a->i;
810     y   = yarray;
811     z   = zarray;
812   }
813 
814   for (i=0; i<mbs; i++) {
815     n  = ii[1] - ii[0]; ii++;
816     if (usecprow){
817       z = zarray + 3*ridx[i];
818       y = yarray + 3*ridx[i];
819     }
820     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
821     for (j=0; j<n; j++) {
822       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
823       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
824       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
825       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
826       v += 9;
827     }
828     z[0] = sum1; z[1] = sum2; z[2] = sum3;
829     if (!usecprow){
830       z += 3; y += 3;
831     }
832   }
833   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
835   if (zz != yy) {
836     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
837   }
838   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
844 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
845 {
846   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
847   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
848   MatScalar      *v;
849   PetscErrorCode ierr;
850   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
851   PetscTruth     usecprow=a->compressedrow.use;
852 
853   PetscFunctionBegin;
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
856   if (zz != yy) {
857     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
858   } else {
859     zarray = yarray;
860   }
861 
862   idx   = a->j;
863   v     = a->a;
864   if (usecprow){
865     if (zz != yy){
866       ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
867     }
868     mbs  = a->compressedrow.nrows;
869     ii   = a->compressedrow.i;
870     ridx = a->compressedrow.rindex;
871   } else {
872     ii  = a->i;
873     y   = yarray;
874     z   = zarray;
875   }
876 
877   for (i=0; i<mbs; i++) {
878     n  = ii[1] - ii[0]; ii++;
879     if (usecprow){
880       z = zarray + 4*ridx[i];
881       y = yarray + 4*ridx[i];
882     }
883     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
884     for (j=0; j<n; j++) {
885       xb = x + 4*(*idx++);
886       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
887       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
888       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
889       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
890       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
891       v += 16;
892     }
893     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
894     if (!usecprow){
895       z += 4; y += 4;
896     }
897   }
898   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
899   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
900   if (zz != yy) {
901     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
902   }
903   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 #undef __FUNCT__
908 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
909 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
910 {
911   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
912   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
913   PetscScalar    *yarray,*zarray;
914   MatScalar      *v;
915   PetscErrorCode ierr;
916   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
917   PetscTruth     usecprow=a->compressedrow.use;
918 
919   PetscFunctionBegin;
920   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
921   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
922   if (zz != yy) {
923     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
924   } else {
925     zarray = yarray;
926   }
927 
928   idx = a->j;
929   v   = a->a;
930   if (usecprow){
931     if (zz != yy){
932       ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
933     }
934     mbs  = a->compressedrow.nrows;
935     ii   = a->compressedrow.i;
936     ridx = a->compressedrow.rindex;
937   } else {
938     ii  = a->i;
939     y   = yarray;
940     z   = zarray;
941   }
942 
943   for (i=0; i<mbs; i++) {
944     n  = ii[1] - ii[0]; ii++;
945     if (usecprow){
946       z = zarray + 5*ridx[i];
947       y = yarray + 5*ridx[i];
948     }
949     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
950     for (j=0; j<n; j++) {
951       xb = x + 5*(*idx++);
952       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
953       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
954       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
955       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
956       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
957       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
958       v += 25;
959     }
960     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
961     if (!usecprow){
962       z += 5; y += 5;
963     }
964   }
965   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
966   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
967   if (zz != yy) {
968     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
969   }
970   ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr);
971   PetscFunctionReturn(0);
972 }
973 #undef __FUNCT__
974 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
975 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
976 {
977   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
978   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
979   PetscScalar    x1,x2,x3,x4,x5,x6,*yarray,*zarray;
980   MatScalar      *v;
981   PetscErrorCode ierr;
982   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
983   PetscTruth     usecprow=a->compressedrow.use;
984 
985   PetscFunctionBegin;
986   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
987   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
988   if (zz != yy) {
989     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
990   } else {
991     zarray = yarray;
992   }
993 
994   idx = a->j;
995   v   = a->a;
996   if (usecprow){
997     if (zz != yy){
998       ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
999     }
1000     mbs  = a->compressedrow.nrows;
1001     ii   = a->compressedrow.i;
1002     ridx = a->compressedrow.rindex;
1003   } else {
1004     ii  = a->i;
1005     y   = yarray;
1006     z   = zarray;
1007   }
1008 
1009   for (i=0; i<mbs; i++) {
1010     n  = ii[1] - ii[0]; ii++;
1011     if (usecprow){
1012       z = zarray + 6*ridx[i];
1013       y = yarray + 6*ridx[i];
1014     }
1015     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
1016     for (j=0; j<n; j++) {
1017       xb = x + 6*(*idx++);
1018       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1019       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
1020       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
1021       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
1022       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
1023       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
1024       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
1025       v += 36;
1026     }
1027     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
1028     if (!usecprow){
1029       z += 6; y += 6;
1030     }
1031   }
1032   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1033   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1034   if (zz != yy) {
1035     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1036   }
1037   ierr = PetscLogFlops(72.0*a->nz);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
1043 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1044 {
1045   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1046   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1047   PetscScalar    x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray;
1048   MatScalar      *v;
1049   PetscErrorCode ierr;
1050   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1051   PetscTruth     usecprow=a->compressedrow.use;
1052 
1053   PetscFunctionBegin;
1054   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1055   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1056   if (zz != yy) {
1057     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1058   } else {
1059     zarray = yarray;
1060   }
1061 
1062   idx = a->j;
1063   v   = a->a;
1064   if (usecprow){
1065     if (zz != yy){
1066       ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1067     }
1068     mbs  = a->compressedrow.nrows;
1069     ii   = a->compressedrow.i;
1070     ridx = a->compressedrow.rindex;
1071   } else {
1072     ii  = a->i;
1073     y   = yarray;
1074     z   = zarray;
1075   }
1076 
1077   for (i=0; i<mbs; i++) {
1078     n  = ii[1] - ii[0]; ii++;
1079     if (usecprow){
1080       z = zarray + 7*ridx[i];
1081       y = yarray + 7*ridx[i];
1082     }
1083     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1084     for (j=0; j<n; j++) {
1085       xb = x + 7*(*idx++);
1086       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1087       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1088       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1089       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1090       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1091       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1092       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1093       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1094       v += 49;
1095     }
1096     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1097     if (!usecprow){
1098       z += 7; y += 7;
1099     }
1100   }
1101   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1102   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1103   if (zz != yy) {
1104     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1105   }
1106   ierr = PetscLogFlops(98.0*a->nz);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
1112 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1113 {
1114   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1115   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
1116   MatScalar      *v;
1117   PetscErrorCode ierr;
1118   PetscInt       mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
1119   PetscInt       ncols,k,*ridx=PETSC_NULL;
1120   PetscTruth     usecprow=a->compressedrow.use;
1121 
1122   PetscFunctionBegin;
1123   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
1124   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1125   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1126 
1127   idx = a->j;
1128   v   = a->a;
1129   if (usecprow){
1130     mbs    = a->compressedrow.nrows;
1131     ii     = a->compressedrow.i;
1132     ridx = a->compressedrow.rindex;
1133   } else {
1134     mbs = a->mbs;
1135     ii  = a->i;
1136     z   = zarray;
1137   }
1138 
1139   if (!a->mult_work) {
1140     k    = PetscMax(A->rmap->n,A->cmap->n);
1141     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1142   }
1143   work = a->mult_work;
1144   for (i=0; i<mbs; i++) {
1145     n     = ii[1] - ii[0]; ii++;
1146     ncols = n*bs;
1147     workt = work;
1148     for (j=0; j<n; j++) {
1149       xb = x + bs*(*idx++);
1150       for (k=0; k<bs; k++) workt[k] = xb[k];
1151       workt += bs;
1152     }
1153     if (usecprow) z = zarray + bs*ridx[i];
1154     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1155     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
1156     v += n*bs2;
1157     if (!usecprow){
1158       z += bs;
1159     }
1160   }
1161   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1162   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1163   ierr = PetscLogFlops(2.0*a->nz*bs2);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
1169 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1170 {
1171   PetscScalar    zero = 0.0;
1172   PetscErrorCode ierr;
1173 
1174   PetscFunctionBegin;
1175   ierr = VecSet(zz,zero);CHKERRQ(ierr);
1176   ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1182 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1183 
1184 {
1185   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1186   PetscScalar       *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5;
1187   MatScalar         *v;
1188   PetscErrorCode    ierr;
1189   PetscInt          mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL;
1190   Mat_CompressedRow cprow = a->compressedrow;
1191   PetscTruth        usecprow=cprow.use;
1192 
1193   PetscFunctionBegin;
1194   if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); }
1195   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1196   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1197 
1198   idx = a->j;
1199   v   = a->a;
1200   if (usecprow){
1201     mbs  = cprow.nrows;
1202     ii   = cprow.i;
1203     ridx = cprow.rindex;
1204   } else {
1205     mbs=a->mbs;
1206     ii = a->i;
1207     xb = x;
1208   }
1209 
1210   switch (bs) {
1211   case 1:
1212     for (i=0; i<mbs; i++) {
1213       if (usecprow) xb = x + ridx[i];
1214       x1 = xb[0];
1215       ib = idx + ii[0];
1216       n  = ii[1] - ii[0]; ii++;
1217       for (j=0; j<n; j++) {
1218         rval    = ib[j];
1219         z[rval] += *v * x1;
1220         v++;
1221       }
1222       if (!usecprow) xb++;
1223     }
1224     break;
1225   case 2:
1226     for (i=0; i<mbs; i++) {
1227       if (usecprow) xb = x + 2*ridx[i];
1228       x1 = xb[0]; x2 = xb[1];
1229       ib = idx + ii[0];
1230       n  = ii[1] - ii[0]; ii++;
1231       for (j=0; j<n; j++) {
1232         rval      = ib[j]*2;
1233         z[rval++] += v[0]*x1 + v[1]*x2;
1234         z[rval++] += v[2]*x1 + v[3]*x2;
1235         v  += 4;
1236       }
1237       if (!usecprow) xb += 2;
1238     }
1239     break;
1240   case 3:
1241     for (i=0; i<mbs; i++) {
1242       if (usecprow) xb = x + 3*ridx[i];
1243       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1244       ib = idx + ii[0];
1245       n  = ii[1] - ii[0]; ii++;
1246       for (j=0; j<n; j++) {
1247         rval      = ib[j]*3;
1248         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1249         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1250         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1251         v  += 9;
1252       }
1253       if (!usecprow) xb += 3;
1254     }
1255     break;
1256   case 4:
1257     for (i=0; i<mbs; i++) {
1258       if (usecprow) xb = x + 4*ridx[i];
1259       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1260       ib = idx + ii[0];
1261       n  = ii[1] - ii[0]; ii++;
1262       for (j=0; j<n; j++) {
1263         rval      = ib[j]*4;
1264         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1265         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1266         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1267         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1268         v  += 16;
1269       }
1270       if (!usecprow) xb += 4;
1271     }
1272     break;
1273   case 5:
1274     for (i=0; i<mbs; i++) {
1275       if (usecprow) xb = x + 5*ridx[i];
1276       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1277       x4 = xb[3]; x5 = xb[4];
1278       ib = idx + ii[0];
1279       n  = ii[1] - ii[0]; ii++;
1280       for (j=0; j<n; j++) {
1281         rval      = ib[j]*5;
1282         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1283         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1284         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1285         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1286         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1287         v  += 25;
1288       }
1289       if (!usecprow) xb += 5;
1290     }
1291     break;
1292   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1293       PetscInt     ncols,k;
1294       PetscScalar  *work,*workt,*xtmp;
1295 
1296       if (!a->mult_work) {
1297         k = PetscMax(A->rmap->n,A->cmap->n);
1298         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1299       }
1300       work = a->mult_work;
1301       xtmp = x;
1302       for (i=0; i<mbs; i++) {
1303         n     = ii[1] - ii[0]; ii++;
1304         ncols = n*bs;
1305         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1306         if (usecprow) {
1307           xtmp = x + bs*ridx[i];
1308         }
1309         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work);
1310         /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
1311         v += n*bs2;
1312         if (!usecprow) xtmp += bs;
1313         workt = work;
1314         for (j=0; j<n; j++) {
1315           zb = z + bs*(*idx++);
1316           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1317           workt += bs;
1318         }
1319       }
1320     }
1321   }
1322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1323   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
1324   ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatScale_SeqBAIJ"
1330 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha)
1331 {
1332   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1333   PetscInt       totalnz = a->bs2*a->nz;
1334   PetscScalar    oalpha = alpha;
1335   PetscErrorCode ierr;
1336   PetscBLASInt   one = 1,tnz = PetscBLASIntCast(totalnz);
1337 
1338   PetscFunctionBegin;
1339   BLASscal_(&tnz,&oalpha,a->a,&one);
1340   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatNorm_SeqBAIJ"
1346 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1347 {
1348   PetscErrorCode ierr;
1349   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1350   MatScalar      *v = a->a;
1351   PetscReal      sum = 0.0;
1352   PetscInt       i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1;
1353 
1354   PetscFunctionBegin;
1355   if (type == NORM_FROBENIUS) {
1356     for (i=0; i< bs2*nz; i++) {
1357 #if defined(PETSC_USE_COMPLEX)
1358       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1359 #else
1360       sum += (*v)*(*v); v++;
1361 #endif
1362     }
1363     *norm = sqrt(sum);
1364   } else if (type == NORM_1) { /* maximum column sum */
1365     PetscReal *tmp;
1366     PetscInt  *bcol = a->j;
1367     ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1368     ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr);
1369     for (i=0; i<nz; i++){
1370       for (j=0; j<bs; j++){
1371         k1 = bs*(*bcol) + j; /* column index */
1372         for (k=0; k<bs; k++){
1373           tmp[k1] += PetscAbsScalar(*v); v++;
1374         }
1375       }
1376       bcol++;
1377     }
1378     *norm = 0.0;
1379     for (j=0; j<A->cmap->n; j++) {
1380       if (tmp[j] > *norm) *norm = tmp[j];
1381     }
1382     ierr = PetscFree(tmp);CHKERRQ(ierr);
1383   } else if (type == NORM_INFINITY) { /* maximum row sum */
1384     *norm = 0.0;
1385     for (k=0; k<bs; k++) {
1386       for (j=0; j<a->mbs; j++) {
1387         v = a->a + bs2*a->i[j] + k;
1388         sum = 0.0;
1389         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1390           for (k1=0; k1<bs; k1++){
1391             sum += PetscAbsScalar(*v);
1392             v   += bs;
1393           }
1394         }
1395         if (sum > *norm) *norm = sum;
1396       }
1397     }
1398   } else {
1399     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatEqual_SeqBAIJ"
1407 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1408 {
1409   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1414   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1415     *flg = PETSC_FALSE;
1416     PetscFunctionReturn(0);
1417   }
1418 
1419   /* if the a->i are the same */
1420   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1421   if (!*flg) {
1422     PetscFunctionReturn(0);
1423   }
1424 
1425   /* if a->j are the same */
1426   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1427   if (!*flg) {
1428     PetscFunctionReturn(0);
1429   }
1430   /* if a->a are the same */
1431   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1438 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1439 {
1440   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1441   PetscErrorCode ierr;
1442   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1443   PetscScalar    *x,zero = 0.0;
1444   MatScalar      *aa,*aa_j;
1445 
1446   PetscFunctionBegin;
1447   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1448   bs   = A->rmap->bs;
1449   aa   = a->a;
1450   ai   = a->i;
1451   aj   = a->j;
1452   ambs = a->mbs;
1453   bs2  = a->bs2;
1454 
1455   ierr = VecSet(v,zero);CHKERRQ(ierr);
1456   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1457   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1458   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1459   for (i=0; i<ambs; i++) {
1460     for (j=ai[i]; j<ai[i+1]; j++) {
1461       if (aj[j] == i) {
1462         row  = i*bs;
1463         aa_j = aa+j*bs2;
1464         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1465         break;
1466       }
1467     }
1468   }
1469   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1475 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1476 {
1477   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1478   PetscScalar    *l,*r,x,*li,*ri;
1479   MatScalar      *aa,*v;
1480   PetscErrorCode ierr;
1481   PetscInt       i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1482 
1483   PetscFunctionBegin;
1484   ai  = a->i;
1485   aj  = a->j;
1486   aa  = a->a;
1487   m   = A->rmap->n;
1488   n   = A->cmap->n;
1489   bs  = A->rmap->bs;
1490   mbs = a->mbs;
1491   bs2 = a->bs2;
1492   if (ll) {
1493     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1494     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1495     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1496     for (i=0; i<mbs; i++) { /* for each block row */
1497       M  = ai[i+1] - ai[i];
1498       li = l + i*bs;
1499       v  = aa + bs2*ai[i];
1500       for (j=0; j<M; j++) { /* for each block */
1501         for (k=0; k<bs2; k++) {
1502           (*v++) *= li[k%bs];
1503         }
1504       }
1505     }
1506     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1507     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1508   }
1509 
1510   if (rr) {
1511     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1512     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1513     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1514     for (i=0; i<mbs; i++) { /* for each block row */
1515       M  = ai[i+1] - ai[i];
1516       v  = aa + bs2*ai[i];
1517       for (j=0; j<M; j++) { /* for each block */
1518         ri = r + bs*aj[ai[i]+j];
1519         for (k=0; k<bs; k++) {
1520           x = ri[k];
1521           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1522         }
1523       }
1524     }
1525     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1526     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1527   }
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1534 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1535 {
1536   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1537 
1538   PetscFunctionBegin;
1539   info->block_size     = a->bs2;
1540   info->nz_allocated   = a->maxnz;
1541   info->nz_used        = a->bs2*a->nz;
1542   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1543   info->assemblies   = A->num_ass;
1544   info->mallocs      = a->reallocs;
1545   info->memory       = ((PetscObject)A)->mem;
1546   if (A->factor) {
1547     info->fill_ratio_given  = A->info.fill_ratio_given;
1548     info->fill_ratio_needed = A->info.fill_ratio_needed;
1549     info->factor_mallocs    = A->info.factor_mallocs;
1550   } else {
1551     info->fill_ratio_given  = 0;
1552     info->fill_ratio_needed = 0;
1553     info->factor_mallocs    = 0;
1554   }
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1561 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
1562 {
1563   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1564   PetscErrorCode ierr;
1565 
1566   PetscFunctionBegin;
1567   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1568   PetscFunctionReturn(0);
1569 }
1570