xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 9463ebdafcaa1297f0cc5126b9b2ebb9458fdb8c)
1be1d678aSKris Buschelman 
22593348eSBarry Smith /*
3b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
42593348eSBarry Smith   matrix storage format.
52593348eSBarry Smith */
6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7c6db04a5SJed Brown #include <petscblaslapack.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
1043516a2dSKris Buschelman 
117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
137ea3e4caSstefano_zampini #endif
147ea3e4caSstefano_zampini 
15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17b5b72c8aSIrina Sokolova #endif
18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19b5b72c8aSIrina Sokolova 
20*9463ebdaSPierre Jolivet PetscErrorCode MatGetColumnNorms_SeqBAIJ(Mat A,NormType type,PetscReal *norms)
21*9463ebdaSPierre Jolivet {
22*9463ebdaSPierre Jolivet   PetscErrorCode ierr;
23*9463ebdaSPierre Jolivet   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
24*9463ebdaSPierre Jolivet   PetscInt       N,i;
25*9463ebdaSPierre Jolivet   PetscInt       ib,jb,bs = A->rmap->bs;
26*9463ebdaSPierre Jolivet   MatScalar      *a_val = a_aij->a;
27*9463ebdaSPierre Jolivet 
28*9463ebdaSPierre Jolivet   PetscFunctionBegin;
29*9463ebdaSPierre Jolivet   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
30*9463ebdaSPierre Jolivet   for (i=0; i<N; i++) norms[i] = 0.0;
31*9463ebdaSPierre Jolivet   if (type == NORM_2) {
32*9463ebdaSPierre Jolivet     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
33*9463ebdaSPierre Jolivet       for (jb=0; jb<bs; jb++) {
34*9463ebdaSPierre Jolivet         for (ib=0; ib<bs; ib++) {
35*9463ebdaSPierre Jolivet           norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
36*9463ebdaSPierre Jolivet           a_val++;
37*9463ebdaSPierre Jolivet         }
38*9463ebdaSPierre Jolivet       }
39*9463ebdaSPierre Jolivet     }
40*9463ebdaSPierre Jolivet   } else if (type == NORM_1) {
41*9463ebdaSPierre Jolivet     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
42*9463ebdaSPierre Jolivet       for (jb=0; jb<bs; jb++) {
43*9463ebdaSPierre Jolivet         for (ib=0; ib<bs; ib++) {
44*9463ebdaSPierre Jolivet           norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
45*9463ebdaSPierre Jolivet           a_val++;
46*9463ebdaSPierre Jolivet         }
47*9463ebdaSPierre Jolivet       }
48*9463ebdaSPierre Jolivet     }
49*9463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
50*9463ebdaSPierre Jolivet     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
51*9463ebdaSPierre Jolivet       for (jb=0; jb<bs; jb++) {
52*9463ebdaSPierre Jolivet         for (ib=0; ib<bs; ib++) {
53*9463ebdaSPierre Jolivet           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
54*9463ebdaSPierre Jolivet           norms[col] = PetscMax(PetscAbsScalar(*a_val), norms[col]);
55*9463ebdaSPierre Jolivet           a_val++;
56*9463ebdaSPierre Jolivet         }
57*9463ebdaSPierre Jolivet       }
58*9463ebdaSPierre Jolivet     }
59*9463ebdaSPierre Jolivet   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
60*9463ebdaSPierre Jolivet   if (type == NORM_2) {
61*9463ebdaSPierre Jolivet     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
62*9463ebdaSPierre Jolivet   }
63*9463ebdaSPierre Jolivet   PetscFunctionReturn(0);
64*9463ebdaSPierre Jolivet }
65*9463ebdaSPierre Jolivet 
66713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
67b01c7715SBarry Smith {
68b01c7715SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
696849ba73SBarry Smith   PetscErrorCode ierr;
70de80f912SBarry Smith   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
717f0c90edSBarry Smith   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
7262bba022SBarry Smith   PetscReal      shift = 0.0;
731a9391e3SHong Zhang   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
74b01c7715SBarry Smith 
75b01c7715SBarry Smith   PetscFunctionBegin;
76a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
77a455e926SHong Zhang 
789797317bSBarry Smith   if (a->idiagvalid) {
799797317bSBarry Smith     if (values) *values = a->idiag;
809797317bSBarry Smith     PetscFunctionReturn(0);
819797317bSBarry Smith   }
82b01c7715SBarry Smith   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
83b01c7715SBarry Smith   diag_offset = a->diag;
84b01c7715SBarry Smith   if (!a->idiag) {
857f0c90edSBarry Smith     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
867f0c90edSBarry Smith     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
87b01c7715SBarry Smith   }
88b01c7715SBarry Smith   diag  = a->idiag;
89bbead8a2SBarry Smith   if (values) *values = a->idiag;
90b01c7715SBarry Smith   /* factor and invert each block */
91521d7252SBarry Smith   switch (bs) {
92ab040260SJed Brown   case 1:
93ab040260SJed Brown     for (i=0; i<mbs; i++) {
94ab040260SJed Brown       odiag    = v + 1*diag_offset[i];
95ab040260SJed Brown       diag[0]  = odiag[0];
96ec1892c8SHong Zhang 
97ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
98ec1892c8SHong Zhang         if (allowzeropivot) {
997b6c816cSBarry Smith           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1007b6c816cSBarry Smith           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1017b6c816cSBarry Smith           A->factorerror_zeropivot_row   = i;
102ec1892c8SHong Zhang           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
1037b6c816cSBarry Smith         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
104ec1892c8SHong Zhang       }
105ec1892c8SHong Zhang 
106d4a378daSJed Brown       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
107ab040260SJed Brown       diag    += 1;
108ab040260SJed Brown     }
109ab040260SJed Brown     break;
110b01c7715SBarry Smith   case 2:
111b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
112b01c7715SBarry Smith       odiag    = v + 4*diag_offset[i];
113b01c7715SBarry Smith       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
114a455e926SHong Zhang       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1157b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
116b01c7715SBarry Smith       diag    += 4;
117b01c7715SBarry Smith     }
118b01c7715SBarry Smith     break;
119b01c7715SBarry Smith   case 3:
120b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
121b01c7715SBarry Smith       odiag    = v + 9*diag_offset[i];
122b01c7715SBarry Smith       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
123b01c7715SBarry Smith       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
124b01c7715SBarry Smith       diag[8]  = odiag[8];
125a455e926SHong Zhang       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1267b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127b01c7715SBarry Smith       diag    += 9;
128b01c7715SBarry Smith     }
129b01c7715SBarry Smith     break;
130b01c7715SBarry Smith   case 4:
131b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
132b01c7715SBarry Smith       odiag  = v + 16*diag_offset[i];
133580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
134a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1357b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136b01c7715SBarry Smith       diag  += 16;
137b01c7715SBarry Smith     }
138b01c7715SBarry Smith     break;
139b01c7715SBarry Smith   case 5:
140b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
141b01c7715SBarry Smith       odiag  = v + 25*diag_offset[i];
142580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
143a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1447b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145b01c7715SBarry Smith       diag  += 25;
146b01c7715SBarry Smith     }
147b01c7715SBarry Smith     break;
148d49b2adcSBarry Smith   case 6:
149d49b2adcSBarry Smith     for (i=0; i<mbs; i++) {
150d49b2adcSBarry Smith       odiag  = v + 36*diag_offset[i];
151580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
152a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1537b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
154d49b2adcSBarry Smith       diag  += 36;
155d49b2adcSBarry Smith     }
156d49b2adcSBarry Smith     break;
157de80f912SBarry Smith   case 7:
158de80f912SBarry Smith     for (i=0; i<mbs; i++) {
159de80f912SBarry Smith       odiag  = v + 49*diag_offset[i];
160580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
161a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1627b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163de80f912SBarry Smith       diag  += 49;
164de80f912SBarry Smith     }
165de80f912SBarry Smith     break;
166b01c7715SBarry Smith   default:
167dcca6d9dSJed Brown     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
168de80f912SBarry Smith     for (i=0; i<mbs; i++) {
169de80f912SBarry Smith       odiag  = v + bs2*diag_offset[i];
170580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
1715f8bbccaSHong Zhang       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1727b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
173de80f912SBarry Smith       diag  += bs2;
174de80f912SBarry Smith     }
175de80f912SBarry Smith     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
176b01c7715SBarry Smith   }
177b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
178b01c7715SBarry Smith   PetscFunctionReturn(0);
179b01c7715SBarry Smith }
180b01c7715SBarry Smith 
181e48d15efSToby Isaac PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1826d3beeddSMatthew Knepley {
1836d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
184e48d15efSToby Isaac   PetscScalar       *x,*work,*w,*workt,*t;
185e48d15efSToby Isaac   const MatScalar   *v,*aa = a->a, *idiag;
186e48d15efSToby Isaac   const PetscScalar *b,*xb;
1875455b99fSToby Isaac   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
1886d3beeddSMatthew Knepley   PetscErrorCode    ierr;
189e48d15efSToby Isaac   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
190c1ac3661SBarry Smith   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
191b01c7715SBarry Smith 
192b01c7715SBarry Smith   PetscFunctionBegin;
193b01c7715SBarry Smith   its = its*lits;
194e32f2f54SBarry Smith   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
195e32f2f54SBarry Smith   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
19652c4f950SSatish Balay   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
19752c4f950SSatish Balay   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
19852c4f950SSatish Balay   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
199b01c7715SBarry Smith 
2000298fd71SBarry Smith   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
201b01c7715SBarry Smith 
202b2ec919aSToby Isaac   if (!m) PetscFunctionReturn(0);
203b01c7715SBarry Smith   diag  = a->diag;
204b01c7715SBarry Smith   idiag = a->idiag;
205de80f912SBarry Smith   k    = PetscMax(A->rmap->n,A->cmap->n);
206e48d15efSToby Isaac   if (!a->mult_work) {
207f361c04dSBarry Smith     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
208de80f912SBarry Smith   }
2093475c22fSBarry Smith   if (!a->sor_workt) {
210f361c04dSBarry Smith     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
211de80f912SBarry Smith   }
212de80f912SBarry Smith   if (!a->sor_work) {
213785e854fSJed Brown     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
214de80f912SBarry Smith   }
2153475c22fSBarry Smith   work = a->mult_work;
2163475c22fSBarry Smith   t    = a->sor_workt;
217de80f912SBarry Smith   w    = a->sor_work;
218de80f912SBarry Smith 
219de80f912SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
220de80f912SBarry Smith   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
221de80f912SBarry Smith 
222de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
223de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
224e48d15efSToby Isaac       switch (bs) {
225e48d15efSToby Isaac       case 1:
226e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
227e48d15efSToby Isaac         t[0] = b[0];
228e48d15efSToby Isaac         i2     = 1;
229e48d15efSToby Isaac         idiag += 1;
230e48d15efSToby Isaac         for (i=1; i<m; i++) {
231e48d15efSToby Isaac           v  = aa + ai[i];
232e48d15efSToby Isaac           vi = aj + ai[i];
233e48d15efSToby Isaac           nz = diag[i] - ai[i];
234e48d15efSToby Isaac           s[0] = b[i2];
235e48d15efSToby Isaac           for (j=0; j<nz; j++) {
236e48d15efSToby Isaac             xw[0] = x[vi[j]];
237e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
238e48d15efSToby Isaac           }
239e48d15efSToby Isaac           t[i2] = s[0];
240e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
241e48d15efSToby Isaac           x[i2]  = xw[0];
242e48d15efSToby Isaac           idiag += 1;
243e48d15efSToby Isaac           i2    += 1;
244e48d15efSToby Isaac         }
245e48d15efSToby Isaac         break;
246e48d15efSToby Isaac       case 2:
247e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
248e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1];
249e48d15efSToby Isaac         i2     = 2;
250e48d15efSToby Isaac         idiag += 4;
251e48d15efSToby Isaac         for (i=1; i<m; i++) {
252e48d15efSToby Isaac           v  = aa + 4*ai[i];
253e48d15efSToby Isaac           vi = aj + ai[i];
254e48d15efSToby Isaac           nz = diag[i] - ai[i];
255e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
256e48d15efSToby Isaac           for (j=0; j<nz; j++) {
257e48d15efSToby Isaac             idx = 2*vi[j];
258e48d15efSToby Isaac             it  = 4*j;
259e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
260e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
261e48d15efSToby Isaac           }
262e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1];
263e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
264e48d15efSToby Isaac           x[i2]   = xw[0]; x[i2+1] = xw[1];
265e48d15efSToby Isaac           idiag  += 4;
266e48d15efSToby Isaac           i2     += 2;
267e48d15efSToby Isaac         }
268e48d15efSToby Isaac         break;
269e48d15efSToby Isaac       case 3:
270e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
271e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
272e48d15efSToby Isaac         i2     = 3;
273e48d15efSToby Isaac         idiag += 9;
274e48d15efSToby Isaac         for (i=1; i<m; i++) {
275e48d15efSToby Isaac           v  = aa + 9*ai[i];
276e48d15efSToby Isaac           vi = aj + ai[i];
277e48d15efSToby Isaac           nz = diag[i] - ai[i];
278e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
279e48d15efSToby Isaac           while (nz--) {
280e48d15efSToby Isaac             idx = 3*(*vi++);
281e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
282e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
283e48d15efSToby Isaac             v  += 9;
284e48d15efSToby Isaac           }
285e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
286e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
287e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
288e48d15efSToby Isaac           idiag  += 9;
289e48d15efSToby Isaac           i2     += 3;
290e48d15efSToby Isaac         }
291e48d15efSToby Isaac         break;
292e48d15efSToby Isaac       case 4:
293e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
294e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
295e48d15efSToby Isaac         i2     = 4;
296e48d15efSToby Isaac         idiag += 16;
297e48d15efSToby Isaac         for (i=1; i<m; i++) {
298e48d15efSToby Isaac           v  = aa + 16*ai[i];
299e48d15efSToby Isaac           vi = aj + ai[i];
300e48d15efSToby Isaac           nz = diag[i] - ai[i];
301e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
302e48d15efSToby Isaac           while (nz--) {
303e48d15efSToby Isaac             idx = 4*(*vi++);
304e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
305e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
306e48d15efSToby Isaac             v  += 16;
307e48d15efSToby Isaac           }
308e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
309e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
310e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
311e48d15efSToby Isaac           idiag  += 16;
312e48d15efSToby Isaac           i2     += 4;
313e48d15efSToby Isaac         }
314e48d15efSToby Isaac         break;
315e48d15efSToby Isaac       case 5:
316e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
317e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
318e48d15efSToby Isaac         i2     = 5;
319e48d15efSToby Isaac         idiag += 25;
320e48d15efSToby Isaac         for (i=1; i<m; i++) {
321e48d15efSToby Isaac           v  = aa + 25*ai[i];
322e48d15efSToby Isaac           vi = aj + ai[i];
323e48d15efSToby Isaac           nz = diag[i] - ai[i];
324e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
325e48d15efSToby Isaac           while (nz--) {
326e48d15efSToby Isaac             idx = 5*(*vi++);
327e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
328e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
329e48d15efSToby Isaac             v  += 25;
330e48d15efSToby Isaac           }
331e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
332e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
333e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
334e48d15efSToby Isaac           idiag  += 25;
335e48d15efSToby Isaac           i2     += 5;
336e48d15efSToby Isaac         }
337e48d15efSToby Isaac         break;
338e48d15efSToby Isaac       case 6:
339e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
340e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
341e48d15efSToby Isaac         i2     = 6;
342e48d15efSToby Isaac         idiag += 36;
343e48d15efSToby Isaac         for (i=1; i<m; i++) {
344e48d15efSToby Isaac           v  = aa + 36*ai[i];
345e48d15efSToby Isaac           vi = aj + ai[i];
346e48d15efSToby Isaac           nz = diag[i] - ai[i];
347e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
348e48d15efSToby Isaac           while (nz--) {
349e48d15efSToby Isaac             idx = 6*(*vi++);
350e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
351e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
352e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
353e48d15efSToby Isaac             v  += 36;
354e48d15efSToby Isaac           }
355e48d15efSToby Isaac           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
356e48d15efSToby Isaac           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
357e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
358e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
359e48d15efSToby Isaac           idiag  += 36;
360e48d15efSToby Isaac           i2     += 6;
361e48d15efSToby Isaac         }
362e48d15efSToby Isaac         break;
363e48d15efSToby Isaac       case 7:
364e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
365e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
366e48d15efSToby Isaac         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
367e48d15efSToby Isaac         i2     = 7;
368e48d15efSToby Isaac         idiag += 49;
369e48d15efSToby Isaac         for (i=1; i<m; i++) {
370e48d15efSToby Isaac           v  = aa + 49*ai[i];
371e48d15efSToby Isaac           vi = aj + ai[i];
372e48d15efSToby Isaac           nz = diag[i] - ai[i];
373e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
374e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
375e48d15efSToby Isaac           while (nz--) {
376e48d15efSToby Isaac             idx = 7*(*vi++);
377e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
378e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
379e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
380e48d15efSToby Isaac             v  += 49;
381e48d15efSToby Isaac           }
382e48d15efSToby Isaac           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
383e48d15efSToby Isaac           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
384e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
385e48d15efSToby Isaac           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
386e48d15efSToby Isaac           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
387e48d15efSToby Isaac           idiag  += 49;
388e48d15efSToby Isaac           i2     += 7;
389e48d15efSToby Isaac         }
390e48d15efSToby Isaac         break;
391e48d15efSToby Isaac       default:
39296b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
393580bdb30SBarry Smith         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
394de80f912SBarry Smith         i2     = bs;
395de80f912SBarry Smith         idiag += bs2;
396de80f912SBarry Smith         for (i=1; i<m; i++) {
397de80f912SBarry Smith           v  = aa + bs2*ai[i];
398de80f912SBarry Smith           vi = aj + ai[i];
399de80f912SBarry Smith           nz = diag[i] - ai[i];
400de80f912SBarry Smith 
401580bdb30SBarry Smith           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
402de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
403de80f912SBarry Smith           workt = work;
404de80f912SBarry Smith           for (j=0; j<nz; j++) {
405580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
406de80f912SBarry Smith             workt += bs;
407de80f912SBarry Smith           }
40896b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
409580bdb30SBarry Smith           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
41096b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
411de80f912SBarry Smith 
412de80f912SBarry Smith           idiag += bs2;
413de80f912SBarry Smith           i2    += bs;
414de80f912SBarry Smith         }
415e48d15efSToby Isaac         break;
416e48d15efSToby Isaac       }
417de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
418e48d15efSToby Isaac       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
419e48d15efSToby Isaac       xb = t;
420de80f912SBarry Smith     }
421e48d15efSToby Isaac     else xb = b;
422de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
423e48d15efSToby Isaac       idiag = a->idiag+bs2*(a->mbs-1);
424e48d15efSToby Isaac       i2 = bs * (m-1);
425e48d15efSToby Isaac       switch (bs) {
426e48d15efSToby Isaac       case 1:
427e48d15efSToby Isaac         s[0]  = xb[i2];
428e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
429e48d15efSToby Isaac         x[i2] = xw[0];
430e48d15efSToby Isaac         i2   -= 1;
431e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
432e48d15efSToby Isaac           v  = aa + (diag[i]+1);
433e48d15efSToby Isaac           vi = aj + diag[i] + 1;
434e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
435e48d15efSToby Isaac           s[0] = xb[i2];
436e48d15efSToby Isaac           for (j=0; j<nz; j++) {
437e48d15efSToby Isaac             xw[0] = x[vi[j]];
438e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
439e48d15efSToby Isaac           }
440e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
441e48d15efSToby Isaac           x[i2]  = xw[0];
442e48d15efSToby Isaac           idiag -= 1;
443e48d15efSToby Isaac           i2    -= 1;
444e48d15efSToby Isaac         }
445e48d15efSToby Isaac         break;
446e48d15efSToby Isaac       case 2:
447e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1];
448e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
449e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1];
450e48d15efSToby Isaac         i2    -= 2;
451e48d15efSToby Isaac         idiag -= 4;
452e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
453e48d15efSToby Isaac           v  = aa + 4*(diag[i] + 1);
454e48d15efSToby Isaac           vi = aj + diag[i] + 1;
455e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
456e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1];
457e48d15efSToby Isaac           for (j=0; j<nz; j++) {
458e48d15efSToby Isaac             idx = 2*vi[j];
459e48d15efSToby Isaac             it  = 4*j;
460e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
461e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
462e48d15efSToby Isaac           }
463e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
464e48d15efSToby Isaac           x[i2]   = xw[0]; x[i2+1] = xw[1];
465e48d15efSToby Isaac           idiag  -= 4;
466e48d15efSToby Isaac           i2     -= 2;
467e48d15efSToby Isaac         }
468e48d15efSToby Isaac         break;
469e48d15efSToby Isaac       case 3:
470e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
471e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
472e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
473e48d15efSToby Isaac         i2    -= 3;
474e48d15efSToby Isaac         idiag -= 9;
475e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
476e48d15efSToby Isaac           v  = aa + 9*(diag[i]+1);
477e48d15efSToby Isaac           vi = aj + diag[i] + 1;
478e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
479e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
480e48d15efSToby Isaac           while (nz--) {
481e48d15efSToby Isaac             idx = 3*(*vi++);
482e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
483e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
484e48d15efSToby Isaac             v  += 9;
485e48d15efSToby Isaac           }
486e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
487e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
488e48d15efSToby Isaac           idiag  -= 9;
489e48d15efSToby Isaac           i2     -= 3;
490e48d15efSToby Isaac         }
491e48d15efSToby Isaac         break;
492e48d15efSToby Isaac       case 4:
493e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
494e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
495e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
496e48d15efSToby Isaac         i2    -= 4;
497e48d15efSToby Isaac         idiag -= 16;
498e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
499e48d15efSToby Isaac           v  = aa + 16*(diag[i]+1);
500e48d15efSToby Isaac           vi = aj + diag[i] + 1;
501e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
502e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
503e48d15efSToby Isaac           while (nz--) {
504e48d15efSToby Isaac             idx = 4*(*vi++);
505e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
506e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
507e48d15efSToby Isaac             v  += 16;
508e48d15efSToby Isaac           }
509e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
510e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
511e48d15efSToby Isaac           idiag  -= 16;
512e48d15efSToby Isaac           i2     -= 4;
513e48d15efSToby Isaac         }
514e48d15efSToby Isaac         break;
515e48d15efSToby Isaac       case 5:
516e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
517e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
518e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
519e48d15efSToby Isaac         i2    -= 5;
520e48d15efSToby Isaac         idiag -= 25;
521e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
522e48d15efSToby Isaac           v  = aa + 25*(diag[i]+1);
523e48d15efSToby Isaac           vi = aj + diag[i] + 1;
524e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
525e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
526e48d15efSToby Isaac           while (nz--) {
527e48d15efSToby Isaac             idx = 5*(*vi++);
528e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
529e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
530e48d15efSToby Isaac             v  += 25;
531e48d15efSToby Isaac           }
532e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
533e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
534e48d15efSToby Isaac           idiag  -= 25;
535e48d15efSToby Isaac           i2     -= 5;
536e48d15efSToby Isaac         }
537e48d15efSToby Isaac         break;
538e48d15efSToby Isaac       case 6:
539e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
540e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
541e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
542e48d15efSToby Isaac         i2    -= 6;
543e48d15efSToby Isaac         idiag -= 36;
544e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
545e48d15efSToby Isaac           v  = aa + 36*(diag[i]+1);
546e48d15efSToby Isaac           vi = aj + diag[i] + 1;
547e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
548e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
549e48d15efSToby Isaac           while (nz--) {
550e48d15efSToby Isaac             idx = 6*(*vi++);
551e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
552e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
553e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
554e48d15efSToby Isaac             v  += 36;
555e48d15efSToby Isaac           }
556e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
557e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
558e48d15efSToby Isaac           idiag  -= 36;
559e48d15efSToby Isaac           i2     -= 6;
560e48d15efSToby Isaac         }
561e48d15efSToby Isaac         break;
562e48d15efSToby Isaac       case 7:
563e48d15efSToby Isaac         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
564e48d15efSToby Isaac         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
565e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
566e48d15efSToby Isaac         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
567e48d15efSToby Isaac         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
568e48d15efSToby Isaac         i2    -= 7;
569e48d15efSToby Isaac         idiag -= 49;
570e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
571e48d15efSToby Isaac           v  = aa + 49*(diag[i]+1);
572e48d15efSToby Isaac           vi = aj + diag[i] + 1;
573e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
574e48d15efSToby Isaac           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
575e48d15efSToby Isaac           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
576e48d15efSToby Isaac           while (nz--) {
577e48d15efSToby Isaac             idx = 7*(*vi++);
578e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
579e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
580e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
581e48d15efSToby Isaac             v  += 49;
582e48d15efSToby Isaac           }
583e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
584e48d15efSToby Isaac           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
585e48d15efSToby Isaac           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
586e48d15efSToby Isaac           idiag  -= 49;
587e48d15efSToby Isaac           i2     -= 7;
588e48d15efSToby Isaac         }
589e48d15efSToby Isaac         break;
590e48d15efSToby Isaac       default:
591580bdb30SBarry Smith         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
59296b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
593de80f912SBarry Smith         i2    -= bs;
594e48d15efSToby Isaac         idiag -= bs2;
595de80f912SBarry Smith         for (i=m-2; i>=0; i--) {
596de80f912SBarry Smith           v  = aa + bs2*(diag[i]+1);
597de80f912SBarry Smith           vi = aj + diag[i] + 1;
598de80f912SBarry Smith           nz = ai[i+1] - diag[i] - 1;
599de80f912SBarry Smith 
600580bdb30SBarry Smith           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
601de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
602de80f912SBarry Smith           workt = work;
603de80f912SBarry Smith           for (j=0; j<nz; j++) {
604580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
605de80f912SBarry Smith             workt += bs;
606de80f912SBarry Smith           }
60796b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
60896b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
609e48d15efSToby Isaac 
610de80f912SBarry Smith           idiag -= bs2;
611de80f912SBarry Smith           i2    -= bs;
612de80f912SBarry Smith         }
613e48d15efSToby Isaac         break;
614e48d15efSToby Isaac       }
615de80f912SBarry Smith       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
616de80f912SBarry Smith     }
617e48d15efSToby Isaac     its--;
618e48d15efSToby Isaac   }
619e48d15efSToby Isaac   while (its--) {
620e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
621e48d15efSToby Isaac       idiag = a->idiag;
622e48d15efSToby Isaac       i2 = 0;
623e48d15efSToby Isaac       switch (bs) {
624e48d15efSToby Isaac       case 1:
625e48d15efSToby Isaac         for (i=0; i<m; i++) {
626e48d15efSToby Isaac           v  = aa + ai[i];
627e48d15efSToby Isaac           vi = aj + ai[i];
628e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
629e48d15efSToby Isaac           s[0] = b[i2];
630e48d15efSToby Isaac           for (j=0; j<nz; j++) {
631e48d15efSToby Isaac             xw[0] = x[vi[j]];
632e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
633e48d15efSToby Isaac           }
634e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
635e48d15efSToby Isaac           x[i2] += xw[0];
636e48d15efSToby Isaac           idiag += 1;
637e48d15efSToby Isaac           i2    += 1;
638e48d15efSToby Isaac         }
639e48d15efSToby Isaac         break;
640e48d15efSToby Isaac       case 2:
641e48d15efSToby Isaac         for (i=0; i<m; i++) {
642e48d15efSToby Isaac           v  = aa + 4*ai[i];
643e48d15efSToby Isaac           vi = aj + ai[i];
644e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
645e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
646e48d15efSToby Isaac           for (j=0; j<nz; j++) {
647e48d15efSToby Isaac             idx = 2*vi[j];
648e48d15efSToby Isaac             it  = 4*j;
649e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
650e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
651e48d15efSToby Isaac           }
652e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
653e48d15efSToby Isaac           x[i2]  += xw[0]; x[i2+1] += xw[1];
654e48d15efSToby Isaac           idiag  += 4;
655e48d15efSToby Isaac           i2     += 2;
656e48d15efSToby Isaac         }
657e48d15efSToby Isaac         break;
658e48d15efSToby Isaac       case 3:
659e48d15efSToby Isaac         for (i=0; i<m; i++) {
660e48d15efSToby Isaac           v  = aa + 9*ai[i];
661e48d15efSToby Isaac           vi = aj + ai[i];
662e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
663e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
664e48d15efSToby Isaac           while (nz--) {
665e48d15efSToby Isaac             idx = 3*(*vi++);
666e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
667e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
668e48d15efSToby Isaac             v  += 9;
669e48d15efSToby Isaac           }
670e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
671e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672e48d15efSToby Isaac           idiag  += 9;
673e48d15efSToby Isaac           i2     += 3;
674e48d15efSToby Isaac         }
675e48d15efSToby Isaac         break;
676e48d15efSToby Isaac       case 4:
677e48d15efSToby Isaac         for (i=0; i<m; i++) {
678e48d15efSToby Isaac           v  = aa + 16*ai[i];
679e48d15efSToby Isaac           vi = aj + ai[i];
680e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
681e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
682e48d15efSToby Isaac           while (nz--) {
683e48d15efSToby Isaac             idx = 4*(*vi++);
684e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
685e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
686e48d15efSToby Isaac             v  += 16;
687e48d15efSToby Isaac           }
688e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
689e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
690e48d15efSToby Isaac           idiag  += 16;
691e48d15efSToby Isaac           i2     += 4;
692e48d15efSToby Isaac         }
693e48d15efSToby Isaac         break;
694e48d15efSToby Isaac       case 5:
695e48d15efSToby Isaac         for (i=0; i<m; i++) {
696e48d15efSToby Isaac           v  = aa + 25*ai[i];
697e48d15efSToby Isaac           vi = aj + ai[i];
698e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
699e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
700e48d15efSToby Isaac           while (nz--) {
701e48d15efSToby Isaac             idx = 5*(*vi++);
702e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
703e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
704e48d15efSToby Isaac             v  += 25;
705e48d15efSToby Isaac           }
706e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
707e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
708e48d15efSToby Isaac           idiag  += 25;
709e48d15efSToby Isaac           i2     += 5;
710e48d15efSToby Isaac         }
711e48d15efSToby Isaac         break;
712e48d15efSToby Isaac       case 6:
713e48d15efSToby Isaac         for (i=0; i<m; i++) {
714e48d15efSToby Isaac           v  = aa + 36*ai[i];
715e48d15efSToby Isaac           vi = aj + ai[i];
716e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
717e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
718e48d15efSToby Isaac           while (nz--) {
719e48d15efSToby Isaac             idx = 6*(*vi++);
720e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
721e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
722e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
723e48d15efSToby Isaac             v  += 36;
724e48d15efSToby Isaac           }
725e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
726e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
727e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
728e48d15efSToby Isaac           idiag  += 36;
729e48d15efSToby Isaac           i2     += 6;
730e48d15efSToby Isaac         }
731e48d15efSToby Isaac         break;
732e48d15efSToby Isaac       case 7:
733e48d15efSToby Isaac         for (i=0; i<m; i++) {
734e48d15efSToby Isaac           v  = aa + 49*ai[i];
735e48d15efSToby Isaac           vi = aj + ai[i];
736e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
737e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
738e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
739e48d15efSToby Isaac           while (nz--) {
740e48d15efSToby Isaac             idx = 7*(*vi++);
741e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
742e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
743e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
744e48d15efSToby Isaac             v  += 49;
745e48d15efSToby Isaac           }
746e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
747e48d15efSToby Isaac           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
748e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
749e48d15efSToby Isaac           idiag  += 49;
750e48d15efSToby Isaac           i2     += 7;
751e48d15efSToby Isaac         }
752e48d15efSToby Isaac         break;
753e48d15efSToby Isaac       default:
754e48d15efSToby Isaac         for (i=0; i<m; i++) {
755e48d15efSToby Isaac           v  = aa + bs2*ai[i];
756e48d15efSToby Isaac           vi = aj + ai[i];
757e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
758e48d15efSToby Isaac 
759580bdb30SBarry Smith           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
760e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
761e48d15efSToby Isaac           workt = work;
762e48d15efSToby Isaac           for (j=0; j<nz; j++) {
763580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
764e48d15efSToby Isaac             workt += bs;
765e48d15efSToby Isaac           }
766e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
767e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
768e48d15efSToby Isaac 
769e48d15efSToby Isaac           idiag += bs2;
770e48d15efSToby Isaac           i2    += bs;
771e48d15efSToby Isaac         }
772e48d15efSToby Isaac         break;
773e48d15efSToby Isaac       }
774e48d15efSToby Isaac       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
775e48d15efSToby Isaac     }
776e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
777e48d15efSToby Isaac       idiag = a->idiag+bs2*(a->mbs-1);
778e48d15efSToby Isaac       i2 = bs * (m-1);
779e48d15efSToby Isaac       switch (bs) {
780e48d15efSToby Isaac       case 1:
781e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
782e48d15efSToby Isaac           v  = aa + ai[i];
783e48d15efSToby Isaac           vi = aj + ai[i];
784e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
785e48d15efSToby Isaac           s[0] = b[i2];
786e48d15efSToby Isaac           for (j=0; j<nz; j++) {
787e48d15efSToby Isaac             xw[0] = x[vi[j]];
788e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
789e48d15efSToby Isaac           }
790e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
791e48d15efSToby Isaac           x[i2] += xw[0];
792e48d15efSToby Isaac           idiag -= 1;
793e48d15efSToby Isaac           i2    -= 1;
794e48d15efSToby Isaac         }
795e48d15efSToby Isaac         break;
796e48d15efSToby Isaac       case 2:
797e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
798e48d15efSToby Isaac           v  = aa + 4*ai[i];
799e48d15efSToby Isaac           vi = aj + ai[i];
800e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
801e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
802e48d15efSToby Isaac           for (j=0; j<nz; j++) {
803e48d15efSToby Isaac             idx = 2*vi[j];
804e48d15efSToby Isaac             it  = 4*j;
805e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
806e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
807e48d15efSToby Isaac           }
808e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
809e48d15efSToby Isaac           x[i2]  += xw[0]; x[i2+1] += xw[1];
810e48d15efSToby Isaac           idiag  -= 4;
811e48d15efSToby Isaac           i2     -= 2;
812e48d15efSToby Isaac         }
813e48d15efSToby Isaac         break;
814e48d15efSToby Isaac       case 3:
815e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
816e48d15efSToby Isaac           v  = aa + 9*ai[i];
817e48d15efSToby Isaac           vi = aj + ai[i];
818e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
819e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
820e48d15efSToby Isaac           while (nz--) {
821e48d15efSToby Isaac             idx = 3*(*vi++);
822e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
823e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
824e48d15efSToby Isaac             v  += 9;
825e48d15efSToby Isaac           }
826e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
827e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828e48d15efSToby Isaac           idiag  -= 9;
829e48d15efSToby Isaac           i2     -= 3;
830e48d15efSToby Isaac         }
831e48d15efSToby Isaac         break;
832e48d15efSToby Isaac       case 4:
833e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
834e48d15efSToby Isaac           v  = aa + 16*ai[i];
835e48d15efSToby Isaac           vi = aj + ai[i];
836e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
837e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
838e48d15efSToby Isaac           while (nz--) {
839e48d15efSToby Isaac             idx = 4*(*vi++);
840e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
841e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
842e48d15efSToby Isaac             v  += 16;
843e48d15efSToby Isaac           }
844e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
845e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
846e48d15efSToby Isaac           idiag  -= 16;
847e48d15efSToby Isaac           i2     -= 4;
848e48d15efSToby Isaac         }
849e48d15efSToby Isaac         break;
850e48d15efSToby Isaac       case 5:
851e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
852e48d15efSToby Isaac           v  = aa + 25*ai[i];
853e48d15efSToby Isaac           vi = aj + ai[i];
854e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
855e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
856e48d15efSToby Isaac           while (nz--) {
857e48d15efSToby Isaac             idx = 5*(*vi++);
858e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
859e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
860e48d15efSToby Isaac             v  += 25;
861e48d15efSToby Isaac           }
862e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
863e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
864e48d15efSToby Isaac           idiag  -= 25;
865e48d15efSToby Isaac           i2     -= 5;
866e48d15efSToby Isaac         }
867e48d15efSToby Isaac         break;
868e48d15efSToby Isaac       case 6:
869e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
870e48d15efSToby Isaac           v  = aa + 36*ai[i];
871e48d15efSToby Isaac           vi = aj + ai[i];
872e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
873e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
874e48d15efSToby Isaac           while (nz--) {
875e48d15efSToby Isaac             idx = 6*(*vi++);
876e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
877e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
878e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
879e48d15efSToby Isaac             v  += 36;
880e48d15efSToby Isaac           }
881e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
882e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
883e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
884e48d15efSToby Isaac           idiag  -= 36;
885e48d15efSToby Isaac           i2     -= 6;
886e48d15efSToby Isaac         }
887e48d15efSToby Isaac         break;
888e48d15efSToby Isaac       case 7:
889e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
890e48d15efSToby Isaac           v  = aa + 49*ai[i];
891e48d15efSToby Isaac           vi = aj + ai[i];
892e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
893e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
894e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
895e48d15efSToby Isaac           while (nz--) {
896e48d15efSToby Isaac             idx = 7*(*vi++);
897e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
898e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
899e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
900e48d15efSToby Isaac             v  += 49;
901e48d15efSToby Isaac           }
902e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
903e48d15efSToby Isaac           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
904e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
905e48d15efSToby Isaac           idiag  -= 49;
906e48d15efSToby Isaac           i2     -= 7;
907e48d15efSToby Isaac         }
908e48d15efSToby Isaac         break;
909e48d15efSToby Isaac       default:
910e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
911e48d15efSToby Isaac           v  = aa + bs2*ai[i];
912e48d15efSToby Isaac           vi = aj + ai[i];
913e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
914e48d15efSToby Isaac 
915580bdb30SBarry Smith           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
916e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
917e48d15efSToby Isaac           workt = work;
918e48d15efSToby Isaac           for (j=0; j<nz; j++) {
919580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
920e48d15efSToby Isaac             workt += bs;
921e48d15efSToby Isaac           }
922e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
923e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
924e48d15efSToby Isaac 
925e48d15efSToby Isaac           idiag -= bs2;
926e48d15efSToby Isaac           i2    -= bs;
927e48d15efSToby Isaac         }
928e48d15efSToby Isaac         break;
929e48d15efSToby Isaac       }
930e48d15efSToby Isaac       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
931e48d15efSToby Isaac     }
932e48d15efSToby Isaac   }
933de80f912SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
934de80f912SBarry Smith   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
935de80f912SBarry Smith   PetscFunctionReturn(0);
936de80f912SBarry Smith }
937de80f912SBarry Smith 
938af674e45SBarry Smith /*
93981824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
940af674e45SBarry Smith */
941af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
942af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
943af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
944af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4
945af674e45SBarry Smith #endif
946af674e45SBarry Smith 
9478cc058d9SJed Brown PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
948af674e45SBarry Smith {
949af674e45SBarry Smith   Mat               A  = *AA;
950af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
951c1ac3661SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
952c1ac3661SBarry Smith   PetscInt          *ai    =a->i,*ailen=a->ilen;
95317ec6a02SBarry Smith   PetscInt          *aj    =a->j,stepval,lastcol = -1;
954f15d580aSBarry Smith   const PetscScalar *value = v;
9554bb09213Spetsc   MatScalar         *ap,*aa = a->a,*bap;
95670990e77SSatish Balay   PetscErrorCode    ierr;
957af674e45SBarry Smith 
958af674e45SBarry Smith   PetscFunctionBegin;
959ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
960af674e45SBarry Smith   stepval = (n-1)*4;
961af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
962af674e45SBarry Smith     row  = im[k];
963af674e45SBarry Smith     rp   = aj + ai[row];
964af674e45SBarry Smith     ap   = aa + 16*ai[row];
965af674e45SBarry Smith     nrow = ailen[row];
966af674e45SBarry Smith     low  = 0;
96717ec6a02SBarry Smith     high = nrow;
968af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
969af674e45SBarry Smith       col = in[l];
970db4deed7SKarl Rupp       if (col <= lastcol)  low = 0;
971db4deed7SKarl Rupp       else                high = nrow;
97217ec6a02SBarry Smith       lastcol = col;
9731e3347e8SBarry Smith       value   = v + k*(stepval+4 + l)*4;
974af674e45SBarry Smith       while (high-low > 7) {
975af674e45SBarry Smith         t = (low+high)/2;
976af674e45SBarry Smith         if (rp[t] > col) high = t;
977af674e45SBarry Smith         else             low  = t;
978af674e45SBarry Smith       }
979af674e45SBarry Smith       for (i=low; i<high; i++) {
980af674e45SBarry Smith         if (rp[i] > col) break;
981af674e45SBarry Smith         if (rp[i] == col) {
982af674e45SBarry Smith           bap = ap +  16*i;
983af674e45SBarry Smith           for (ii=0; ii<4; ii++,value+=stepval) {
984af674e45SBarry Smith             for (jj=ii; jj<16; jj+=4) {
985af674e45SBarry Smith               bap[jj] += *value++;
986af674e45SBarry Smith             }
987af674e45SBarry Smith           }
988af674e45SBarry Smith           goto noinsert2;
989af674e45SBarry Smith         }
990af674e45SBarry Smith       }
991af674e45SBarry Smith       N = nrow++ - 1;
99217ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
993af674e45SBarry Smith       /* shift up all the later entries in this row */
994af674e45SBarry Smith       for (ii=N; ii>=i; ii--) {
995af674e45SBarry Smith         rp[ii+1] = rp[ii];
99670990e77SSatish Balay         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
997af674e45SBarry Smith       }
998af674e45SBarry Smith       if (N >= i) {
99970990e77SSatish Balay         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1000af674e45SBarry Smith       }
1001af674e45SBarry Smith       rp[i] = col;
1002af674e45SBarry Smith       bap   = ap +  16*i;
1003af674e45SBarry Smith       for (ii=0; ii<4; ii++,value+=stepval) {
1004af674e45SBarry Smith         for (jj=ii; jj<16; jj+=4) {
1005af674e45SBarry Smith           bap[jj] = *value++;
1006af674e45SBarry Smith         }
1007af674e45SBarry Smith       }
1008af674e45SBarry Smith       noinsert2:;
1009af674e45SBarry Smith       low = i;
1010af674e45SBarry Smith     }
1011af674e45SBarry Smith     ailen[row] = nrow;
1012af674e45SBarry Smith   }
1013be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1014af674e45SBarry Smith }
1015af674e45SBarry Smith 
1016af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1017af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4
1018af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1019af674e45SBarry Smith #define matsetvalues4_ matsetvalues4
1020af674e45SBarry Smith #endif
1021af674e45SBarry Smith 
10228cc058d9SJed Brown PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1023af674e45SBarry Smith {
1024af674e45SBarry Smith   Mat         A  = *AA;
1025af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1026580bdb30SBarry Smith   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1027c1ac3661SBarry Smith   PetscInt    *ai=a->i,*ailen=a->ilen;
1028c1ac3661SBarry Smith   PetscInt    *aj=a->j,brow,bcol;
102917ec6a02SBarry Smith   PetscInt    ridx,cidx,lastcol = -1;
1030af674e45SBarry Smith   MatScalar   *ap,value,*aa=a->a,*bap;
103170990e77SSatish Balay   PetscErrorCode ierr;
1032af674e45SBarry Smith 
1033af674e45SBarry Smith   PetscFunctionBegin;
1034af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
1035af674e45SBarry Smith     row  = im[k]; brow = row/4;
1036af674e45SBarry Smith     rp   = aj + ai[brow];
1037af674e45SBarry Smith     ap   = aa + 16*ai[brow];
1038af674e45SBarry Smith     nrow = ailen[brow];
1039af674e45SBarry Smith     low  = 0;
104017ec6a02SBarry Smith     high = nrow;
1041af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
1042af674e45SBarry Smith       col   = in[l]; bcol = col/4;
1043af674e45SBarry Smith       ridx  = row % 4; cidx = col % 4;
1044af674e45SBarry Smith       value = v[l + k*n];
1045db4deed7SKarl Rupp       if (col <= lastcol)  low = 0;
1046db4deed7SKarl Rupp       else                high = nrow;
104717ec6a02SBarry Smith       lastcol = col;
1048af674e45SBarry Smith       while (high-low > 7) {
1049af674e45SBarry Smith         t = (low+high)/2;
1050af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1051af674e45SBarry Smith         else              low  = t;
1052af674e45SBarry Smith       }
1053af674e45SBarry Smith       for (i=low; i<high; i++) {
1054af674e45SBarry Smith         if (rp[i] > bcol) break;
1055af674e45SBarry Smith         if (rp[i] == bcol) {
1056af674e45SBarry Smith           bap   = ap +  16*i + 4*cidx + ridx;
1057af674e45SBarry Smith           *bap += value;
1058af674e45SBarry Smith           goto noinsert1;
1059af674e45SBarry Smith         }
1060af674e45SBarry Smith       }
1061af674e45SBarry Smith       N = nrow++ - 1;
106217ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1063af674e45SBarry Smith       /* shift up all the later entries in this row */
106470990e77SSatish Balay       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
106570990e77SSatish Balay       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
106670990e77SSatish Balay       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1067af674e45SBarry Smith       rp[i]                    = bcol;
1068af674e45SBarry Smith       ap[16*i + 4*cidx + ridx] = value;
1069af674e45SBarry Smith noinsert1:;
1070af674e45SBarry Smith       low = i;
1071af674e45SBarry Smith     }
1072af674e45SBarry Smith     ailen[brow] = nrow;
1073af674e45SBarry Smith   }
1074be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1075af674e45SBarry Smith }
1076af674e45SBarry Smith 
1077be5855fcSBarry Smith /*
1078be5855fcSBarry Smith      Checks for missing diagonals
1079be5855fcSBarry Smith */
1080ace3abfcSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1081be5855fcSBarry Smith {
1082be5855fcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
10836849ba73SBarry Smith   PetscErrorCode ierr;
10847734d3b5SMatthew G. Knepley   PetscInt       *diag,*ii = a->i,i;
1085be5855fcSBarry Smith 
1086be5855fcSBarry Smith   PetscFunctionBegin;
1087c4992f7dSBarry Smith   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
10882af78befSBarry Smith   *missing = PETSC_FALSE;
10897734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
10902efa7f71SHong Zhang     *missing = PETSC_TRUE;
10912efa7f71SHong Zhang     if (d) *d = 0;
1092994fe344SLisandro Dalcin     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
10932efa7f71SHong Zhang   } else {
109401445905SHong Zhang     PetscInt n;
109501445905SHong Zhang     n = PetscMin(a->mbs, a->nbs);
1096883fce79SBarry Smith     diag = a->diag;
109701445905SHong Zhang     for (i=0; i<n; i++) {
10987734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i+1]) {
10992af78befSBarry Smith         *missing = PETSC_TRUE;
11002af78befSBarry Smith         if (d) *d = i;
1101994fe344SLisandro Dalcin         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1102358d2f5dSShri Abhyankar         break;
11032efa7f71SHong Zhang       }
1104be5855fcSBarry Smith     }
1105be5855fcSBarry Smith   }
1106be5855fcSBarry Smith   PetscFunctionReturn(0);
1107be5855fcSBarry Smith }
1108be5855fcSBarry Smith 
1109dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1110de6a44a3SBarry Smith {
1111de6a44a3SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
11126849ba73SBarry Smith   PetscErrorCode ierr;
111309f38230SBarry Smith   PetscInt       i,j,m = a->mbs;
1114de6a44a3SBarry Smith 
11153a40ed3dSBarry Smith   PetscFunctionBegin;
111609f38230SBarry Smith   if (!a->diag) {
1117785e854fSJed Brown     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
11183bb1ff40SBarry Smith     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
11194fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
112009f38230SBarry Smith   }
11217fc0212eSBarry Smith   for (i=0; i<m; i++) {
112209f38230SBarry Smith     a->diag[i] = a->i[i+1];
1123de6a44a3SBarry Smith     for (j=a->i[i]; j<a->i[i+1]; j++) {
1124de6a44a3SBarry Smith       if (a->j[j] == i) {
112509f38230SBarry Smith         a->diag[i] = j;
1126de6a44a3SBarry Smith         break;
1127de6a44a3SBarry Smith       }
1128de6a44a3SBarry Smith     }
1129de6a44a3SBarry Smith   }
11303a40ed3dSBarry Smith   PetscFunctionReturn(0);
1131de6a44a3SBarry Smith }
11322593348eSBarry Smith 
11331a83f524SJed Brown static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
11343b2fbd54SBarry Smith {
11353b2fbd54SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1136dfbe8321SBarry Smith   PetscErrorCode ierr;
11371a83f524SJed Brown   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
11381a83f524SJed Brown   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
11393b2fbd54SBarry Smith 
11403a40ed3dSBarry Smith   PetscFunctionBegin;
11413b2fbd54SBarry Smith   *nn = n;
11423a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
11433b2fbd54SBarry Smith   if (symmetric) {
11442462f5fdSStefano Zampini     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1145553b3c51SBarry Smith     nz   = tia[n];
11463b2fbd54SBarry Smith   } else {
11478f7157efSSatish Balay     tia = a->i; tja = a->j;
11483b2fbd54SBarry Smith   }
11493b2fbd54SBarry Smith 
1150ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1151ecc77c7aSBarry Smith     (*nn) *= bs;
11528f7157efSSatish Balay     /* malloc & create the natural set of indices */
1153785e854fSJed Brown     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
11549985e31cSBarry Smith     if (n) {
11552462f5fdSStefano Zampini       (*ia)[0] = oshift;
1156ecc77c7aSBarry Smith       for (j=1; j<bs; j++) {
1157ecc77c7aSBarry Smith         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1158ecc77c7aSBarry Smith       }
11599985e31cSBarry Smith     }
1160ecc77c7aSBarry Smith 
1161ecc77c7aSBarry Smith     for (i=1; i<n; i++) {
1162ecc77c7aSBarry Smith       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1163ecc77c7aSBarry Smith       for (j=1; j<bs; j++) {
1164ecc77c7aSBarry Smith         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
11658f7157efSSatish Balay       }
11668f7157efSSatish Balay     }
11679985e31cSBarry Smith     if (n) {
1168ecc77c7aSBarry Smith       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
11699985e31cSBarry Smith     }
1170ecc77c7aSBarry Smith 
11711a83f524SJed Brown     if (inja) {
1172785e854fSJed Brown       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
11739985e31cSBarry Smith       cnt = 0;
11749985e31cSBarry Smith       for (i=0; i<n; i++) {
11759985e31cSBarry Smith         for (j=0; j<bs; j++) {
11769985e31cSBarry Smith           for (k=tia[i]; k<tia[i+1]; k++) {
11779985e31cSBarry Smith             for (l=0; l<bs; l++) {
11789985e31cSBarry Smith               (*ja)[cnt++] = bs*tja[k] + l;
11799985e31cSBarry Smith             }
11809985e31cSBarry Smith           }
11819985e31cSBarry Smith         }
11829985e31cSBarry Smith       }
11839985e31cSBarry Smith     }
11849985e31cSBarry Smith 
11858f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
11868f7157efSSatish Balay       ierr = PetscFree(tia);CHKERRQ(ierr);
11878f7157efSSatish Balay       ierr = PetscFree(tja);CHKERRQ(ierr);
11888f7157efSSatish Balay     }
1189f6d58c54SBarry Smith   } else if (oshift == 1) {
1190715a17b5SBarry Smith     if (symmetric) {
1191a2ea699eSBarry Smith       nz = tia[A->rmap->n/bs];
1192715a17b5SBarry Smith       /*  add 1 to i and j indices */
1193715a17b5SBarry Smith       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1194715a17b5SBarry Smith       *ia = tia;
1195715a17b5SBarry Smith       if (ja) {
1196715a17b5SBarry Smith         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1197715a17b5SBarry Smith         *ja = tja;
1198715a17b5SBarry Smith       }
1199715a17b5SBarry Smith     } else {
1200a2ea699eSBarry Smith       nz = a->i[A->rmap->n/bs];
1201f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
1202854ce69bSBarry Smith       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1203f6d58c54SBarry Smith       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1204f6d58c54SBarry Smith       if (ja) {
1205785e854fSJed Brown         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1206f6d58c54SBarry Smith         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1207f6d58c54SBarry Smith       }
1208715a17b5SBarry Smith     }
12098f7157efSSatish Balay   } else {
12108f7157efSSatish Balay     *ia = tia;
1211ecc77c7aSBarry Smith     if (ja) *ja = tja;
12128f7157efSSatish Balay   }
12133a40ed3dSBarry Smith   PetscFunctionReturn(0);
12143b2fbd54SBarry Smith }
12153b2fbd54SBarry Smith 
12161a83f524SJed Brown static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
12173b2fbd54SBarry Smith {
12186849ba73SBarry Smith   PetscErrorCode ierr;
12193b2fbd54SBarry Smith 
12203a40ed3dSBarry Smith   PetscFunctionBegin;
12213a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
1222715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1223606d414cSSatish Balay     ierr = PetscFree(*ia);CHKERRQ(ierr);
12249985e31cSBarry Smith     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
12253b2fbd54SBarry Smith   }
12263a40ed3dSBarry Smith   PetscFunctionReturn(0);
12273b2fbd54SBarry Smith }
12283b2fbd54SBarry Smith 
1229dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
12302d61bbb3SSatish Balay {
12312d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1232dfbe8321SBarry Smith   PetscErrorCode ierr;
12332d61bbb3SSatish Balay 
1234433994e6SBarry Smith   PetscFunctionBegin;
1235aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1236d0f46423SBarry Smith   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
12372d61bbb3SSatish Balay #endif
1238e6b907acSBarry Smith   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
12396bf464f9SBarry Smith   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
12406bf464f9SBarry Smith   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
12414fd072dbSBarry Smith   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
124205b42c5fSBarry Smith   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
12434fd072dbSBarry Smith   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
124405b42c5fSBarry Smith   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
124505b42c5fSBarry Smith   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1246f361c04dSBarry Smith   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1247de80f912SBarry Smith   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
12486bf464f9SBarry Smith   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
124905b42c5fSBarry Smith   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1250cd6b891eSBarry Smith   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1251c4319e64SHong Zhang 
12526bf464f9SBarry Smith   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
12536bf464f9SBarry Smith   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1254bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
1255901853e0SKris Buschelman 
1256f4259b30SLisandro Dalcin   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1257cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1258cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1259bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1260bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1261bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1262bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1263bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1264bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1265bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1266bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1267bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1268bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
12697ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
1270c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
12717ea3e4caSstefano_zampini #endif
1272c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
12732d61bbb3SSatish Balay   PetscFunctionReturn(0);
12742d61bbb3SSatish Balay }
12752d61bbb3SSatish Balay 
1276ace3abfcSBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
12772d61bbb3SSatish Balay {
12782d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
127963ba0a88SBarry Smith   PetscErrorCode ierr;
12802d61bbb3SSatish Balay 
12812d61bbb3SSatish Balay   PetscFunctionBegin;
1282aa275fccSKris Buschelman   switch (op) {
1283aa275fccSKris Buschelman   case MAT_ROW_ORIENTED:
12844e0d8c25SBarry Smith     a->roworiented = flg;
1285aa275fccSKris Buschelman     break;
1286a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1287a9817697SBarry Smith     a->keepnonzeropattern = flg;
1288aa275fccSKris Buschelman     break;
1289512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1290512a5fc5SBarry Smith     a->nonew = (flg ? 0 : 1);
1291aa275fccSKris Buschelman     break;
1292aa275fccSKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
12934e0d8c25SBarry Smith     a->nonew = (flg ? -1 : 0);
1294aa275fccSKris Buschelman     break;
1295aa275fccSKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
12964e0d8c25SBarry Smith     a->nonew = (flg ? -2 : 0);
1297aa275fccSKris Buschelman     break;
129828b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
129928b2fa4aSMatthew Knepley     a->nounused = (flg ? -1 : 0);
130028b2fa4aSMatthew Knepley     break;
13018c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1302aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1303aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
1304071fcb05SBarry Smith   case MAT_SORTED_FULL:
1305290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1306aa275fccSKris Buschelman     break;
13075021d80fSJed Brown   case MAT_SPD:
130877e54ba9SKris Buschelman   case MAT_SYMMETRIC:
130977e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
13109a4540c5SBarry Smith   case MAT_HERMITIAN:
13119a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1312c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1313672ba085SHong Zhang   case MAT_STRUCTURE_ONLY:
13145021d80fSJed Brown     /* These options are handled directly by MatSetOption() */
131577e54ba9SKris Buschelman     break;
1316aa275fccSKris Buschelman   default:
1317e32f2f54SBarry Smith     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
13182d61bbb3SSatish Balay   }
13192d61bbb3SSatish Balay   PetscFunctionReturn(0);
13202d61bbb3SSatish Balay }
13212d61bbb3SSatish Balay 
132252768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
132352768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
13242d61bbb3SSatish Balay {
13256849ba73SBarry Smith   PetscErrorCode ierr;
132652768537SHong Zhang   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
132752768537SHong Zhang   MatScalar      *aa_i;
132887828ca2SBarry Smith   PetscScalar    *v_i;
13292d61bbb3SSatish Balay 
13302d61bbb3SSatish Balay   PetscFunctionBegin;
1331d0f46423SBarry Smith   bs  = A->rmap->bs;
133252768537SHong Zhang   bs2 = bs*bs;
1333e32f2f54SBarry Smith   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
13342d61bbb3SSatish Balay 
13352d61bbb3SSatish Balay   bn  = row/bs;   /* Block number */
13362d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
13372d61bbb3SSatish Balay   M   = ai[bn+1] - ai[bn];
13382d61bbb3SSatish Balay   *nz = bs*M;
13392d61bbb3SSatish Balay 
13402d61bbb3SSatish Balay   if (v) {
1341f4259b30SLisandro Dalcin     *v = NULL;
13422d61bbb3SSatish Balay     if (*nz) {
1343854ce69bSBarry Smith       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
13442d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
13452d61bbb3SSatish Balay         v_i  = *v + i*bs;
13462d61bbb3SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
134726fbe8dcSKarl Rupp         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
13482d61bbb3SSatish Balay       }
13492d61bbb3SSatish Balay     }
13502d61bbb3SSatish Balay   }
13512d61bbb3SSatish Balay 
13522d61bbb3SSatish Balay   if (idx) {
1353f4259b30SLisandro Dalcin     *idx = NULL;
13542d61bbb3SSatish Balay     if (*nz) {
1355854ce69bSBarry Smith       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
13562d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
13572d61bbb3SSatish Balay         idx_i = *idx + i*bs;
13582d61bbb3SSatish Balay         itmp  = bs*aj[ai[bn] + i];
135926fbe8dcSKarl Rupp         for (j=0; j<bs; j++) idx_i[j] = itmp++;
13602d61bbb3SSatish Balay       }
13612d61bbb3SSatish Balay     }
13622d61bbb3SSatish Balay   }
13632d61bbb3SSatish Balay   PetscFunctionReturn(0);
13642d61bbb3SSatish Balay }
13652d61bbb3SSatish Balay 
136652768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
136752768537SHong Zhang {
136852768537SHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
136952768537SHong Zhang   PetscErrorCode ierr;
137052768537SHong Zhang 
137152768537SHong Zhang   PetscFunctionBegin;
137252768537SHong Zhang   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
137352768537SHong Zhang   PetscFunctionReturn(0);
137452768537SHong Zhang }
137552768537SHong Zhang 
1376c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
13772d61bbb3SSatish Balay {
1378dfbe8321SBarry Smith   PetscErrorCode ierr;
1379606d414cSSatish Balay 
13802d61bbb3SSatish Balay   PetscFunctionBegin;
1381cb4a9cd9SHong Zhang   if (nz)  *nz = 0;
138205b42c5fSBarry Smith   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
138305b42c5fSBarry Smith   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
13842d61bbb3SSatish Balay   PetscFunctionReturn(0);
13852d61bbb3SSatish Balay }
13862d61bbb3SSatish Balay 
1387fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
13882d61bbb3SSatish Balay {
138920e84f26SHong Zhang   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
13902d61bbb3SSatish Balay   Mat            C;
13916849ba73SBarry Smith   PetscErrorCode ierr;
139220e84f26SHong Zhang   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
139320e84f26SHong Zhang   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
139420e84f26SHong Zhang   MatScalar      *ata,*aa=a->a;
13952d61bbb3SSatish Balay 
13962d61bbb3SSatish Balay   PetscFunctionBegin;
139720e84f26SHong Zhang   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1398cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
139920e84f26SHong Zhang     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
14002d61bbb3SSatish Balay 
1401ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1402d0f46423SBarry Smith     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
14037adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
140420e84f26SHong Zhang     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
140520e84f26SHong Zhang 
140620e84f26SHong Zhang     at  = (Mat_SeqBAIJ*)C->data;
140720e84f26SHong Zhang     ati = at->i;
140820e84f26SHong Zhang     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1409fc4dec0aSBarry Smith   } else {
1410fc4dec0aSBarry Smith     C = *B;
141120e84f26SHong Zhang     at = (Mat_SeqBAIJ*)C->data;
141220e84f26SHong Zhang     ati = at->i;
1413fc4dec0aSBarry Smith   }
1414fc4dec0aSBarry Smith 
141520e84f26SHong Zhang   atj = at->j;
141620e84f26SHong Zhang   ata = at->a;
141720e84f26SHong Zhang 
141820e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
1419580bdb30SBarry Smith   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
142020e84f26SHong Zhang 
142120e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
14222d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
142320e84f26SHong Zhang     anzj = ai[i+1] - ai[i];
142420e84f26SHong Zhang     for (j=0; j<anzj; j++) {
142520e84f26SHong Zhang       atj[atfill[*aj]] = i;
142620e84f26SHong Zhang       for (kr=0; kr<bs; kr++) {
142720e84f26SHong Zhang         for (k=0; k<bs; k++) {
142820e84f26SHong Zhang           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
14292d61bbb3SSatish Balay         }
14302d61bbb3SSatish Balay       }
143120e84f26SHong Zhang       atfill[*aj++] += 1;
143220e84f26SHong Zhang     }
143320e84f26SHong Zhang   }
14342d61bbb3SSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
14352d61bbb3SSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
14362d61bbb3SSatish Balay 
143720e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
143820e84f26SHong Zhang   ierr = PetscFree(atfill);CHKERRQ(ierr);
143920e84f26SHong Zhang 
1440cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
144120e84f26SHong Zhang     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
14422d61bbb3SSatish Balay     *B = C;
14432d61bbb3SSatish Balay   } else {
144428be2f97SBarry Smith     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
14452d61bbb3SSatish Balay   }
14462d61bbb3SSatish Balay   PetscFunctionReturn(0);
14472d61bbb3SSatish Balay }
14482d61bbb3SSatish Balay 
1449453d3561SHong Zhang PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1450453d3561SHong Zhang {
1451453d3561SHong Zhang   PetscErrorCode ierr;
1452453d3561SHong Zhang   Mat            Btrans;
1453453d3561SHong Zhang 
1454453d3561SHong Zhang   PetscFunctionBegin;
1455453d3561SHong Zhang   *f   = PETSC_FALSE;
1456453d3561SHong Zhang   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1457453d3561SHong Zhang   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1458453d3561SHong Zhang   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1459453d3561SHong Zhang   PetscFunctionReturn(0);
1460453d3561SHong Zhang }
1461453d3561SHong Zhang 
1462618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1463b51a4376SLisandro Dalcin PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
14642593348eSBarry Smith {
1465b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1466b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1467b51a4376SLisandro Dalcin   PetscInt       *rowlens,*colidxs;
1468b51a4376SLisandro Dalcin   PetscScalar    *matvals;
14696849ba73SBarry Smith   PetscErrorCode ierr;
14702593348eSBarry Smith 
14713a40ed3dSBarry Smith   PetscFunctionBegin;
1472b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
14733b2fbd54SBarry Smith 
1474b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1475b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1476b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1477b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1478b51a4376SLisandro Dalcin   nz = bs*bs*A->nz;
14792593348eSBarry Smith 
1480b51a4376SLisandro Dalcin   /* write matrix header */
1481b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
1482b51a4376SLisandro Dalcin   header[1] = M; header[2] = N; header[3] = nz;
1483b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
14842593348eSBarry Smith 
1485b51a4376SLisandro Dalcin   /* store row lengths */
1486b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1487b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1488b51a4376SLisandro Dalcin     for (j=0; j<bs; j++)
1489b51a4376SLisandro Dalcin       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1490b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1491b51a4376SLisandro Dalcin   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1492b51a4376SLisandro Dalcin 
1493b51a4376SLisandro Dalcin   /* store column indices  */
1494b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1495b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1496b51a4376SLisandro Dalcin     for (k=0; k<bs; k++)
1497b51a4376SLisandro Dalcin       for (j=A->i[i]; j<A->i[i+1]; j++)
1498b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1499b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*A->j[j] + l;
1500b51a4376SLisandro Dalcin   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1501b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1502b51a4376SLisandro Dalcin   ierr = PetscFree(colidxs);CHKERRQ(ierr);
15032593348eSBarry Smith 
15042593348eSBarry Smith   /* store nonzero values */
1505b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1506b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1507b51a4376SLisandro Dalcin     for (k=0; k<bs; k++)
1508b51a4376SLisandro Dalcin       for (j=A->i[i]; j<A->i[i+1]; j++)
1509b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1510b51a4376SLisandro Dalcin           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1511b51a4376SLisandro Dalcin   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1512b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1513b51a4376SLisandro Dalcin   ierr = PetscFree(matvals);CHKERRQ(ierr);
1514ce6f0cecSBarry Smith 
1515b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
1516b51a4376SLisandro Dalcin   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
15173a40ed3dSBarry Smith   PetscFunctionReturn(0);
15182593348eSBarry Smith }
15192593348eSBarry Smith 
15207dc0baabSHong Zhang static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
15217dc0baabSHong Zhang {
15227dc0baabSHong Zhang   PetscErrorCode ierr;
15237dc0baabSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
15247dc0baabSHong Zhang   PetscInt       i,bs = A->rmap->bs,k;
15257dc0baabSHong Zhang 
15267dc0baabSHong Zhang   PetscFunctionBegin;
15277dc0baabSHong Zhang   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
15287dc0baabSHong Zhang   for (i=0; i<a->mbs; i++) {
15297dc0baabSHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
15307dc0baabSHong Zhang     for (k=a->i[i]; k<a->i[i+1]; k++) {
15317dc0baabSHong Zhang       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
15327dc0baabSHong Zhang     }
15337dc0baabSHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
15347dc0baabSHong Zhang   }
15357dc0baabSHong Zhang   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
15367dc0baabSHong Zhang   PetscFunctionReturn(0);
15377dc0baabSHong Zhang }
15387dc0baabSHong Zhang 
15396849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
15402593348eSBarry Smith {
1541b6490206SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1542dfbe8321SBarry Smith   PetscErrorCode    ierr;
1543d0f46423SBarry Smith   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1544f3ef73ceSBarry Smith   PetscViewerFormat format;
15452593348eSBarry Smith 
15463a40ed3dSBarry Smith   PetscFunctionBegin;
15477dc0baabSHong Zhang   if (A->structure_only) {
15487dc0baabSHong Zhang     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
15497dc0baabSHong Zhang     PetscFunctionReturn(0);
15507dc0baabSHong Zhang   }
15517dc0baabSHong Zhang 
1552b0a32e0cSBarry Smith   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1553456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
155477431f27SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1555fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1556ade3a672SBarry Smith     const char *matname;
1557bcd9e38bSBarry Smith     Mat        aij;
1558ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1559ade3a672SBarry Smith     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1560ade3a672SBarry Smith     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1561bcd9e38bSBarry Smith     ierr = MatView(aij,viewer);CHKERRQ(ierr);
15626bf464f9SBarry Smith     ierr = MatDestroy(&aij);CHKERRQ(ierr);
156304929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
156404929863SHong Zhang       PetscFunctionReturn(0);
1565fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1566d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
156744cd7ae7SLois Curfman McInnes     for (i=0; i<a->mbs; i++) {
156844cd7ae7SLois Curfman McInnes       for (j=0; j<bs; j++) {
156977431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
157044cd7ae7SLois Curfman McInnes         for (k=a->i[i]; k<a->i[i+1]; k++) {
157144cd7ae7SLois Curfman McInnes           for (l=0; l<bs; l++) {
1572aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
15730e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
157457622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
157557622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15760e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
157757622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
157857622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15790e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
158057622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15810ef38995SBarry Smith             }
158244cd7ae7SLois Curfman McInnes #else
15830ef38995SBarry Smith             if (a->a[bs2*k + l*bs + j] != 0.0) {
158457622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
15850ef38995SBarry Smith             }
158644cd7ae7SLois Curfman McInnes #endif
158744cd7ae7SLois Curfman McInnes           }
158844cd7ae7SLois Curfman McInnes         }
1589b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
159044cd7ae7SLois Curfman McInnes       }
159144cd7ae7SLois Curfman McInnes     }
1592d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
15930ef38995SBarry Smith   } else {
1594d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1595b6490206SBarry Smith     for (i=0; i<a->mbs; i++) {
1596b6490206SBarry Smith       for (j=0; j<bs; j++) {
159777431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1598b6490206SBarry Smith         for (k=a->i[i]; k<a->i[i+1]; k++) {
1599b6490206SBarry Smith           for (l=0; l<bs; l++) {
1600aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
16010e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
160257622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
160357622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
16040e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
160557622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
160657622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
16070ef38995SBarry Smith             } else {
160857622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
160988685aaeSLois Curfman McInnes             }
161088685aaeSLois Curfman McInnes #else
161157622a8eSBarry Smith             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
161288685aaeSLois Curfman McInnes #endif
16132593348eSBarry Smith           }
16142593348eSBarry Smith         }
1615b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
16162593348eSBarry Smith       }
16172593348eSBarry Smith     }
1618d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1619b6490206SBarry Smith   }
1620b0a32e0cSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
16213a40ed3dSBarry Smith   PetscFunctionReturn(0);
16222593348eSBarry Smith }
16232593348eSBarry Smith 
16249804daf3SBarry Smith #include <petscdraw.h>
16256849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
16263270192aSSatish Balay {
162777ed5343SBarry Smith   Mat               A = (Mat) Aa;
16283270192aSSatish Balay   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
16296849ba73SBarry Smith   PetscErrorCode    ierr;
1630d0f46423SBarry Smith   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
16310e6d2581SBarry Smith   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
16323f1db9ecSBarry Smith   MatScalar         *aa;
1633b0a32e0cSBarry Smith   PetscViewer       viewer;
1634b3e7f47fSJed Brown   PetscViewerFormat format;
16353270192aSSatish Balay 
16363a40ed3dSBarry Smith   PetscFunctionBegin;
163777ed5343SBarry Smith   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1638b3e7f47fSJed Brown   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1639b0a32e0cSBarry Smith   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
164077ed5343SBarry Smith 
16413270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1642b3e7f47fSJed Brown 
1643b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1644383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1645383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1646b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
16473270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16483270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1649d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16503270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16513270192aSSatish Balay         aa  = a->a + j*bs2;
16523270192aSSatish Balay         for (k=0; k<bs; k++) {
16533270192aSSatish Balay           for (l=0; l<bs; l++) {
16540e6d2581SBarry Smith             if (PetscRealPart(*aa++) >=  0.) continue;
1655b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16563270192aSSatish Balay           }
16573270192aSSatish Balay         }
16583270192aSSatish Balay       }
16593270192aSSatish Balay     }
1660b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
16613270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16623270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1663d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16643270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16653270192aSSatish Balay         aa  = a->a + j*bs2;
16663270192aSSatish Balay         for (k=0; k<bs; k++) {
16673270192aSSatish Balay           for (l=0; l<bs; l++) {
16680e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
1669b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16703270192aSSatish Balay           }
16713270192aSSatish Balay         }
16723270192aSSatish Balay       }
16733270192aSSatish Balay     }
1674b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
16753270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16763270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1677d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16783270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16793270192aSSatish Balay         aa  = a->a + j*bs2;
16803270192aSSatish Balay         for (k=0; k<bs; k++) {
16813270192aSSatish Balay           for (l=0; l<bs; l++) {
16820e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
1683b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16843270192aSSatish Balay           }
16853270192aSSatish Balay         }
16863270192aSSatish Balay       }
16873270192aSSatish Balay     }
1688383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1689b3e7f47fSJed Brown   } else {
1690b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1691b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1692b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
1693b3e7f47fSJed Brown     PetscDraw popup;
1694b3e7f47fSJed Brown 
1695b3e7f47fSJed Brown     for (i=0; i<a->nz*a->bs2; i++) {
1696b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1697b3e7f47fSJed Brown     }
1698383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1699b3e7f47fSJed Brown     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
170045f3bb6eSLisandro Dalcin     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1701383922c3SLisandro Dalcin 
1702383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1703b3e7f47fSJed Brown     for (i=0,row=0; i<mbs; i++,row+=bs) {
1704b3e7f47fSJed Brown       for (j=a->i[i]; j<a->i[i+1]; j++) {
1705b3e7f47fSJed Brown         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1706b3e7f47fSJed Brown         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1707b3e7f47fSJed Brown         aa  = a->a + j*bs2;
1708b3e7f47fSJed Brown         for (k=0; k<bs; k++) {
1709b3e7f47fSJed Brown           for (l=0; l<bs; l++) {
1710383922c3SLisandro Dalcin             MatScalar v = *aa++;
1711383922c3SLisandro Dalcin             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1712b3e7f47fSJed Brown             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1713b3e7f47fSJed Brown           }
1714b3e7f47fSJed Brown         }
1715b3e7f47fSJed Brown       }
1716b3e7f47fSJed Brown     }
1717383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1718b3e7f47fSJed Brown   }
171977ed5343SBarry Smith   PetscFunctionReturn(0);
172077ed5343SBarry Smith }
17213270192aSSatish Balay 
17226849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
172377ed5343SBarry Smith {
1724dfbe8321SBarry Smith   PetscErrorCode ierr;
17250e6d2581SBarry Smith   PetscReal      xl,yl,xr,yr,w,h;
1726b0a32e0cSBarry Smith   PetscDraw      draw;
1727ace3abfcSBarry Smith   PetscBool      isnull;
17283270192aSSatish Balay 
172977ed5343SBarry Smith   PetscFunctionBegin;
1730b0a32e0cSBarry Smith   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
173145f3bb6eSLisandro Dalcin   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
173245f3bb6eSLisandro Dalcin   if (isnull) PetscFunctionReturn(0);
173377ed5343SBarry Smith 
1734d0f46423SBarry Smith   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
173577ed5343SBarry Smith   xr  += w;          yr += h;        xl = -w;     yl = -h;
1736b0a32e0cSBarry Smith   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1737832b7cebSLisandro Dalcin   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1738b0a32e0cSBarry Smith   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
17390298fd71SBarry Smith   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1740832b7cebSLisandro Dalcin   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
17413a40ed3dSBarry Smith   PetscFunctionReturn(0);
17423270192aSSatish Balay }
17433270192aSSatish Balay 
1744dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
17452593348eSBarry Smith {
1746dfbe8321SBarry Smith   PetscErrorCode ierr;
1747ace3abfcSBarry Smith   PetscBool      iascii,isbinary,isdraw;
17482593348eSBarry Smith 
17493a40ed3dSBarry Smith   PetscFunctionBegin;
1750251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1751251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1752251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
175332077d6dSBarry Smith   if (iascii) {
17543a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
17550f5bd95cSBarry Smith   } else if (isbinary) {
17563a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
17570f5bd95cSBarry Smith   } else if (isdraw) {
17583a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
17595cd90555SBarry Smith   } else {
1760a5e6ed63SBarry Smith     Mat B;
1761ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1762a5e6ed63SBarry Smith     ierr = MatView(B,viewer);CHKERRQ(ierr);
17636bf464f9SBarry Smith     ierr = MatDestroy(&B);CHKERRQ(ierr);
17642593348eSBarry Smith   }
17653a40ed3dSBarry Smith   PetscFunctionReturn(0);
17662593348eSBarry Smith }
1767b6490206SBarry Smith 
1768c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1769cd0e1443SSatish Balay {
1770cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1771c1ac3661SBarry Smith   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1772c1ac3661SBarry Smith   PetscInt    *ai = a->i,*ailen = a->ilen;
1773d0f46423SBarry Smith   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
177497e567efSBarry Smith   MatScalar   *ap,*aa = a->a;
1775cd0e1443SSatish Balay 
17763a40ed3dSBarry Smith   PetscFunctionBegin;
17772d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over rows */
1778cd0e1443SSatish Balay     row = im[k]; brow = row/bs;
1779e32f2f54SBarry Smith     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1780e32f2f54SBarry Smith     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
17812d61bbb3SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
17822c3acbe9SBarry Smith     nrow = ailen[brow];
17832d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over columns */
1784e32f2f54SBarry Smith       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1785e32f2f54SBarry Smith       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
17862d61bbb3SSatish Balay       col  = in[l];
17872d61bbb3SSatish Balay       bcol = col/bs;
17882d61bbb3SSatish Balay       cidx = col%bs;
17892d61bbb3SSatish Balay       ridx = row%bs;
17902d61bbb3SSatish Balay       high = nrow;
17912d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
17922d61bbb3SSatish Balay       while (high-low > 5) {
1793cd0e1443SSatish Balay         t = (low+high)/2;
1794cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
1795cd0e1443SSatish Balay         else             low  = t;
1796cd0e1443SSatish Balay       }
1797cd0e1443SSatish Balay       for (i=low; i<high; i++) {
1798cd0e1443SSatish Balay         if (rp[i] > bcol) break;
1799cd0e1443SSatish Balay         if (rp[i] == bcol) {
18002d61bbb3SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
18012d61bbb3SSatish Balay           goto finished;
1802cd0e1443SSatish Balay         }
1803cd0e1443SSatish Balay       }
180497e567efSBarry Smith       *v++ = 0.0;
18052d61bbb3SSatish Balay finished:;
1806cd0e1443SSatish Balay     }
1807cd0e1443SSatish Balay   }
18083a40ed3dSBarry Smith   PetscFunctionReturn(0);
1809cd0e1443SSatish Balay }
1810cd0e1443SSatish Balay 
1811dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
181292c4ed94SBarry Smith {
181392c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1814e2ee6c50SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1815c1ac3661SBarry Smith   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
18166849ba73SBarry Smith   PetscErrorCode    ierr;
1817d0f46423SBarry Smith   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1818ace3abfcSBarry Smith   PetscBool         roworiented=a->roworiented;
1819dd6ea824SBarry Smith   const PetscScalar *value     = v;
18209d243f67SHong Zhang   MatScalar         *ap=NULL,*aa = a->a,*bap;
182192c4ed94SBarry Smith 
18223a40ed3dSBarry Smith   PetscFunctionBegin;
18230e324ae4SSatish Balay   if (roworiented) {
18240e324ae4SSatish Balay     stepval = (n-1)*bs;
18250e324ae4SSatish Balay   } else {
18260e324ae4SSatish Balay     stepval = (m-1)*bs;
18270e324ae4SSatish Balay   }
182892c4ed94SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
182992c4ed94SBarry Smith     row = im[k];
18305ef9f2a5SBarry Smith     if (row < 0) continue;
1831cf9c20a2SJed Brown     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
183292c4ed94SBarry Smith     rp   = aj + ai[row];
18337dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2*ai[row];
183492c4ed94SBarry Smith     rmax = imax[row];
183592c4ed94SBarry Smith     nrow = ailen[row];
183692c4ed94SBarry Smith     low  = 0;
1837c71e6ed7SBarry Smith     high = nrow;
183892c4ed94SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
18395ef9f2a5SBarry Smith       if (in[l] < 0) continue;
1840cf9c20a2SJed Brown       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
184192c4ed94SBarry Smith       col = in[l];
18427dc0baabSHong Zhang       if (!A->structure_only) {
184392c4ed94SBarry Smith         if (roworiented) {
184453ef36baSBarry Smith           value = v + (k*(stepval+bs) + l)*bs;
18450e324ae4SSatish Balay         } else {
184653ef36baSBarry Smith           value = v + (l*(stepval+bs) + k)*bs;
184792c4ed94SBarry Smith         }
18487dc0baabSHong Zhang       }
184926fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
185026fbe8dcSKarl Rupp       else high = nrow;
1851e2ee6c50SBarry Smith       lastcol = col;
185292c4ed94SBarry Smith       while (high-low > 7) {
185392c4ed94SBarry Smith         t = (low+high)/2;
185492c4ed94SBarry Smith         if (rp[t] > col) high = t;
185592c4ed94SBarry Smith         else             low  = t;
185692c4ed94SBarry Smith       }
185792c4ed94SBarry Smith       for (i=low; i<high; i++) {
185892c4ed94SBarry Smith         if (rp[i] > col) break;
185992c4ed94SBarry Smith         if (rp[i] == col) {
18607dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
18618a84c255SSatish Balay           bap = ap +  bs2*i;
18620e324ae4SSatish Balay           if (roworiented) {
18638a84c255SSatish Balay             if (is == ADD_VALUES) {
1864dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1865dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
18668a84c255SSatish Balay                   bap[jj] += *value++;
1867dd9472c6SBarry Smith                 }
1868dd9472c6SBarry Smith               }
18690e324ae4SSatish Balay             } else {
1870dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1871dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
18720e324ae4SSatish Balay                   bap[jj] = *value++;
18738a84c255SSatish Balay                 }
1874dd9472c6SBarry Smith               }
1875dd9472c6SBarry Smith             }
18760e324ae4SSatish Balay           } else {
18770e324ae4SSatish Balay             if (is == ADD_VALUES) {
187853ef36baSBarry Smith               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1879dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
188053ef36baSBarry Smith                   bap[jj] += value[jj];
1881dd9472c6SBarry Smith                 }
188253ef36baSBarry Smith                 bap += bs;
1883dd9472c6SBarry Smith               }
18840e324ae4SSatish Balay             } else {
188553ef36baSBarry Smith               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1886dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
188753ef36baSBarry Smith                   bap[jj]  = value[jj];
18880e324ae4SSatish Balay                 }
188953ef36baSBarry Smith                 bap += bs;
18908a84c255SSatish Balay               }
1891dd9472c6SBarry Smith             }
1892dd9472c6SBarry Smith           }
1893f1241b54SBarry Smith           goto noinsert2;
189492c4ed94SBarry Smith         }
189592c4ed94SBarry Smith       }
189689280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
18972f7d4af7SBarry Smith       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
18987dc0baabSHong Zhang       if (A->structure_only) {
18997dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
19007dc0baabSHong Zhang       } else {
1901fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
19027dc0baabSHong Zhang       }
1903c03d1d03SSatish Balay       N = nrow++ - 1; high++;
190492c4ed94SBarry Smith       /* shift up all the later entries in this row */
1905580bdb30SBarry Smith       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
190692c4ed94SBarry Smith       rp[i] = col;
19077dc0baabSHong Zhang       if (!A->structure_only) {
1908580bdb30SBarry Smith         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
19098a84c255SSatish Balay         bap   = ap +  bs2*i;
19100e324ae4SSatish Balay         if (roworiented) {
1911dd9472c6SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
1912dd9472c6SBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
19130e324ae4SSatish Balay               bap[jj] = *value++;
1914dd9472c6SBarry Smith             }
1915dd9472c6SBarry Smith           }
19160e324ae4SSatish Balay         } else {
1917dd9472c6SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
1918dd9472c6SBarry Smith             for (jj=0; jj<bs; jj++) {
19190e324ae4SSatish Balay               *bap++ = *value++;
19200e324ae4SSatish Balay             }
1921dd9472c6SBarry Smith           }
1922dd9472c6SBarry Smith         }
19237dc0baabSHong Zhang       }
1924f1241b54SBarry Smith noinsert2:;
192592c4ed94SBarry Smith       low = i;
192692c4ed94SBarry Smith     }
192792c4ed94SBarry Smith     ailen[row] = nrow;
192892c4ed94SBarry Smith   }
19293a40ed3dSBarry Smith   PetscFunctionReturn(0);
193092c4ed94SBarry Smith }
193126e093fcSHong Zhang 
1932dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1933584200bdSSatish Balay {
1934584200bdSSatish Balay   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1935580bdb30SBarry Smith   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1936d0f46423SBarry Smith   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
19376849ba73SBarry Smith   PetscErrorCode ierr;
1938c1ac3661SBarry Smith   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
19393f1db9ecSBarry Smith   MatScalar      *aa  = a->a,*ap;
19403447b6efSHong Zhang   PetscReal      ratio=0.6;
1941584200bdSSatish Balay 
19423a40ed3dSBarry Smith   PetscFunctionBegin;
19433a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1944584200bdSSatish Balay 
194543ee02c3SBarry Smith   if (m) rmax = ailen[0];
1946584200bdSSatish Balay   for (i=1; i<mbs; i++) {
1947584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
1948584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
1949d402145bSBarry Smith     rmax    = PetscMax(rmax,ailen[i]);
1950584200bdSSatish Balay     if (fshift) {
1951580bdb30SBarry Smith       ip = aj + ai[i];
1952580bdb30SBarry Smith       ap = aa + bs2*ai[i];
1953584200bdSSatish Balay       N  = ailen[i];
1954580bdb30SBarry Smith       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1955672ba085SHong Zhang       if (!A->structure_only) {
1956580bdb30SBarry Smith         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1957584200bdSSatish Balay       }
1958672ba085SHong Zhang     }
1959584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
1960584200bdSSatish Balay   }
1961584200bdSSatish Balay   if (mbs) {
1962584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
1963584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1964584200bdSSatish Balay   }
19657c565772SBarry Smith 
1966584200bdSSatish Balay   /* reset ilen and imax for each row */
19677c565772SBarry Smith   a->nonzerorowcnt = 0;
1968672ba085SHong Zhang   if (A->structure_only) {
1969672ba085SHong Zhang     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1970672ba085SHong Zhang   } else { /* !A->structure_only */
1971584200bdSSatish Balay     for (i=0; i<mbs; i++) {
1972584200bdSSatish Balay       ailen[i] = imax[i] = ai[i+1] - ai[i];
19737c565772SBarry Smith       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1974584200bdSSatish Balay     }
1975672ba085SHong Zhang   }
1976a7c10996SSatish Balay   a->nz = ai[mbs];
1977584200bdSSatish Balay 
1978584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
1979b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
1980584200bdSSatish Balay   if (fshift && a->diag) {
1981606d414cSSatish Balay     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
19823bb1ff40SBarry Smith     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1983f4259b30SLisandro Dalcin     a->diag = NULL;
1984584200bdSSatish Balay   }
198565e19b50SBarry Smith   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1986d0f46423SBarry Smith   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1987ae15b995SBarry Smith   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1988ae15b995SBarry Smith   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
198926fbe8dcSKarl Rupp 
19908e58a170SBarry Smith   A->info.mallocs    += a->reallocs;
1991e2f3b5e9SSatish Balay   a->reallocs         = 0;
19920e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1993647a6520SHong Zhang   a->rmax             = rmax;
1994cf4441caSHong Zhang 
1995672ba085SHong Zhang   if (!A->structure_only) {
199611e456e1SBarry Smith     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1997672ba085SHong Zhang   }
19983a40ed3dSBarry Smith   PetscFunctionReturn(0);
1999584200bdSSatish Balay }
2000584200bdSSatish Balay 
2001bea157c4SSatish Balay /*
2002bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2003bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2004bea157c4SSatish Balay    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2005bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2006bea157c4SSatish Balay */
2007c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2008d9b7c43dSSatish Balay {
2009c1ac3661SBarry Smith   PetscInt  i,j,k,row;
2010ace3abfcSBarry Smith   PetscBool flg;
20113a40ed3dSBarry Smith 
2012433994e6SBarry Smith   PetscFunctionBegin;
2013bea157c4SSatish Balay   for (i=0,j=0; i<n; j++) {
2014bea157c4SSatish Balay     row = idx[i];
2015bea157c4SSatish Balay     if (row%bs!=0) { /* Not the begining of a block */
2016bea157c4SSatish Balay       sizes[j] = 1;
2017bea157c4SSatish Balay       i++;
2018e4fda26cSSatish Balay     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2019bea157c4SSatish Balay       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2020bea157c4SSatish Balay       i++;
2021bea157c4SSatish Balay     } else { /* Begining of the block, so check if the complete block exists */
2022bea157c4SSatish Balay       flg = PETSC_TRUE;
2023bea157c4SSatish Balay       for (k=1; k<bs; k++) {
2024bea157c4SSatish Balay         if (row+k != idx[i+k]) { /* break in the block */
2025bea157c4SSatish Balay           flg = PETSC_FALSE;
2026bea157c4SSatish Balay           break;
2027d9b7c43dSSatish Balay         }
2028bea157c4SSatish Balay       }
2029abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2030bea157c4SSatish Balay         sizes[j] = bs;
2031bea157c4SSatish Balay         i       += bs;
2032bea157c4SSatish Balay       } else {
2033bea157c4SSatish Balay         sizes[j] = 1;
2034bea157c4SSatish Balay         i++;
2035bea157c4SSatish Balay       }
2036bea157c4SSatish Balay     }
2037bea157c4SSatish Balay   }
2038bea157c4SSatish Balay   *bs_max = j;
20393a40ed3dSBarry Smith   PetscFunctionReturn(0);
2040d9b7c43dSSatish Balay }
2041d9b7c43dSSatish Balay 
20422b40b63fSBarry Smith PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2043d9b7c43dSSatish Balay {
2044d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2045dfbe8321SBarry Smith   PetscErrorCode    ierr;
2046f4df32b1SMatthew Knepley   PetscInt          i,j,k,count,*rows;
2047d0f46423SBarry Smith   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
204887828ca2SBarry Smith   PetscScalar       zero = 0.0;
20493f1db9ecSBarry Smith   MatScalar         *aa;
205097b48c8fSBarry Smith   const PetscScalar *xx;
205197b48c8fSBarry Smith   PetscScalar       *bb;
2052d9b7c43dSSatish Balay 
20533a40ed3dSBarry Smith   PetscFunctionBegin;
205497b48c8fSBarry Smith   /* fix right hand side if needed */
205597b48c8fSBarry Smith   if (x && b) {
205697b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
205797b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
205897b48c8fSBarry Smith     for (i=0; i<is_n; i++) {
205997b48c8fSBarry Smith       bb[is_idx[i]] = diag*xx[is_idx[i]];
206097b48c8fSBarry Smith     }
206197b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
206297b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
206397b48c8fSBarry Smith   }
206497b48c8fSBarry Smith 
2065d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2066bea157c4SSatish Balay   /* allocate memory for rows,sizes */
2067dcca6d9dSJed Brown   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2068bea157c4SSatish Balay 
2069563b5814SBarry Smith   /* copy IS values to rows, and sort them */
207026fbe8dcSKarl Rupp   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2071bea157c4SSatish Balay   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
207297b48c8fSBarry Smith 
2073a9817697SBarry Smith   if (baij->keepnonzeropattern) {
207426fbe8dcSKarl Rupp     for (i=0; i<is_n; i++) sizes[i] = 1;
2075dffd3267SBarry Smith     bs_max          = is_n;
2076dffd3267SBarry Smith   } else {
2077bea157c4SSatish Balay     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2078e56f5c9eSBarry Smith     A->nonzerostate++;
2079dffd3267SBarry Smith   }
2080bea157c4SSatish Balay 
2081bea157c4SSatish Balay   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2082bea157c4SSatish Balay     row = rows[j];
2083e32f2f54SBarry Smith     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2084bea157c4SSatish Balay     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2085b31fbe3bSSatish Balay     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2086a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2087d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2088bea157c4SSatish Balay         if (baij->ilen[row/bs] > 0) {
2089bea157c4SSatish Balay           baij->ilen[row/bs]       = 1;
2090bea157c4SSatish Balay           baij->j[baij->i[row/bs]] = row/bs;
209126fbe8dcSKarl Rupp 
2092580bdb30SBarry Smith           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2093a07cd24cSSatish Balay         }
2094563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
2095bea157c4SSatish Balay         for (k=0; k<bs; k++) {
2096f4df32b1SMatthew Knepley           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2097bea157c4SSatish Balay         }
2098f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2099bea157c4SSatish Balay         baij->ilen[row/bs] = 0;
2100f4df32b1SMatthew Knepley       } /* end (diag == 0.0) */
2101bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
2102cf9c20a2SJed Brown       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2103bea157c4SSatish Balay       for (k=0; k<count; k++) {
2104d9b7c43dSSatish Balay         aa[0] =  zero;
2105d9b7c43dSSatish Balay         aa   += bs;
2106d9b7c43dSSatish Balay       }
2107d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2108f4df32b1SMatthew Knepley         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2109d9b7c43dSSatish Balay       }
2110d9b7c43dSSatish Balay     }
2111bea157c4SSatish Balay   }
2112bea157c4SSatish Balay 
2113fca92195SBarry Smith   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
21149a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
21153a40ed3dSBarry Smith   PetscFunctionReturn(0);
2116d9b7c43dSSatish Balay }
21171c351548SSatish Balay 
211897b48c8fSBarry Smith PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
211997b48c8fSBarry Smith {
212097b48c8fSBarry Smith   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
212197b48c8fSBarry Smith   PetscErrorCode    ierr;
212297b48c8fSBarry Smith   PetscInt          i,j,k,count;
212397b48c8fSBarry Smith   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
212497b48c8fSBarry Smith   PetscScalar       zero = 0.0;
212597b48c8fSBarry Smith   MatScalar         *aa;
212697b48c8fSBarry Smith   const PetscScalar *xx;
212797b48c8fSBarry Smith   PetscScalar       *bb;
212856777dd2SBarry Smith   PetscBool         *zeroed,vecs = PETSC_FALSE;
212997b48c8fSBarry Smith 
213097b48c8fSBarry Smith   PetscFunctionBegin;
213197b48c8fSBarry Smith   /* fix right hand side if needed */
213297b48c8fSBarry Smith   if (x && b) {
213397b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
213497b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
213556777dd2SBarry Smith     vecs = PETSC_TRUE;
213697b48c8fSBarry Smith   }
213797b48c8fSBarry Smith 
213897b48c8fSBarry Smith   /* zero the columns */
21391795a4d1SJed Brown   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
214097b48c8fSBarry Smith   for (i=0; i<is_n; i++) {
214197b48c8fSBarry Smith     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
214297b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
214397b48c8fSBarry Smith   }
214497b48c8fSBarry Smith   for (i=0; i<A->rmap->N; i++) {
214597b48c8fSBarry Smith     if (!zeroed[i]) {
214697b48c8fSBarry Smith       row = i/bs;
214797b48c8fSBarry Smith       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
214897b48c8fSBarry Smith         for (k=0; k<bs; k++) {
214997b48c8fSBarry Smith           col = bs*baij->j[j] + k;
215097b48c8fSBarry Smith           if (zeroed[col]) {
215197b48c8fSBarry Smith             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
215256777dd2SBarry Smith             if (vecs) bb[i] -= aa[0]*xx[col];
215397b48c8fSBarry Smith             aa[0] = 0.0;
215497b48c8fSBarry Smith           }
215597b48c8fSBarry Smith         }
215697b48c8fSBarry Smith       }
215756777dd2SBarry Smith     } else if (vecs) bb[i] = diag*xx[i];
215897b48c8fSBarry Smith   }
215997b48c8fSBarry Smith   ierr = PetscFree(zeroed);CHKERRQ(ierr);
216056777dd2SBarry Smith   if (vecs) {
216156777dd2SBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
216256777dd2SBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
216356777dd2SBarry Smith   }
216497b48c8fSBarry Smith 
216597b48c8fSBarry Smith   /* zero the rows */
216697b48c8fSBarry Smith   for (i=0; i<is_n; i++) {
216797b48c8fSBarry Smith     row   = is_idx[i];
216897b48c8fSBarry Smith     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
216997b48c8fSBarry Smith     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
217097b48c8fSBarry Smith     for (k=0; k<count; k++) {
217197b48c8fSBarry Smith       aa[0] =  zero;
217297b48c8fSBarry Smith       aa   += bs;
217397b48c8fSBarry Smith     }
2174d4a378daSJed Brown     if (diag != (PetscScalar)0.0) {
217597b48c8fSBarry Smith       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
217697b48c8fSBarry Smith     }
217797b48c8fSBarry Smith   }
217897b48c8fSBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
217997b48c8fSBarry Smith   PetscFunctionReturn(0);
218097b48c8fSBarry Smith }
218197b48c8fSBarry Smith 
2182c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
21832d61bbb3SSatish Balay {
21842d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2185e2ee6c50SBarry Smith   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2186c1ac3661SBarry Smith   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2187d0f46423SBarry Smith   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
21886849ba73SBarry Smith   PetscErrorCode ierr;
2189c1ac3661SBarry Smith   PetscInt       ridx,cidx,bs2=a->bs2;
2190ace3abfcSBarry Smith   PetscBool      roworiented=a->roworiented;
2191d8cdefa3SHong Zhang   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
21922d61bbb3SSatish Balay 
21932d61bbb3SSatish Balay   PetscFunctionBegin;
21942d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over added rows */
2195085a36d4SBarry Smith     row  = im[k];
2196085a36d4SBarry Smith     brow = row/bs;
21975ef9f2a5SBarry Smith     if (row < 0) continue;
2198cf9c20a2SJed Brown     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
21992d61bbb3SSatish Balay     rp   = aj + ai[brow];
2200672ba085SHong Zhang     if (!A->structure_only) ap = aa + bs2*ai[brow];
22012d61bbb3SSatish Balay     rmax = imax[brow];
22022d61bbb3SSatish Balay     nrow = ailen[brow];
22032d61bbb3SSatish Balay     low  = 0;
2204c71e6ed7SBarry Smith     high = nrow;
22052d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over added columns */
22065ef9f2a5SBarry Smith       if (in[l] < 0) continue;
2207cf9c20a2SJed Brown       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
22082d61bbb3SSatish Balay       col  = in[l]; bcol = col/bs;
22092d61bbb3SSatish Balay       ridx = row % bs; cidx = col % bs;
2210672ba085SHong Zhang       if (!A->structure_only) {
22112d61bbb3SSatish Balay         if (roworiented) {
22125ef9f2a5SBarry Smith           value = v[l + k*n];
22132d61bbb3SSatish Balay         } else {
22142d61bbb3SSatish Balay           value = v[k + l*m];
22152d61bbb3SSatish Balay         }
2216672ba085SHong Zhang       }
22177cd84e04SBarry Smith       if (col <= lastcol) low = 0; else high = nrow;
2218e2ee6c50SBarry Smith       lastcol = col;
22192d61bbb3SSatish Balay       while (high-low > 7) {
22202d61bbb3SSatish Balay         t = (low+high)/2;
22212d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
22222d61bbb3SSatish Balay         else              low  = t;
22232d61bbb3SSatish Balay       }
22242d61bbb3SSatish Balay       for (i=low; i<high; i++) {
22252d61bbb3SSatish Balay         if (rp[i] > bcol) break;
22262d61bbb3SSatish Balay         if (rp[i] == bcol) {
22272d61bbb3SSatish Balay           bap = ap +  bs2*i + bs*cidx + ridx;
2228672ba085SHong Zhang           if (!A->structure_only) {
22292d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
22302d61bbb3SSatish Balay             else                  *bap  = value;
2231672ba085SHong Zhang           }
22322d61bbb3SSatish Balay           goto noinsert1;
22332d61bbb3SSatish Balay         }
22342d61bbb3SSatish Balay       }
22352d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
2236e32f2f54SBarry Smith       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2237672ba085SHong Zhang       if (A->structure_only) {
2238672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2239672ba085SHong Zhang       } else {
2240fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2241672ba085SHong Zhang       }
2242c03d1d03SSatish Balay       N = nrow++ - 1; high++;
22432d61bbb3SSatish Balay       /* shift up all the later entries in this row */
2244580bdb30SBarry Smith       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
22452d61bbb3SSatish Balay       rp[i] = bcol;
2246580bdb30SBarry Smith       if (!A->structure_only) {
2247580bdb30SBarry Smith         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2248580bdb30SBarry Smith         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2249580bdb30SBarry Smith         ap[bs2*i + bs*cidx + ridx] = value;
2250580bdb30SBarry Smith       }
2251085a36d4SBarry Smith       a->nz++;
2252e56f5c9eSBarry Smith       A->nonzerostate++;
22532d61bbb3SSatish Balay noinsert1:;
22542d61bbb3SSatish Balay       low = i;
22552d61bbb3SSatish Balay     }
22562d61bbb3SSatish Balay     ailen[brow] = nrow;
22572d61bbb3SSatish Balay   }
22582d61bbb3SSatish Balay   PetscFunctionReturn(0);
22592d61bbb3SSatish Balay }
22602d61bbb3SSatish Balay 
22610481f469SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
22622d61bbb3SSatish Balay {
22632d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
22642d61bbb3SSatish Balay   Mat            outA;
2265dfbe8321SBarry Smith   PetscErrorCode ierr;
2266ace3abfcSBarry Smith   PetscBool      row_identity,col_identity;
22672d61bbb3SSatish Balay 
22682d61bbb3SSatish Balay   PetscFunctionBegin;
2269e32f2f54SBarry Smith   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2270667159a5SBarry Smith   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2271667159a5SBarry Smith   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2272f23aa3ddSBarry Smith   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
22732d61bbb3SSatish Balay 
22742d61bbb3SSatish Balay   outA            = inA;
2275d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
2276f6224b95SHong Zhang   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2277f6224b95SHong Zhang   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
22782d61bbb3SSatish Balay 
2279c4992f7dSBarry Smith   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2280cf242676SKris Buschelman 
2281c38d4ed2SBarry Smith   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
22826bf464f9SBarry Smith   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2283c3122656SLisandro Dalcin   a->row = row;
2284c38d4ed2SBarry Smith   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
22856bf464f9SBarry Smith   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2286c3122656SLisandro Dalcin   a->col = col;
2287c38d4ed2SBarry Smith 
2288c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
22896bf464f9SBarry Smith   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
22904c49b128SBarry Smith   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
22913bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2292c38d4ed2SBarry Smith 
2293ace3abfcSBarry Smith   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2294c38d4ed2SBarry Smith   if (!a->solve_work) {
2295854ce69bSBarry Smith     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
22963bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2297c38d4ed2SBarry Smith   }
2298719d5645SBarry Smith   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
22992d61bbb3SSatish Balay   PetscFunctionReturn(0);
23002d61bbb3SSatish Balay }
2301d9b7c43dSSatish Balay 
23027087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
230327a8da17SBarry Smith {
230427a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2305bdb1c0e1SJed Brown   PetscInt    i,nz,mbs;
230627a8da17SBarry Smith 
230727a8da17SBarry Smith   PetscFunctionBegin;
2308b32cb4a7SJed Brown   nz  = baij->maxnz;
2309bdb1c0e1SJed Brown   mbs = baij->mbs;
231027a8da17SBarry Smith   for (i=0; i<nz; i++) {
231127a8da17SBarry Smith     baij->j[i] = indices[i];
231227a8da17SBarry Smith   }
231327a8da17SBarry Smith   baij->nz = nz;
2314bdb1c0e1SJed Brown   for (i=0; i<mbs; i++) {
231527a8da17SBarry Smith     baij->ilen[i] = baij->imax[i];
231627a8da17SBarry Smith   }
231727a8da17SBarry Smith   PetscFunctionReturn(0);
231827a8da17SBarry Smith }
231927a8da17SBarry Smith 
232027a8da17SBarry Smith /*@
232127a8da17SBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
232227a8da17SBarry Smith        in the matrix.
232327a8da17SBarry Smith 
232427a8da17SBarry Smith   Input Parameters:
232527a8da17SBarry Smith +  mat - the SeqBAIJ matrix
232627a8da17SBarry Smith -  indices - the column indices
232727a8da17SBarry Smith 
232815091d37SBarry Smith   Level: advanced
232915091d37SBarry Smith 
233027a8da17SBarry Smith   Notes:
233127a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
233227a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
233327a8da17SBarry Smith   of the MatSetValues() operation.
233427a8da17SBarry Smith 
233527a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
2336d1be2dadSMatthew Knepley   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
233727a8da17SBarry Smith 
233827a8da17SBarry Smith     MUST be called before any calls to MatSetValues();
233927a8da17SBarry Smith 
234027a8da17SBarry Smith @*/
23417087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
234227a8da17SBarry Smith {
23434ac538c5SBarry Smith   PetscErrorCode ierr;
234427a8da17SBarry Smith 
234527a8da17SBarry Smith   PetscFunctionBegin;
23460700a824SBarry Smith   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
23474482741eSBarry Smith   PetscValidPointer(indices,2);
23484ac538c5SBarry Smith   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
234927a8da17SBarry Smith   PetscFunctionReturn(0);
235027a8da17SBarry Smith }
235127a8da17SBarry Smith 
2352985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2353273d9f13SBarry Smith {
2354273d9f13SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2355dfbe8321SBarry Smith   PetscErrorCode ierr;
2356c1ac3661SBarry Smith   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2357273d9f13SBarry Smith   PetscReal      atmp;
235887828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
2359273d9f13SBarry Smith   MatScalar      *aa;
2360c1ac3661SBarry Smith   PetscInt       ncols,brow,krow,kcol;
2361273d9f13SBarry Smith 
2362273d9f13SBarry Smith   PetscFunctionBegin;
2363e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2364d0f46423SBarry Smith   bs  = A->rmap->bs;
2365273d9f13SBarry Smith   aa  = a->a;
2366273d9f13SBarry Smith   ai  = a->i;
2367273d9f13SBarry Smith   aj  = a->j;
2368273d9f13SBarry Smith   mbs = a->mbs;
2369273d9f13SBarry Smith 
23702dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
23711ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2372273d9f13SBarry Smith   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2373e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2374273d9f13SBarry Smith   for (i=0; i<mbs; i++) {
2375273d9f13SBarry Smith     ncols = ai[1] - ai[0]; ai++;
2376273d9f13SBarry Smith     brow  = bs*i;
2377273d9f13SBarry Smith     for (j=0; j<ncols; j++) {
2378273d9f13SBarry Smith       for (kcol=0; kcol<bs; kcol++) {
2379273d9f13SBarry Smith         for (krow=0; krow<bs; krow++) {
2380273d9f13SBarry Smith           atmp = PetscAbsScalar(*aa);aa++;
2381273d9f13SBarry Smith           row  = brow + krow;   /* row index */
2382985db425SBarry Smith           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2383273d9f13SBarry Smith         }
2384273d9f13SBarry Smith       }
2385273d9f13SBarry Smith       aj++;
2386273d9f13SBarry Smith     }
2387273d9f13SBarry Smith   }
23881ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2389273d9f13SBarry Smith   PetscFunctionReturn(0);
2390273d9f13SBarry Smith }
2391273d9f13SBarry Smith 
23923c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
23933c896bc6SHong Zhang {
23943c896bc6SHong Zhang   PetscErrorCode ierr;
23953c896bc6SHong Zhang 
23963c896bc6SHong Zhang   PetscFunctionBegin;
23973c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
23983c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
23993c896bc6SHong Zhang     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
24003c896bc6SHong Zhang     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2401d88c0aacSHong Zhang     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
24023c896bc6SHong Zhang 
2403d88c0aacSHong Zhang     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2404d88c0aacSHong Zhang     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2405580bdb30SBarry Smith     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2406cdc753b6SBarry Smith     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
24073c896bc6SHong Zhang   } else {
24083c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
24093c896bc6SHong Zhang   }
24103c896bc6SHong Zhang   PetscFunctionReturn(0);
24113c896bc6SHong Zhang }
24123c896bc6SHong Zhang 
24134994cf47SJed Brown PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2414273d9f13SBarry Smith {
2415dfbe8321SBarry Smith   PetscErrorCode ierr;
2416273d9f13SBarry Smith 
2417273d9f13SBarry Smith   PetscFunctionBegin;
2418f4259b30SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2419273d9f13SBarry Smith   PetscFunctionReturn(0);
2420273d9f13SBarry Smith }
2421273d9f13SBarry Smith 
2422cda14afcSprj- static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2423f2a5309cSSatish Balay {
2424f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
24256e111a19SKarl Rupp 
2426f2a5309cSSatish Balay   PetscFunctionBegin;
2427f2a5309cSSatish Balay   *array = a->a;
2428f2a5309cSSatish Balay   PetscFunctionReturn(0);
2429f2a5309cSSatish Balay }
2430f2a5309cSSatish Balay 
2431cda14afcSprj- static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2432f2a5309cSSatish Balay {
2433f2a5309cSSatish Balay   PetscFunctionBegin;
2434cda14afcSprj-   *array = NULL;
2435f2a5309cSSatish Balay   PetscFunctionReturn(0);
2436f2a5309cSSatish Balay }
2437f2a5309cSSatish Balay 
243852768537SHong Zhang PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
243952768537SHong Zhang {
2440b264fe52SHong Zhang   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
244152768537SHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
244252768537SHong Zhang   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2443b264fe52SHong Zhang   PetscErrorCode ierr;
244452768537SHong Zhang 
244552768537SHong Zhang   PetscFunctionBegin;
244652768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
2447b264fe52SHong Zhang   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
244852768537SHong Zhang   PetscFunctionReturn(0);
244952768537SHong Zhang }
245052768537SHong Zhang 
2451f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
245242ee4b1aSHong Zhang {
245342ee4b1aSHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2454dfbe8321SBarry Smith   PetscErrorCode ierr;
245531ce2d13SHong Zhang   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2456e838b9e7SJed Brown   PetscBLASInt   one=1;
245742ee4b1aSHong Zhang 
245842ee4b1aSHong Zhang   PetscFunctionBegin;
245942ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2460f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2461c5df96a5SBarry Smith     PetscBLASInt bnz;
2462c5df96a5SBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
24638b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2464a3fa217bSJose E. Roman     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2465ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2466ab784542SHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
246742ee4b1aSHong Zhang   } else {
246852768537SHong Zhang     Mat      B;
246952768537SHong Zhang     PetscInt *nnz;
247052768537SHong Zhang     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
247152768537SHong Zhang     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
247252768537SHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
247352768537SHong Zhang     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
247452768537SHong Zhang     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
247552768537SHong Zhang     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
247652768537SHong Zhang     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
247752768537SHong Zhang     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
247852768537SHong Zhang     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
247952768537SHong Zhang     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
248028be2f97SBarry Smith     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
248152768537SHong Zhang     ierr = PetscFree(nnz);CHKERRQ(ierr);
248242ee4b1aSHong Zhang   }
248342ee4b1aSHong Zhang   PetscFunctionReturn(0);
248442ee4b1aSHong Zhang }
248542ee4b1aSHong Zhang 
24862726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
24872726fb6dSPierre Jolivet {
24882726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX)
24892726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
24902726fb6dSPierre Jolivet   PetscInt    i,nz = a->bs2*a->i[a->mbs];
24912726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
24922726fb6dSPierre Jolivet 
24932726fb6dSPierre Jolivet   PetscFunctionBegin;
24942726fb6dSPierre Jolivet   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
24952726fb6dSPierre Jolivet #else
24962726fb6dSPierre Jolivet   PetscFunctionBegin;
24972726fb6dSPierre Jolivet #endif
24982726fb6dSPierre Jolivet   PetscFunctionReturn(0);
24992726fb6dSPierre Jolivet }
25002726fb6dSPierre Jolivet 
250199cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
250299cafbc1SBarry Smith {
250399cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
250499cafbc1SBarry Smith   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2505dd6ea824SBarry Smith   MatScalar   *aa = a->a;
250699cafbc1SBarry Smith 
250799cafbc1SBarry Smith   PetscFunctionBegin;
250899cafbc1SBarry Smith   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
250999cafbc1SBarry Smith   PetscFunctionReturn(0);
251099cafbc1SBarry Smith }
251199cafbc1SBarry Smith 
251299cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
251399cafbc1SBarry Smith {
251499cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
251599cafbc1SBarry Smith   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2516dd6ea824SBarry Smith   MatScalar   *aa = a->a;
251799cafbc1SBarry Smith 
251899cafbc1SBarry Smith   PetscFunctionBegin;
251999cafbc1SBarry Smith   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
252099cafbc1SBarry Smith   PetscFunctionReturn(0);
252199cafbc1SBarry Smith }
252299cafbc1SBarry Smith 
25233acb8795SBarry Smith /*
25242479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
25253acb8795SBarry Smith */
25261a83f524SJed Brown PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
25273acb8795SBarry Smith {
25283acb8795SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
25293acb8795SBarry Smith   PetscErrorCode ierr;
25303acb8795SBarry Smith   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
25313acb8795SBarry Smith   PetscInt       nz = a->i[m],row,*jj,mr,col;
25323acb8795SBarry Smith 
25333acb8795SBarry Smith   PetscFunctionBegin;
25343acb8795SBarry Smith   *nn = n;
25353acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
2536e7e72b3dSBarry Smith   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2537e7e72b3dSBarry Smith   else {
2538b9e7e5c1SBarry Smith     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2539854ce69bSBarry Smith     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2540b9e7e5c1SBarry Smith     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
25413acb8795SBarry Smith     jj   = a->j;
25423acb8795SBarry Smith     for (i=0; i<nz; i++) {
25433acb8795SBarry Smith       collengths[jj[i]]++;
25443acb8795SBarry Smith     }
25453acb8795SBarry Smith     cia[0] = oshift;
25463acb8795SBarry Smith     for (i=0; i<n; i++) {
25473acb8795SBarry Smith       cia[i+1] = cia[i] + collengths[i];
25483acb8795SBarry Smith     }
2549580bdb30SBarry Smith     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
25503acb8795SBarry Smith     jj   = a->j;
25513acb8795SBarry Smith     for (row=0; row<m; row++) {
25523acb8795SBarry Smith       mr = a->i[row+1] - a->i[row];
25533acb8795SBarry Smith       for (i=0; i<mr; i++) {
25543acb8795SBarry Smith         col = *jj++;
255526fbe8dcSKarl Rupp 
25563acb8795SBarry Smith         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
25573acb8795SBarry Smith       }
25583acb8795SBarry Smith     }
25593acb8795SBarry Smith     ierr = PetscFree(collengths);CHKERRQ(ierr);
25603acb8795SBarry Smith     *ia  = cia; *ja = cja;
25613acb8795SBarry Smith   }
25623acb8795SBarry Smith   PetscFunctionReturn(0);
25633acb8795SBarry Smith }
25643acb8795SBarry Smith 
25651a83f524SJed Brown PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
25663acb8795SBarry Smith {
25673acb8795SBarry Smith   PetscErrorCode ierr;
25683acb8795SBarry Smith 
25693acb8795SBarry Smith   PetscFunctionBegin;
25703acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
25713acb8795SBarry Smith   ierr = PetscFree(*ia);CHKERRQ(ierr);
25723acb8795SBarry Smith   ierr = PetscFree(*ja);CHKERRQ(ierr);
25733acb8795SBarry Smith   PetscFunctionReturn(0);
25743acb8795SBarry Smith }
25753acb8795SBarry Smith 
2576525d23c0SHong Zhang /*
2577525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2578525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2579040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2580525d23c0SHong Zhang  */
2581525d23c0SHong Zhang PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2582f6d58c54SBarry Smith {
2583525d23c0SHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2584f6d58c54SBarry Smith   PetscErrorCode ierr;
2585c0349474SHong Zhang   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2586525d23c0SHong Zhang   PetscInt       nz = a->i[m],row,*jj,mr,col;
2587525d23c0SHong Zhang   PetscInt       *cspidx;
2588f6d58c54SBarry Smith 
2589f6d58c54SBarry Smith   PetscFunctionBegin;
2590525d23c0SHong Zhang   *nn = n;
2591525d23c0SHong Zhang   if (!ia) PetscFunctionReturn(0);
2592f6d58c54SBarry Smith 
2593b9e7e5c1SBarry Smith   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2594854ce69bSBarry Smith   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2595b9e7e5c1SBarry Smith   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2596b9e7e5c1SBarry Smith   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2597525d23c0SHong Zhang   jj   = a->j;
2598525d23c0SHong Zhang   for (i=0; i<nz; i++) {
2599525d23c0SHong Zhang     collengths[jj[i]]++;
2600f6d58c54SBarry Smith   }
2601525d23c0SHong Zhang   cia[0] = oshift;
2602525d23c0SHong Zhang   for (i=0; i<n; i++) {
2603525d23c0SHong Zhang     cia[i+1] = cia[i] + collengths[i];
2604525d23c0SHong Zhang   }
2605580bdb30SBarry Smith   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2606525d23c0SHong Zhang   jj   = a->j;
2607525d23c0SHong Zhang   for (row=0; row<m; row++) {
2608525d23c0SHong Zhang     mr = a->i[row+1] - a->i[row];
2609525d23c0SHong Zhang     for (i=0; i<mr; i++) {
2610525d23c0SHong Zhang       col = *jj++;
2611525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2612525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2613525d23c0SHong Zhang     }
2614525d23c0SHong Zhang   }
2615525d23c0SHong Zhang   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2616071fcb05SBarry Smith   *ia    = cia;
2617071fcb05SBarry Smith   *ja    = cja;
2618525d23c0SHong Zhang   *spidx = cspidx;
2619525d23c0SHong Zhang   PetscFunctionReturn(0);
2620f6d58c54SBarry Smith }
2621f6d58c54SBarry Smith 
2622525d23c0SHong Zhang PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2623525d23c0SHong Zhang {
2624525d23c0SHong Zhang   PetscErrorCode ierr;
2625f6d58c54SBarry Smith 
2626525d23c0SHong Zhang   PetscFunctionBegin;
2627525d23c0SHong Zhang   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2628525d23c0SHong Zhang   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2629f6d58c54SBarry Smith   PetscFunctionReturn(0);
2630f6d58c54SBarry Smith }
263199cafbc1SBarry Smith 
26327d68702bSBarry Smith PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
26337d68702bSBarry Smith {
26347d68702bSBarry Smith   PetscErrorCode ierr;
26357d68702bSBarry Smith   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
26367d68702bSBarry Smith 
26377d68702bSBarry Smith   PetscFunctionBegin;
26386f33a894SBarry Smith   if (!Y->preallocated || !aij->nz) {
26397d68702bSBarry Smith     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
26407d68702bSBarry Smith   }
26417d68702bSBarry Smith   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
26427d68702bSBarry Smith   PetscFunctionReturn(0);
26437d68702bSBarry Smith }
26447d68702bSBarry Smith 
26452593348eSBarry Smith /* -------------------------------------------------------------------*/
26463964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2647cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
2648cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
2649cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
265097304618SKris Buschelman                                /* 4*/  MatMultAdd_SeqBAIJ_N,
26517c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
26527c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
2653f4259b30SLisandro Dalcin                                        NULL,
2654f4259b30SLisandro Dalcin                                        NULL,
2655f4259b30SLisandro Dalcin                                        NULL,
2656f4259b30SLisandro Dalcin                                /* 10*/ NULL,
2657cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
2658f4259b30SLisandro Dalcin                                        NULL,
2659f4259b30SLisandro Dalcin                                        NULL,
2660f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
266197304618SKris Buschelman                                /* 15*/ MatGetInfo_SeqBAIJ,
2662cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
2663cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
2664cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
2665cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
2666f4259b30SLisandro Dalcin                                /* 20*/ NULL,
2667cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
2668cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
2669cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
2670d519adbfSMatthew Knepley                                /* 24*/ MatZeroRows_SeqBAIJ,
2671f4259b30SLisandro Dalcin                                        NULL,
2672f4259b30SLisandro Dalcin                                        NULL,
2673f4259b30SLisandro Dalcin                                        NULL,
2674f4259b30SLisandro Dalcin                                        NULL,
26754994cf47SJed Brown                                /* 29*/ MatSetUp_SeqBAIJ,
2676f4259b30SLisandro Dalcin                                        NULL,
2677f4259b30SLisandro Dalcin                                        NULL,
2678f4259b30SLisandro Dalcin                                        NULL,
2679f4259b30SLisandro Dalcin                                        NULL,
2680d519adbfSMatthew Knepley                                /* 34*/ MatDuplicate_SeqBAIJ,
2681f4259b30SLisandro Dalcin                                        NULL,
2682f4259b30SLisandro Dalcin                                        NULL,
2683cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
2684f4259b30SLisandro Dalcin                                        NULL,
2685d519adbfSMatthew Knepley                                /* 39*/ MatAXPY_SeqBAIJ,
26867dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
2687cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
2688cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
26893c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
2690f4259b30SLisandro Dalcin                                /* 44*/ NULL,
2691cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
26927d68702bSBarry Smith                                        MatShift_SeqBAIJ,
2693f4259b30SLisandro Dalcin                                        NULL,
269497b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
2695f4259b30SLisandro Dalcin                                /* 49*/ NULL,
26963b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
269792c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
26983acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
26993acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
270093dfae19SHong Zhang                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2701f4259b30SLisandro Dalcin                                        NULL,
2702f4259b30SLisandro Dalcin                                        NULL,
2703090001bdSToby Isaac                                        NULL,
2704d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
27057dae84e0SHong Zhang                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2706b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
2707b9b97703SBarry Smith                                        MatView_SeqBAIJ,
2708f4259b30SLisandro Dalcin                                        NULL,
2709f4259b30SLisandro Dalcin                                        NULL,
2710f4259b30SLisandro Dalcin                                /* 64*/ NULL,
2711f4259b30SLisandro Dalcin                                        NULL,
2712f4259b30SLisandro Dalcin                                        NULL,
2713f4259b30SLisandro Dalcin                                        NULL,
2714f4259b30SLisandro Dalcin                                        NULL,
2715d519adbfSMatthew Knepley                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2716f4259b30SLisandro Dalcin                                        NULL,
2717c87e5d42SMatthew Knepley                                        MatConvert_Basic,
2718f4259b30SLisandro Dalcin                                        NULL,
2719f4259b30SLisandro Dalcin                                        NULL,
2720f4259b30SLisandro Dalcin                                /* 74*/ NULL,
2721f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
2722f4259b30SLisandro Dalcin                                        NULL,
2723f4259b30SLisandro Dalcin                                        NULL,
2724f4259b30SLisandro Dalcin                                        NULL,
2725f4259b30SLisandro Dalcin                                /* 79*/ NULL,
2726f4259b30SLisandro Dalcin                                        NULL,
2727f4259b30SLisandro Dalcin                                        NULL,
2728f4259b30SLisandro Dalcin                                        NULL,
27295bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
2730f4259b30SLisandro Dalcin                                /* 84*/ NULL,
2731f4259b30SLisandro Dalcin                                        NULL,
2732f4259b30SLisandro Dalcin                                        NULL,
2733f4259b30SLisandro Dalcin                                        NULL,
2734f4259b30SLisandro Dalcin                                        NULL,
2735f4259b30SLisandro Dalcin                                /* 89*/ NULL,
2736f4259b30SLisandro Dalcin                                        NULL,
2737f4259b30SLisandro Dalcin                                        NULL,
2738f4259b30SLisandro Dalcin                                        NULL,
2739f4259b30SLisandro Dalcin                                        NULL,
2740f4259b30SLisandro Dalcin                                /* 94*/ NULL,
2741f4259b30SLisandro Dalcin                                        NULL,
2742f4259b30SLisandro Dalcin                                        NULL,
2743f4259b30SLisandro Dalcin                                        NULL,
2744f4259b30SLisandro Dalcin                                        NULL,
2745f4259b30SLisandro Dalcin                                /* 99*/ NULL,
2746f4259b30SLisandro Dalcin                                        NULL,
2747f4259b30SLisandro Dalcin                                        NULL,
27482726fb6dSPierre Jolivet                                        MatConjugate_SeqBAIJ,
2749f4259b30SLisandro Dalcin                                        NULL,
2750f4259b30SLisandro Dalcin                                /*104*/ NULL,
275199cafbc1SBarry Smith                                        MatRealPart_SeqBAIJ,
27522af78befSBarry Smith                                        MatImaginaryPart_SeqBAIJ,
2753f4259b30SLisandro Dalcin                                        NULL,
2754f4259b30SLisandro Dalcin                                        NULL,
2755f4259b30SLisandro Dalcin                                /*109*/ NULL,
2756f4259b30SLisandro Dalcin                                        NULL,
2757f4259b30SLisandro Dalcin                                        NULL,
2758f4259b30SLisandro Dalcin                                        NULL,
2759547795f9SHong Zhang                                        MatMissingDiagonal_SeqBAIJ,
2760f4259b30SLisandro Dalcin                                /*114*/ NULL,
2761f4259b30SLisandro Dalcin                                        NULL,
2762f4259b30SLisandro Dalcin                                        NULL,
2763f4259b30SLisandro Dalcin                                        NULL,
2764f4259b30SLisandro Dalcin                                        NULL,
2765f4259b30SLisandro Dalcin                                /*119*/ NULL,
2766f4259b30SLisandro Dalcin                                        NULL,
2767547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
2768d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2769f4259b30SLisandro Dalcin                                        NULL,
2770f4259b30SLisandro Dalcin                                /*124*/ NULL,
2771*9463ebdaSPierre Jolivet                                        MatGetColumnNorms_SeqBAIJ,
27723964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
2773f4259b30SLisandro Dalcin                                        NULL,
2774f4259b30SLisandro Dalcin                                        NULL,
2775f4259b30SLisandro Dalcin                                /*129*/ NULL,
2776f4259b30SLisandro Dalcin                                        NULL,
2777f4259b30SLisandro Dalcin                                        NULL,
2778f4259b30SLisandro Dalcin                                        NULL,
2779f4259b30SLisandro Dalcin                                        NULL,
2780f4259b30SLisandro Dalcin                                /*134*/ NULL,
2781f4259b30SLisandro Dalcin                                        NULL,
2782f4259b30SLisandro Dalcin                                        NULL,
2783f4259b30SLisandro Dalcin                                        NULL,
2784f4259b30SLisandro Dalcin                                        NULL,
278546533700Sstefano_zampini                                /*139*/ MatSetBlockSizes_Default,
2786f4259b30SLisandro Dalcin                                        NULL,
2787f4259b30SLisandro Dalcin                                        NULL,
2788bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_SeqXAIJ,
2789f4259b30SLisandro Dalcin                                        NULL,
279086e85357SHong Zhang                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
279186e85357SHong Zhang                                        MatDestroySubMatrices_SeqBAIJ
279299cafbc1SBarry Smith };
27932593348eSBarry Smith 
27947087cfbeSBarry Smith PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
27953e90b805SBarry Smith {
27963e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
27978ece6314SShri Abhyankar   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2798dfbe8321SBarry Smith   PetscErrorCode ierr;
27993e90b805SBarry Smith 
28003e90b805SBarry Smith   PetscFunctionBegin;
2801e7e72b3dSBarry Smith   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
28023e90b805SBarry Smith 
28033e90b805SBarry Smith   /* allocate space for values if not already there */
28043e90b805SBarry Smith   if (!aij->saved_values) {
2805854ce69bSBarry Smith     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
28063bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
28073e90b805SBarry Smith   }
28083e90b805SBarry Smith 
28093e90b805SBarry Smith   /* copy values over */
2810580bdb30SBarry Smith   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
28113e90b805SBarry Smith   PetscFunctionReturn(0);
28123e90b805SBarry Smith }
28133e90b805SBarry Smith 
28147087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
28153e90b805SBarry Smith {
28163e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
28176849ba73SBarry Smith   PetscErrorCode ierr;
28188ece6314SShri Abhyankar   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
28193e90b805SBarry Smith 
28203e90b805SBarry Smith   PetscFunctionBegin;
2821e7e72b3dSBarry Smith   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2822e7e72b3dSBarry Smith   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
28233e90b805SBarry Smith 
28243e90b805SBarry Smith   /* copy values over */
2825580bdb30SBarry Smith   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
28263e90b805SBarry Smith   PetscFunctionReturn(0);
28273e90b805SBarry Smith }
28283e90b805SBarry Smith 
2829cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2830cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2831273d9f13SBarry Smith 
2832b5b72c8aSIrina Sokolova PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2833a23d5eceSKris Buschelman {
2834a23d5eceSKris Buschelman   Mat_SeqBAIJ    *b;
28356849ba73SBarry Smith   PetscErrorCode ierr;
2836535b19f3SBarry Smith   PetscInt       i,mbs,nbs,bs2;
28378afaa268SBarry Smith   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2838a23d5eceSKris Buschelman 
2839a23d5eceSKris Buschelman   PetscFunctionBegin;
28402576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2841ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
2842ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
2843ab93d7beSBarry Smith     nz             = 0;
2844ab93d7beSBarry Smith   }
28458c07d4e3SBarry Smith 
284633d57670SJed Brown   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
284726283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
284826283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2849e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2850899cda47SBarry Smith 
2851899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
2852899cda47SBarry Smith 
2853d0f46423SBarry Smith   mbs = B->rmap->n/bs;
2854d0f46423SBarry Smith   nbs = B->cmap->n/bs;
2855a23d5eceSKris Buschelman   bs2 = bs*bs;
2856a23d5eceSKris Buschelman 
285765e19b50SBarry Smith   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2858a23d5eceSKris Buschelman 
2859a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2860e32f2f54SBarry Smith   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2861a23d5eceSKris Buschelman   if (nnz) {
2862a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) {
2863e32f2f54SBarry Smith       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2864e32f2f54SBarry Smith       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2865a23d5eceSKris Buschelman     }
2866a23d5eceSKris Buschelman   }
2867a23d5eceSKris Buschelman 
2868a23d5eceSKris Buschelman   b    = (Mat_SeqBAIJ*)B->data;
2869ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
28708afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
28718c07d4e3SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
28728c07d4e3SBarry Smith 
2873a23d5eceSKris Buschelman   if (!flg) {
2874a23d5eceSKris Buschelman     switch (bs) {
2875a23d5eceSKris Buschelman     case 1:
2876a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
2877a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2878a23d5eceSKris Buschelman       break;
2879a23d5eceSKris Buschelman     case 2:
2880a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
2881a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2882a23d5eceSKris Buschelman       break;
2883a23d5eceSKris Buschelman     case 3:
2884a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
2885a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2886a23d5eceSKris Buschelman       break;
2887a23d5eceSKris Buschelman     case 4:
2888a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
2889a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2890a23d5eceSKris Buschelman       break;
2891a23d5eceSKris Buschelman     case 5:
2892a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
2893a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2894a23d5eceSKris Buschelman       break;
2895a23d5eceSKris Buschelman     case 6:
2896a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
2897a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2898a23d5eceSKris Buschelman       break;
2899a23d5eceSKris Buschelman     case 7:
2900a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
2901a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2902a23d5eceSKris Buschelman       break;
290396e086a2SDaniel Kokron     case 9:
29046679dcc1SBarry Smith     {
29056679dcc1SBarry Smith       PetscInt version = 1;
29066679dcc1SBarry Smith       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
29076679dcc1SBarry Smith       switch (version) {
29085f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
29096679dcc1SBarry Smith       case 1:
291096e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
291196e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
29121e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
29136679dcc1SBarry Smith         break;
29146679dcc1SBarry Smith #endif
29156679dcc1SBarry Smith       default:
291696e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
291796e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
29181e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
291996e086a2SDaniel Kokron         break;
29206679dcc1SBarry Smith       }
29216679dcc1SBarry Smith       break;
29226679dcc1SBarry Smith     }
2923ebada01fSBarry Smith     case 11:
2924ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
2925ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2926ebada01fSBarry Smith       break;
29276679dcc1SBarry Smith     case 12:
29286679dcc1SBarry Smith     {
29296679dcc1SBarry Smith       PetscInt version = 1;
29306679dcc1SBarry Smith       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
29316679dcc1SBarry Smith       switch (version) {
29326679dcc1SBarry Smith       case 1:
29336679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
29346679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
29351e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29368ab949d8SShri Abhyankar         break;
29376679dcc1SBarry Smith       case 2:
29386679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
29396679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
29401e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29416679dcc1SBarry Smith         break;
29426679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
29436679dcc1SBarry Smith       case 3:
29446679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
29456679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
29461e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
29476679dcc1SBarry Smith         break;
29486679dcc1SBarry Smith #endif
2949a23d5eceSKris Buschelman       default:
2950a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
2951a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
29521e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
29536679dcc1SBarry Smith         break;
29546679dcc1SBarry Smith       }
29556679dcc1SBarry Smith       break;
29566679dcc1SBarry Smith     }
29576679dcc1SBarry Smith     case 15:
29586679dcc1SBarry Smith     {
29596679dcc1SBarry Smith       PetscInt version = 1;
29606679dcc1SBarry Smith       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
29616679dcc1SBarry Smith       switch (version) {
29626679dcc1SBarry Smith       case 1:
29636679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
29641e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29656679dcc1SBarry Smith         break;
29666679dcc1SBarry Smith       case 2:
29676679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
29681e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29696679dcc1SBarry Smith         break;
29706679dcc1SBarry Smith       case 3:
29716679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
29721e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29736679dcc1SBarry Smith         break;
29746679dcc1SBarry Smith       case 4:
29756679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
29761e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29776679dcc1SBarry Smith         break;
29786679dcc1SBarry Smith       default:
29796679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_N;
29801e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
29816679dcc1SBarry Smith         break;
29826679dcc1SBarry Smith       }
29836679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
29846679dcc1SBarry Smith       break;
29856679dcc1SBarry Smith     }
29866679dcc1SBarry Smith     default:
29876679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
29886679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
29891e1ea65dSPierre Jolivet       ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2990a23d5eceSKris Buschelman       break;
2991a23d5eceSKris Buschelman     }
2992a23d5eceSKris Buschelman   }
2993e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
2994a23d5eceSKris Buschelman   b->mbs = mbs;
2995a23d5eceSKris Buschelman   b->nbs = nbs;
2996ab93d7beSBarry Smith   if (!skipallocation) {
29972ee49352SLisandro Dalcin     if (!b->imax) {
2998dcca6d9dSJed Brown       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
29993bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
300026fbe8dcSKarl Rupp 
30014fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
30022ee49352SLisandro Dalcin     }
3003ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
300426fbe8dcSKarl Rupp     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3005a23d5eceSKris Buschelman     if (!nnz) {
3006a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3007c62bd62aSJed Brown       else if (nz < 0) nz = 1;
30085d2a9ed1SStefano Zampini       nz = PetscMin(nz,nbs);
3009a23d5eceSKris Buschelman       for (i=0; i<mbs; i++) b->imax[i] = nz;
301061778c46SBarry Smith       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
3011a23d5eceSKris Buschelman     } else {
3012c73702f5SBarry Smith       PetscInt64 nz64 = 0;
3013c73702f5SBarry Smith       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3014c73702f5SBarry Smith       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
3015a23d5eceSKris Buschelman     }
3016a23d5eceSKris Buschelman 
3017a23d5eceSKris Buschelman     /* allocate the matrix space */
30182ee49352SLisandro Dalcin     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3019672ba085SHong Zhang     if (B->structure_only) {
3020672ba085SHong Zhang       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
3021672ba085SHong Zhang       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
3022672ba085SHong Zhang       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
3023672ba085SHong Zhang     } else {
30246679dcc1SBarry Smith       PetscInt nzbs2 = 0;
30256679dcc1SBarry Smith       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
30266679dcc1SBarry Smith       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
30273bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3028580bdb30SBarry Smith       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
3029672ba085SHong Zhang     }
3030580bdb30SBarry Smith     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
303126fbe8dcSKarl Rupp 
3032672ba085SHong Zhang     if (B->structure_only) {
3033672ba085SHong Zhang       b->singlemalloc = PETSC_FALSE;
3034672ba085SHong Zhang       b->free_a       = PETSC_FALSE;
3035672ba085SHong Zhang     } else {
3036a23d5eceSKris Buschelman       b->singlemalloc = PETSC_TRUE;
3037672ba085SHong Zhang       b->free_a       = PETSC_TRUE;
3038672ba085SHong Zhang     }
3039672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
3040672ba085SHong Zhang 
3041a23d5eceSKris Buschelman     b->i[0] = 0;
3042a23d5eceSKris Buschelman     for (i=1; i<mbs+1; i++) {
3043a23d5eceSKris Buschelman       b->i[i] = b->i[i-1] + b->imax[i-1];
3044a23d5eceSKris Buschelman     }
3045672ba085SHong Zhang 
3046e811da20SHong Zhang   } else {
3047e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3048e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3049ab93d7beSBarry Smith   }
3050a23d5eceSKris Buschelman 
3051a23d5eceSKris Buschelman   b->bs2              = bs2;
3052a23d5eceSKris Buschelman   b->mbs              = mbs;
3053a23d5eceSKris Buschelman   b->nz               = 0;
3054b32cb4a7SJed Brown   b->maxnz            = nz;
3055b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3056cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3057cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
30582576faa2SJed Brown   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3059a23d5eceSKris Buschelman   PetscFunctionReturn(0);
3060a23d5eceSKris Buschelman }
3061a23d5eceSKris Buschelman 
3062cf12db73SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3063725b52f3SLisandro Dalcin {
3064725b52f3SLisandro Dalcin   PetscInt       i,m,nz,nz_max=0,*nnz;
3065f4259b30SLisandro Dalcin   PetscScalar    *values=NULL;
3066d47bf9aaSJed Brown   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3067725b52f3SLisandro Dalcin   PetscErrorCode ierr;
3068725b52f3SLisandro Dalcin 
3069725b52f3SLisandro Dalcin   PetscFunctionBegin;
3070e32f2f54SBarry Smith   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
307126283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
307226283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
307326283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
307426283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3075e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3076d0f46423SBarry Smith   m    = B->rmap->n/bs;
3077725b52f3SLisandro Dalcin 
307826fbe8dcSKarl Rupp   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3079854ce69bSBarry Smith   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3080725b52f3SLisandro Dalcin   for (i=0; i<m; i++) {
3081cf12db73SBarry Smith     nz = ii[i+1]- ii[i];
308226fbe8dcSKarl Rupp     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3083725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3084725b52f3SLisandro Dalcin     nnz[i] = nz;
3085725b52f3SLisandro Dalcin   }
3086725b52f3SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3087725b52f3SLisandro Dalcin   ierr = PetscFree(nnz);CHKERRQ(ierr);
3088725b52f3SLisandro Dalcin 
3089725b52f3SLisandro Dalcin   values = (PetscScalar*)V;
3090725b52f3SLisandro Dalcin   if (!values) {
30911795a4d1SJed Brown     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3092725b52f3SLisandro Dalcin   }
3093725b52f3SLisandro Dalcin   for (i=0; i<m; i++) {
3094cf12db73SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
3095cf12db73SBarry Smith     const PetscInt    *icols = jj + ii[i];
3096bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3097cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3098725b52f3SLisandro Dalcin       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
30993adadaf3SJed Brown     } else {
31003adadaf3SJed Brown       PetscInt j;
31013adadaf3SJed Brown       for (j=0; j<ncols; j++) {
31023adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3103d47bf9aaSJed Brown         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
31043adadaf3SJed Brown       }
31053adadaf3SJed Brown     }
3106725b52f3SLisandro Dalcin   }
3107725b52f3SLisandro Dalcin   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3108725b52f3SLisandro Dalcin   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3109725b52f3SLisandro Dalcin   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31107827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3111725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3112725b52f3SLisandro Dalcin }
3113725b52f3SLisandro Dalcin 
3114cda14afcSprj- /*@C
3115cda14afcSprj-    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3116cda14afcSprj- 
3117cda14afcSprj-    Not Collective
3118cda14afcSprj- 
3119cda14afcSprj-    Input Parameter:
3120cda14afcSprj- .  mat - a MATSEQBAIJ matrix
3121cda14afcSprj- 
3122cda14afcSprj-    Output Parameter:
3123cda14afcSprj- .   array - pointer to the data
3124cda14afcSprj- 
3125cda14afcSprj-    Level: intermediate
3126cda14afcSprj- 
3127cda14afcSprj- .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3128cda14afcSprj- @*/
3129cda14afcSprj- PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3130cda14afcSprj- {
3131cda14afcSprj-   PetscErrorCode ierr;
3132cda14afcSprj- 
3133cda14afcSprj-   PetscFunctionBegin;
3134cda14afcSprj-   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3135cda14afcSprj-   PetscFunctionReturn(0);
3136cda14afcSprj- }
3137cda14afcSprj- 
3138cda14afcSprj- /*@C
3139cda14afcSprj-    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3140cda14afcSprj- 
3141cda14afcSprj-    Not Collective
3142cda14afcSprj- 
3143cda14afcSprj-    Input Parameters:
3144cda14afcSprj- +  mat - a MATSEQBAIJ matrix
3145cda14afcSprj- -  array - pointer to the data
3146cda14afcSprj- 
3147cda14afcSprj-    Level: intermediate
3148cda14afcSprj- 
3149cda14afcSprj- .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3150cda14afcSprj- @*/
3151cda14afcSprj- PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3152cda14afcSprj- {
3153cda14afcSprj-   PetscErrorCode ierr;
3154cda14afcSprj- 
3155cda14afcSprj-   PetscFunctionBegin;
3156cda14afcSprj-   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3157cda14afcSprj-   PetscFunctionReturn(0);
3158cda14afcSprj- }
3159cda14afcSprj- 
31600bad9183SKris Buschelman /*MC
3161fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
31620bad9183SKris Buschelman    block sparse compressed row format.
31630bad9183SKris Buschelman 
31640bad9183SKris Buschelman    Options Database Keys:
31656679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
31666679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
31670bad9183SKris Buschelman 
31680bad9183SKris Buschelman    Level: beginner
31690cd7f59aSBarry Smith 
31700cd7f59aSBarry Smith    Notes:
31710cd7f59aSBarry Smith     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
31720cd7f59aSBarry Smith     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
31730bad9183SKris Buschelman 
31746679dcc1SBarry Smith    Run with -info to see what version of the matrix-vector product is being used
31756679dcc1SBarry Smith 
3176f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ()
31770bad9183SKris Buschelman M*/
31780bad9183SKris Buschelman 
3179cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3180b24902e0SBarry Smith 
31818cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
31822593348eSBarry Smith {
3183dfbe8321SBarry Smith   PetscErrorCode ierr;
3184c1ac3661SBarry Smith   PetscMPIInt    size;
3185b6490206SBarry Smith   Mat_SeqBAIJ    *b;
31863b2fbd54SBarry Smith 
31873a40ed3dSBarry Smith   PetscFunctionBegin;
3188ffc4695bSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3189e32f2f54SBarry Smith   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3190b6490206SBarry Smith 
3191b00a9115SJed Brown   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3192b0a32e0cSBarry Smith   B->data = (void*)b;
3193549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
319426fbe8dcSKarl Rupp 
3195f4259b30SLisandro Dalcin   b->row          = NULL;
3196f4259b30SLisandro Dalcin   b->col          = NULL;
3197f4259b30SLisandro Dalcin   b->icol         = NULL;
31982593348eSBarry Smith   b->reallocs     = 0;
3199f4259b30SLisandro Dalcin   b->saved_values = NULL;
32002593348eSBarry Smith 
3201c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
32022593348eSBarry Smith   b->nonew              = 0;
3203f4259b30SLisandro Dalcin   b->diag               = NULL;
3204f4259b30SLisandro Dalcin   B->spptr              = NULL;
3205b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3206a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
32074e220ebcSLois Curfman McInnes 
3208cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3209cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3210bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3211bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3212bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3213bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3214bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3215bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3216bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3217bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3218bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
32197ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
3220c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
32217ea3e4caSstefano_zampini #endif
3222c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
322317667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
32243a40ed3dSBarry Smith   PetscFunctionReturn(0);
32252593348eSBarry Smith }
32262593348eSBarry Smith 
3227ace3abfcSBarry Smith PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
32282593348eSBarry Smith {
3229b24902e0SBarry Smith   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
32306849ba73SBarry Smith   PetscErrorCode ierr;
3231a96a251dSBarry Smith   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3232de6a44a3SBarry Smith 
32333a40ed3dSBarry Smith   PetscFunctionBegin;
3234e32f2f54SBarry Smith   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
32352593348eSBarry Smith 
32364fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
32374fd072dbSBarry Smith     c->imax           = a->imax;
32384fd072dbSBarry Smith     c->ilen           = a->ilen;
32394fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
32404fd072dbSBarry Smith   } else {
3241dcca6d9dSJed Brown     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
32423bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3243b6490206SBarry Smith     for (i=0; i<mbs; i++) {
32442593348eSBarry Smith       c->imax[i] = a->imax[i];
32452593348eSBarry Smith       c->ilen[i] = a->ilen[i];
32462593348eSBarry Smith     }
32474fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
32484fd072dbSBarry Smith   }
32492593348eSBarry Smith 
32502593348eSBarry Smith   /* allocate the matrix space */
325116a2bf60SHong Zhang   if (mallocmatspace) {
32524fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
32531795a4d1SJed Brown       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
32543bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
325526fbe8dcSKarl Rupp 
32564fd072dbSBarry Smith       c->i            = a->i;
32574fd072dbSBarry Smith       c->j            = a->j;
3258379be0ddSLisandro Dalcin       c->singlemalloc = PETSC_FALSE;
3259379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
3260379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
32614fd072dbSBarry Smith       c->parent       = A;
32621e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
32631e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
326426fbe8dcSKarl Rupp 
32654fd072dbSBarry Smith       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
32664fd072dbSBarry Smith       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
32674fd072dbSBarry Smith       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
32684fd072dbSBarry Smith     } else {
3269dcca6d9dSJed Brown       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
32703bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
327126fbe8dcSKarl Rupp 
3272c4992f7dSBarry Smith       c->singlemalloc = PETSC_TRUE;
3273379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
32744fd072dbSBarry Smith       c->free_ij      = PETSC_TRUE;
327526fbe8dcSKarl Rupp 
3276580bdb30SBarry Smith       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3277b6490206SBarry Smith       if (mbs > 0) {
3278580bdb30SBarry Smith         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
32792e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
3280580bdb30SBarry Smith           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
32812e8a6d31SBarry Smith         } else {
3282580bdb30SBarry Smith           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
32832593348eSBarry Smith         }
32842593348eSBarry Smith       }
32851e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
32861e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
328716a2bf60SHong Zhang     }
32884fd072dbSBarry Smith   }
328916a2bf60SHong Zhang 
32902593348eSBarry Smith   c->roworiented = a->roworiented;
32912593348eSBarry Smith   c->nonew       = a->nonew;
329226fbe8dcSKarl Rupp 
32931e1e43feSBarry Smith   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
32941e1e43feSBarry Smith   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
329526fbe8dcSKarl Rupp 
32965c9eb25fSBarry Smith   c->bs2         = a->bs2;
32975c9eb25fSBarry Smith   c->mbs         = a->mbs;
32985c9eb25fSBarry Smith   c->nbs         = a->nbs;
32992593348eSBarry Smith 
33002593348eSBarry Smith   if (a->diag) {
33014fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
33024fd072dbSBarry Smith       c->diag      = a->diag;
33034fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
33044fd072dbSBarry Smith     } else {
3305854ce69bSBarry Smith       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
33063bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
330726fbe8dcSKarl Rupp       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
33084fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
33094fd072dbSBarry Smith     }
3310f4259b30SLisandro Dalcin   } else c->diag = NULL;
331126fbe8dcSKarl Rupp 
33122593348eSBarry Smith   c->nz         = a->nz;
3313f2cbd3d5SJed Brown   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3314f361c04dSBarry Smith   c->solve_work = NULL;
3315f361c04dSBarry Smith   c->mult_work  = NULL;
3316f361c04dSBarry Smith   c->sor_workt  = NULL;
3317f361c04dSBarry Smith   c->sor_work   = NULL;
331888e51ccdSHong Zhang 
331988e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
332088e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3321cd6b891eSBarry Smith   if (a->compressedrow.use) {
332288e51ccdSHong Zhang     i    = a->compressedrow.nrows;
3323dcca6d9dSJed Brown     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
33243bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3325580bdb30SBarry Smith     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3326580bdb30SBarry Smith     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
332788e51ccdSHong Zhang   } else {
332888e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
33290298fd71SBarry Smith     c->compressedrow.i      = NULL;
33300298fd71SBarry Smith     c->compressedrow.rindex = NULL;
333188e51ccdSHong Zhang   }
3332e56f5c9eSBarry Smith   C->nonzerostate = A->nonzerostate;
333326fbe8dcSKarl Rupp 
3334140e18c1SBarry Smith   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
33353a40ed3dSBarry Smith   PetscFunctionReturn(0);
33362593348eSBarry Smith }
33372593348eSBarry Smith 
3338b24902e0SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3339b24902e0SBarry Smith {
3340b24902e0SBarry Smith   PetscErrorCode ierr;
3341b24902e0SBarry Smith 
3342b24902e0SBarry Smith   PetscFunctionBegin;
3343ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3344d0f46423SBarry Smith   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
33455c9eb25fSBarry Smith   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
334698ad0f72SJed Brown   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3347b24902e0SBarry Smith   PetscFunctionReturn(0);
3348b24902e0SBarry Smith }
3349b24902e0SBarry Smith 
3350618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3351b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3352f501eaabSShri Abhyankar {
3353b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3354b51a4376SLisandro Dalcin   PetscInt       *rowidxs,*colidxs;
3355b51a4376SLisandro Dalcin   PetscScalar    *matvals;
3356f501eaabSShri Abhyankar   PetscErrorCode ierr;
3357b51a4376SLisandro Dalcin 
3358b51a4376SLisandro Dalcin   PetscFunctionBegin;
3359b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3360b51a4376SLisandro Dalcin 
3361b51a4376SLisandro Dalcin   /* read matrix header */
3362b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3363b51a4376SLisandro Dalcin   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3364b51a4376SLisandro Dalcin   M = header[1]; N = header[2]; nz = header[3];
3365b51a4376SLisandro Dalcin   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3366b51a4376SLisandro Dalcin   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3367b51a4376SLisandro Dalcin   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3368b51a4376SLisandro Dalcin 
3369b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
3370b51a4376SLisandro Dalcin   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3371b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3372b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3373b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3374b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3375b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
3376b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3377b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3378b51a4376SLisandro Dalcin 
3379b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
3380b51a4376SLisandro Dalcin   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3381b51a4376SLisandro Dalcin   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3382b51a4376SLisandro Dalcin   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3383b51a4376SLisandro Dalcin   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3384b51a4376SLisandro Dalcin   mbs = m/bs; nbs = n/bs;
3385b51a4376SLisandro Dalcin 
3386b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
3387b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3388b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3389b51a4376SLisandro Dalcin   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3390b51a4376SLisandro Dalcin   sum = rowidxs[m];
3391b51a4376SLisandro Dalcin   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3392b51a4376SLisandro Dalcin 
3393b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
3394b51a4376SLisandro Dalcin   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3395b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3396b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3397b51a4376SLisandro Dalcin 
3398b51a4376SLisandro Dalcin   { /* preallocate matrix storage */
3399b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3400b51a4376SLisandro Dalcin     PetscInt  *nnz;
3401618cc2edSLisandro Dalcin     PetscBool sbaij;
3402b51a4376SLisandro Dalcin 
3403b51a4376SLisandro Dalcin     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3404b51a4376SLisandro Dalcin     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3405618cc2edSLisandro Dalcin     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3406b51a4376SLisandro Dalcin     for (i=0; i<mbs; i++) {
3407b51a4376SLisandro Dalcin       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3408618cc2edSLisandro Dalcin       for (k=0; k<bs; k++) {
3409618cc2edSLisandro Dalcin         PetscInt row = bs*i + k;
3410618cc2edSLisandro Dalcin         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3411618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3412618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3413618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3414618cc2edSLisandro Dalcin         }
3415618cc2edSLisandro Dalcin       }
3416b51a4376SLisandro Dalcin     }
3417b51a4376SLisandro Dalcin     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3418b51a4376SLisandro Dalcin     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3419618cc2edSLisandro Dalcin     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3420b51a4376SLisandro Dalcin     ierr = PetscFree(nnz);CHKERRQ(ierr);
3421b51a4376SLisandro Dalcin   }
3422b51a4376SLisandro Dalcin 
3423b51a4376SLisandro Dalcin   /* store matrix values */
3424b51a4376SLisandro Dalcin   for (i=0; i<m; i++) {
3425b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3426618cc2edSLisandro Dalcin     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3427b51a4376SLisandro Dalcin   }
3428b51a4376SLisandro Dalcin 
3429b51a4376SLisandro Dalcin   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3430b51a4376SLisandro Dalcin   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3431b51a4376SLisandro Dalcin   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3432b51a4376SLisandro Dalcin   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3433b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3434b51a4376SLisandro Dalcin }
3435b51a4376SLisandro Dalcin 
3436b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3437b51a4376SLisandro Dalcin {
3438b51a4376SLisandro Dalcin   PetscErrorCode ierr;
34397f489da9SVaclav Hapla   PetscBool      isbinary;
3440f501eaabSShri Abhyankar 
3441f501eaabSShri Abhyankar   PetscFunctionBegin;
34427f489da9SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3443b51a4376SLisandro Dalcin   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3444b51a4376SLisandro Dalcin   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3445f501eaabSShri Abhyankar   PetscFunctionReturn(0);
3446f501eaabSShri Abhyankar }
3447f501eaabSShri Abhyankar 
3448273d9f13SBarry Smith /*@C
3449273d9f13SBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3450273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
3451273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3452273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3453273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
34542593348eSBarry Smith 
3455d083f849SBarry Smith    Collective
3456273d9f13SBarry Smith 
3457273d9f13SBarry Smith    Input Parameters:
3458273d9f13SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
3459bb7ae925SBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3460bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3461273d9f13SBarry Smith .  m - number of rows
3462273d9f13SBarry Smith .  n - number of columns
346335d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
346435d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
34650298fd71SBarry Smith          (possibly different for each block row) or NULL
3466273d9f13SBarry Smith 
3467273d9f13SBarry Smith    Output Parameter:
3468273d9f13SBarry Smith .  A - the matrix
3469273d9f13SBarry Smith 
3470175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3471f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3472175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3473175b88e8SBarry Smith 
3474273d9f13SBarry Smith    Options Database Keys:
3475a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
3476273d9f13SBarry Smith                      block calculations (much slower)
3477a2b725a8SWilliam Gropp -    -mat_block_size - size of the blocks to use
3478273d9f13SBarry Smith 
3479273d9f13SBarry Smith    Level: intermediate
3480273d9f13SBarry Smith 
3481273d9f13SBarry Smith    Notes:
3482d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
3483d1be2dadSMatthew Knepley 
348449a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
348549a6f317SBarry Smith 
348635d8aa7fSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
348735d8aa7fSBarry Smith 
3488273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3489273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3490273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3491273d9f13SBarry Smith 
3492273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
34930298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3494a7f22e61SSatish Balay    allocation.  See Users-Manual: ch_mat for details.
3495273d9f13SBarry Smith    matrices.
3496273d9f13SBarry Smith 
349769b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3498273d9f13SBarry Smith @*/
34997087cfbeSBarry Smith PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3500273d9f13SBarry Smith {
3501dfbe8321SBarry Smith   PetscErrorCode ierr;
3502273d9f13SBarry Smith 
3503273d9f13SBarry Smith   PetscFunctionBegin;
3504f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3505f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3506273d9f13SBarry Smith   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3507367daffbSBarry Smith   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3508273d9f13SBarry Smith   PetscFunctionReturn(0);
3509273d9f13SBarry Smith }
3510273d9f13SBarry Smith 
3511273d9f13SBarry Smith /*@C
3512273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3513273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
3514273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3515273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3516273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
3517273d9f13SBarry Smith 
3518d083f849SBarry Smith    Collective
3519273d9f13SBarry Smith 
3520273d9f13SBarry Smith    Input Parameters:
35211c4f3114SJed Brown +  B - the matrix
3522bb7ae925SBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3523bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3524273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
3525273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
35260298fd71SBarry Smith          (possibly different for each block row) or NULL
3527273d9f13SBarry Smith 
3528273d9f13SBarry Smith    Options Database Keys:
3529a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
3530273d9f13SBarry Smith                      block calculations (much slower)
3531a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3532273d9f13SBarry Smith 
3533273d9f13SBarry Smith    Level: intermediate
3534273d9f13SBarry Smith 
3535273d9f13SBarry Smith    Notes:
353649a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
353749a6f317SBarry Smith 
3538aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3539aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3540aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3541aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3542aa95bbe8SBarry Smith 
3543273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3544273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3545273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3546273d9f13SBarry Smith 
3547273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
35480298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3549a7f22e61SSatish Balay    allocation.  See Users-Manual: ch_mat for details.
3550273d9f13SBarry Smith 
355169b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3552273d9f13SBarry Smith @*/
35537087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3554273d9f13SBarry Smith {
35554ac538c5SBarry Smith   PetscErrorCode ierr;
3556273d9f13SBarry Smith 
3557273d9f13SBarry Smith   PetscFunctionBegin;
35586ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
35596ba663aaSJed Brown   PetscValidType(B,1);
35606ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
35614ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3562273d9f13SBarry Smith   PetscFunctionReturn(0);
3563273d9f13SBarry Smith }
3564a1d92eedSBarry Smith 
3565725b52f3SLisandro Dalcin /*@C
3566664954b6SBarry Smith    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3567725b52f3SLisandro Dalcin 
3568d083f849SBarry Smith    Collective
3569725b52f3SLisandro Dalcin 
3570725b52f3SLisandro Dalcin    Input Parameters:
35711c4f3114SJed Brown +  B - the matrix
3572725b52f3SLisandro Dalcin .  i - the indices into j for the start of each local row (starts with zero)
3573725b52f3SLisandro Dalcin .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3574725b52f3SLisandro Dalcin -  v - optional values in the matrix
3575725b52f3SLisandro Dalcin 
3576664954b6SBarry Smith    Level: advanced
3577725b52f3SLisandro Dalcin 
35783adadaf3SJed Brown    Notes:
35793adadaf3SJed Brown    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
35803adadaf3SJed Brown    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
35813adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
35823adadaf3SJed Brown    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
35833adadaf3SJed Brown    block column and the second index is over columns within a block.
35843adadaf3SJed Brown 
3585664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3586664954b6SBarry Smith 
3587725b52f3SLisandro Dalcin .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3588725b52f3SLisandro Dalcin @*/
35897087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3590725b52f3SLisandro Dalcin {
35914ac538c5SBarry Smith   PetscErrorCode ierr;
3592725b52f3SLisandro Dalcin 
3593725b52f3SLisandro Dalcin   PetscFunctionBegin;
35946ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
35956ba663aaSJed Brown   PetscValidType(B,1);
35966ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
35974ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3598725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3599725b52f3SLisandro Dalcin }
3600725b52f3SLisandro Dalcin 
3601c75a6043SHong Zhang /*@
3602dfb205c3SBarry Smith      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3603c75a6043SHong Zhang 
3604d083f849SBarry Smith      Collective
3605c75a6043SHong Zhang 
3606c75a6043SHong Zhang    Input Parameters:
3607c75a6043SHong Zhang +  comm - must be an MPI communicator of size 1
3608c75a6043SHong Zhang .  bs - size of block
3609c75a6043SHong Zhang .  m - number of rows
3610c75a6043SHong Zhang .  n - number of columns
3611483a2f95SBarry Smith .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3612c75a6043SHong Zhang .  j - column indices
3613c75a6043SHong Zhang -  a - matrix values
3614c75a6043SHong Zhang 
3615c75a6043SHong Zhang    Output Parameter:
3616c75a6043SHong Zhang .  mat - the matrix
3617c75a6043SHong Zhang 
3618dfb205c3SBarry Smith    Level: advanced
3619c75a6043SHong Zhang 
3620c75a6043SHong Zhang    Notes:
3621c75a6043SHong Zhang        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3622c75a6043SHong Zhang     once the matrix is destroyed
3623c75a6043SHong Zhang 
3624c75a6043SHong Zhang        You cannot set new nonzero locations into this matrix, that will generate an error.
3625c75a6043SHong Zhang 
3626c75a6043SHong Zhang        The i and j indices are 0 based
3627c75a6043SHong Zhang 
3628dfb205c3SBarry Smith        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3629dfb205c3SBarry Smith 
36303adadaf3SJed Brown       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
36313adadaf3SJed Brown       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
36323adadaf3SJed Brown       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
36333adadaf3SJed Brown       with column-major ordering within blocks.
3634dfb205c3SBarry Smith 
363569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3636c75a6043SHong Zhang 
3637c75a6043SHong Zhang @*/
3638c3c607ccSBarry Smith PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3639c75a6043SHong Zhang {
3640c75a6043SHong Zhang   PetscErrorCode ierr;
3641c75a6043SHong Zhang   PetscInt       ii;
3642c75a6043SHong Zhang   Mat_SeqBAIJ    *baij;
3643c75a6043SHong Zhang 
3644c75a6043SHong Zhang   PetscFunctionBegin;
3645e32f2f54SBarry Smith   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
364641096f02SStefano Zampini   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3647c75a6043SHong Zhang 
3648c75a6043SHong Zhang   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3649c75a6043SHong Zhang   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3650c75a6043SHong Zhang   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3651f4259b30SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3652c75a6043SHong Zhang   baij = (Mat_SeqBAIJ*)(*mat)->data;
3653dcca6d9dSJed Brown   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
36543bb1ff40SBarry Smith   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3655c75a6043SHong Zhang 
3656c75a6043SHong Zhang   baij->i = i;
3657c75a6043SHong Zhang   baij->j = j;
3658c75a6043SHong Zhang   baij->a = a;
365926fbe8dcSKarl Rupp 
3660c75a6043SHong Zhang   baij->singlemalloc = PETSC_FALSE;
3661c75a6043SHong Zhang   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3662e6b907acSBarry Smith   baij->free_a       = PETSC_FALSE;
3663e6b907acSBarry Smith   baij->free_ij      = PETSC_FALSE;
3664c75a6043SHong Zhang 
3665c75a6043SHong Zhang   for (ii=0; ii<m; ii++) {
3666c75a6043SHong Zhang     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3667cf9c20a2SJed Brown     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3668c75a6043SHong Zhang   }
366976bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3670c75a6043SHong Zhang     for (ii=0; ii<baij->i[m]; ii++) {
3671e32f2f54SBarry Smith       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3672e32f2f54SBarry Smith       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3673c75a6043SHong Zhang     }
367476bd3646SJed Brown   }
3675c75a6043SHong Zhang 
3676c75a6043SHong Zhang   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3677c75a6043SHong Zhang   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3678c75a6043SHong Zhang   PetscFunctionReturn(0);
3679c75a6043SHong Zhang }
3680bdf6f3fcSHong Zhang 
3681bdf6f3fcSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3682bdf6f3fcSHong Zhang {
3683bdf6f3fcSHong Zhang   PetscErrorCode ierr;
36848761c3d6SHong Zhang   PetscMPIInt    size;
3685bdf6f3fcSHong Zhang 
3686bdf6f3fcSHong Zhang   PetscFunctionBegin;
3687ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
36888761c3d6SHong Zhang   if (size == 1 && scall == MAT_REUSE_MATRIX) {
36898761c3d6SHong Zhang     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
36908761c3d6SHong Zhang   } else {
3691bdf6f3fcSHong Zhang     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
36928761c3d6SHong Zhang   }
3693bdf6f3fcSHong Zhang   PetscFunctionReturn(0);
3694bdf6f3fcSHong Zhang }
3695