xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 1e1ea65d8de51fde77ce8a787efbef25e407badc)
1be1d678aSKris Buschelman 
22593348eSBarry Smith /*
3b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
42593348eSBarry Smith   matrix storage format.
52593348eSBarry Smith */
6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7c6db04a5SJed Brown #include <petscblaslapack.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
1043516a2dSKris Buschelman 
117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
137ea3e4caSstefano_zampini #endif
147ea3e4caSstefano_zampini 
15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17b5b72c8aSIrina Sokolova #endif
18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19b5b72c8aSIrina Sokolova 
20713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
21b01c7715SBarry Smith {
22b01c7715SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
236849ba73SBarry Smith   PetscErrorCode ierr;
24de80f912SBarry Smith   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
257f0c90edSBarry Smith   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
2662bba022SBarry Smith   PetscReal      shift = 0.0;
271a9391e3SHong Zhang   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
28b01c7715SBarry Smith 
29b01c7715SBarry Smith   PetscFunctionBegin;
30a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
31a455e926SHong Zhang 
329797317bSBarry Smith   if (a->idiagvalid) {
339797317bSBarry Smith     if (values) *values = a->idiag;
349797317bSBarry Smith     PetscFunctionReturn(0);
359797317bSBarry Smith   }
36b01c7715SBarry Smith   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
37b01c7715SBarry Smith   diag_offset = a->diag;
38b01c7715SBarry Smith   if (!a->idiag) {
397f0c90edSBarry Smith     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
407f0c90edSBarry Smith     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
41b01c7715SBarry Smith   }
42b01c7715SBarry Smith   diag  = a->idiag;
43bbead8a2SBarry Smith   if (values) *values = a->idiag;
44b01c7715SBarry Smith   /* factor and invert each block */
45521d7252SBarry Smith   switch (bs) {
46ab040260SJed Brown   case 1:
47ab040260SJed Brown     for (i=0; i<mbs; i++) {
48ab040260SJed Brown       odiag    = v + 1*diag_offset[i];
49ab040260SJed Brown       diag[0]  = odiag[0];
50ec1892c8SHong Zhang 
51ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
52ec1892c8SHong Zhang         if (allowzeropivot) {
537b6c816cSBarry Smith           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
547b6c816cSBarry Smith           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
557b6c816cSBarry Smith           A->factorerror_zeropivot_row   = i;
56ec1892c8SHong Zhang           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
577b6c816cSBarry Smith         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
58ec1892c8SHong Zhang       }
59ec1892c8SHong Zhang 
60d4a378daSJed Brown       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
61ab040260SJed Brown       diag    += 1;
62ab040260SJed Brown     }
63ab040260SJed Brown     break;
64b01c7715SBarry Smith   case 2:
65b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
66b01c7715SBarry Smith       odiag    = v + 4*diag_offset[i];
67b01c7715SBarry Smith       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68a455e926SHong Zhang       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
697b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
70b01c7715SBarry Smith       diag    += 4;
71b01c7715SBarry Smith     }
72b01c7715SBarry Smith     break;
73b01c7715SBarry Smith   case 3:
74b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
75b01c7715SBarry Smith       odiag    = v + 9*diag_offset[i];
76b01c7715SBarry Smith       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77b01c7715SBarry Smith       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78b01c7715SBarry Smith       diag[8]  = odiag[8];
79a455e926SHong Zhang       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
807b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
81b01c7715SBarry Smith       diag    += 9;
82b01c7715SBarry Smith     }
83b01c7715SBarry Smith     break;
84b01c7715SBarry Smith   case 4:
85b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
86b01c7715SBarry Smith       odiag  = v + 16*diag_offset[i];
87580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
88a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
897b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
90b01c7715SBarry Smith       diag  += 16;
91b01c7715SBarry Smith     }
92b01c7715SBarry Smith     break;
93b01c7715SBarry Smith   case 5:
94b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
95b01c7715SBarry Smith       odiag  = v + 25*diag_offset[i];
96580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
97a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
987b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99b01c7715SBarry Smith       diag  += 25;
100b01c7715SBarry Smith     }
101b01c7715SBarry Smith     break;
102d49b2adcSBarry Smith   case 6:
103d49b2adcSBarry Smith     for (i=0; i<mbs; i++) {
104d49b2adcSBarry Smith       odiag  = v + 36*diag_offset[i];
105580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
106a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1077b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
108d49b2adcSBarry Smith       diag  += 36;
109d49b2adcSBarry Smith     }
110d49b2adcSBarry Smith     break;
111de80f912SBarry Smith   case 7:
112de80f912SBarry Smith     for (i=0; i<mbs; i++) {
113de80f912SBarry Smith       odiag  = v + 49*diag_offset[i];
114580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
115a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1167b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117de80f912SBarry Smith       diag  += 49;
118de80f912SBarry Smith     }
119de80f912SBarry Smith     break;
120b01c7715SBarry Smith   default:
121dcca6d9dSJed Brown     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
122de80f912SBarry Smith     for (i=0; i<mbs; i++) {
123de80f912SBarry Smith       odiag  = v + bs2*diag_offset[i];
124580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
1255f8bbccaSHong Zhang       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1267b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127de80f912SBarry Smith       diag  += bs2;
128de80f912SBarry Smith     }
129de80f912SBarry Smith     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
130b01c7715SBarry Smith   }
131b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
132b01c7715SBarry Smith   PetscFunctionReturn(0);
133b01c7715SBarry Smith }
134b01c7715SBarry Smith 
135e48d15efSToby Isaac PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1366d3beeddSMatthew Knepley {
1376d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
138e48d15efSToby Isaac   PetscScalar       *x,*work,*w,*workt,*t;
139e48d15efSToby Isaac   const MatScalar   *v,*aa = a->a, *idiag;
140e48d15efSToby Isaac   const PetscScalar *b,*xb;
1415455b99fSToby Isaac   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
1426d3beeddSMatthew Knepley   PetscErrorCode    ierr;
143e48d15efSToby Isaac   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
144c1ac3661SBarry Smith   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
145b01c7715SBarry Smith 
146b01c7715SBarry Smith   PetscFunctionBegin;
147b01c7715SBarry Smith   its = its*lits;
148e32f2f54SBarry Smith   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149e32f2f54SBarry Smith   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
15052c4f950SSatish Balay   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
15152c4f950SSatish Balay   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
15252c4f950SSatish Balay   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
153b01c7715SBarry Smith 
1540298fd71SBarry Smith   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
155b01c7715SBarry Smith 
156b2ec919aSToby Isaac   if (!m) PetscFunctionReturn(0);
157b01c7715SBarry Smith   diag  = a->diag;
158b01c7715SBarry Smith   idiag = a->idiag;
159de80f912SBarry Smith   k    = PetscMax(A->rmap->n,A->cmap->n);
160e48d15efSToby Isaac   if (!a->mult_work) {
161f361c04dSBarry Smith     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
162de80f912SBarry Smith   }
1633475c22fSBarry Smith   if (!a->sor_workt) {
164f361c04dSBarry Smith     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
165de80f912SBarry Smith   }
166de80f912SBarry Smith   if (!a->sor_work) {
167785e854fSJed Brown     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
168de80f912SBarry Smith   }
1693475c22fSBarry Smith   work = a->mult_work;
1703475c22fSBarry Smith   t    = a->sor_workt;
171de80f912SBarry Smith   w    = a->sor_work;
172de80f912SBarry Smith 
173de80f912SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174de80f912SBarry Smith   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175de80f912SBarry Smith 
176de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
177de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178e48d15efSToby Isaac       switch (bs) {
179e48d15efSToby Isaac       case 1:
180e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181e48d15efSToby Isaac         t[0] = b[0];
182e48d15efSToby Isaac         i2     = 1;
183e48d15efSToby Isaac         idiag += 1;
184e48d15efSToby Isaac         for (i=1; i<m; i++) {
185e48d15efSToby Isaac           v  = aa + ai[i];
186e48d15efSToby Isaac           vi = aj + ai[i];
187e48d15efSToby Isaac           nz = diag[i] - ai[i];
188e48d15efSToby Isaac           s[0] = b[i2];
189e48d15efSToby Isaac           for (j=0; j<nz; j++) {
190e48d15efSToby Isaac             xw[0] = x[vi[j]];
191e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192e48d15efSToby Isaac           }
193e48d15efSToby Isaac           t[i2] = s[0];
194e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195e48d15efSToby Isaac           x[i2]  = xw[0];
196e48d15efSToby Isaac           idiag += 1;
197e48d15efSToby Isaac           i2    += 1;
198e48d15efSToby Isaac         }
199e48d15efSToby Isaac         break;
200e48d15efSToby Isaac       case 2:
201e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1];
203e48d15efSToby Isaac         i2     = 2;
204e48d15efSToby Isaac         idiag += 4;
205e48d15efSToby Isaac         for (i=1; i<m; i++) {
206e48d15efSToby Isaac           v  = aa + 4*ai[i];
207e48d15efSToby Isaac           vi = aj + ai[i];
208e48d15efSToby Isaac           nz = diag[i] - ai[i];
209e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
210e48d15efSToby Isaac           for (j=0; j<nz; j++) {
211e48d15efSToby Isaac             idx = 2*vi[j];
212e48d15efSToby Isaac             it  = 4*j;
213e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
214e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215e48d15efSToby Isaac           }
216e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1];
217e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218e48d15efSToby Isaac           x[i2]   = xw[0]; x[i2+1] = xw[1];
219e48d15efSToby Isaac           idiag  += 4;
220e48d15efSToby Isaac           i2     += 2;
221e48d15efSToby Isaac         }
222e48d15efSToby Isaac         break;
223e48d15efSToby Isaac       case 3:
224e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226e48d15efSToby Isaac         i2     = 3;
227e48d15efSToby Isaac         idiag += 9;
228e48d15efSToby Isaac         for (i=1; i<m; i++) {
229e48d15efSToby Isaac           v  = aa + 9*ai[i];
230e48d15efSToby Isaac           vi = aj + ai[i];
231e48d15efSToby Isaac           nz = diag[i] - ai[i];
232e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233e48d15efSToby Isaac           while (nz--) {
234e48d15efSToby Isaac             idx = 3*(*vi++);
235e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237e48d15efSToby Isaac             v  += 9;
238e48d15efSToby Isaac           }
239e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242e48d15efSToby Isaac           idiag  += 9;
243e48d15efSToby Isaac           i2     += 3;
244e48d15efSToby Isaac         }
245e48d15efSToby Isaac         break;
246e48d15efSToby Isaac       case 4:
247e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249e48d15efSToby Isaac         i2     = 4;
250e48d15efSToby Isaac         idiag += 16;
251e48d15efSToby Isaac         for (i=1; i<m; i++) {
252e48d15efSToby Isaac           v  = aa + 16*ai[i];
253e48d15efSToby Isaac           vi = aj + ai[i];
254e48d15efSToby Isaac           nz = diag[i] - ai[i];
255e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256e48d15efSToby Isaac           while (nz--) {
257e48d15efSToby Isaac             idx = 4*(*vi++);
258e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260e48d15efSToby Isaac             v  += 16;
261e48d15efSToby Isaac           }
262e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265e48d15efSToby Isaac           idiag  += 16;
266e48d15efSToby Isaac           i2     += 4;
267e48d15efSToby Isaac         }
268e48d15efSToby Isaac         break;
269e48d15efSToby Isaac       case 5:
270e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272e48d15efSToby Isaac         i2     = 5;
273e48d15efSToby Isaac         idiag += 25;
274e48d15efSToby Isaac         for (i=1; i<m; i++) {
275e48d15efSToby Isaac           v  = aa + 25*ai[i];
276e48d15efSToby Isaac           vi = aj + ai[i];
277e48d15efSToby Isaac           nz = diag[i] - ai[i];
278e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279e48d15efSToby Isaac           while (nz--) {
280e48d15efSToby Isaac             idx = 5*(*vi++);
281e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283e48d15efSToby Isaac             v  += 25;
284e48d15efSToby Isaac           }
285e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288e48d15efSToby Isaac           idiag  += 25;
289e48d15efSToby Isaac           i2     += 5;
290e48d15efSToby Isaac         }
291e48d15efSToby Isaac         break;
292e48d15efSToby Isaac       case 6:
293e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295e48d15efSToby Isaac         i2     = 6;
296e48d15efSToby Isaac         idiag += 36;
297e48d15efSToby Isaac         for (i=1; i<m; i++) {
298e48d15efSToby Isaac           v  = aa + 36*ai[i];
299e48d15efSToby Isaac           vi = aj + ai[i];
300e48d15efSToby Isaac           nz = diag[i] - ai[i];
301e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302e48d15efSToby Isaac           while (nz--) {
303e48d15efSToby Isaac             idx = 6*(*vi++);
304e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
305e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307e48d15efSToby Isaac             v  += 36;
308e48d15efSToby Isaac           }
309e48d15efSToby Isaac           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310e48d15efSToby Isaac           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313e48d15efSToby Isaac           idiag  += 36;
314e48d15efSToby Isaac           i2     += 6;
315e48d15efSToby Isaac         }
316e48d15efSToby Isaac         break;
317e48d15efSToby Isaac       case 7:
318e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320e48d15efSToby Isaac         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321e48d15efSToby Isaac         i2     = 7;
322e48d15efSToby Isaac         idiag += 49;
323e48d15efSToby Isaac         for (i=1; i<m; i++) {
324e48d15efSToby Isaac           v  = aa + 49*ai[i];
325e48d15efSToby Isaac           vi = aj + ai[i];
326e48d15efSToby Isaac           nz = diag[i] - ai[i];
327e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
328e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329e48d15efSToby Isaac           while (nz--) {
330e48d15efSToby Isaac             idx = 7*(*vi++);
331e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
332e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334e48d15efSToby Isaac             v  += 49;
335e48d15efSToby Isaac           }
336e48d15efSToby Isaac           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337e48d15efSToby Isaac           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339e48d15efSToby Isaac           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340e48d15efSToby Isaac           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341e48d15efSToby Isaac           idiag  += 49;
342e48d15efSToby Isaac           i2     += 7;
343e48d15efSToby Isaac         }
344e48d15efSToby Isaac         break;
345e48d15efSToby Isaac       default:
34696b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347580bdb30SBarry Smith         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
348de80f912SBarry Smith         i2     = bs;
349de80f912SBarry Smith         idiag += bs2;
350de80f912SBarry Smith         for (i=1; i<m; i++) {
351de80f912SBarry Smith           v  = aa + bs2*ai[i];
352de80f912SBarry Smith           vi = aj + ai[i];
353de80f912SBarry Smith           nz = diag[i] - ai[i];
354de80f912SBarry Smith 
355580bdb30SBarry Smith           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
356de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
357de80f912SBarry Smith           workt = work;
358de80f912SBarry Smith           for (j=0; j<nz; j++) {
359580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
360de80f912SBarry Smith             workt += bs;
361de80f912SBarry Smith           }
36296b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363580bdb30SBarry Smith           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
36496b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
365de80f912SBarry Smith 
366de80f912SBarry Smith           idiag += bs2;
367de80f912SBarry Smith           i2    += bs;
368de80f912SBarry Smith         }
369e48d15efSToby Isaac         break;
370e48d15efSToby Isaac       }
371de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372e48d15efSToby Isaac       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
373e48d15efSToby Isaac       xb = t;
374de80f912SBarry Smith     }
375e48d15efSToby Isaac     else xb = b;
376de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377e48d15efSToby Isaac       idiag = a->idiag+bs2*(a->mbs-1);
378e48d15efSToby Isaac       i2 = bs * (m-1);
379e48d15efSToby Isaac       switch (bs) {
380e48d15efSToby Isaac       case 1:
381e48d15efSToby Isaac         s[0]  = xb[i2];
382e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383e48d15efSToby Isaac         x[i2] = xw[0];
384e48d15efSToby Isaac         i2   -= 1;
385e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
386e48d15efSToby Isaac           v  = aa + (diag[i]+1);
387e48d15efSToby Isaac           vi = aj + diag[i] + 1;
388e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
389e48d15efSToby Isaac           s[0] = xb[i2];
390e48d15efSToby Isaac           for (j=0; j<nz; j++) {
391e48d15efSToby Isaac             xw[0] = x[vi[j]];
392e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393e48d15efSToby Isaac           }
394e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395e48d15efSToby Isaac           x[i2]  = xw[0];
396e48d15efSToby Isaac           idiag -= 1;
397e48d15efSToby Isaac           i2    -= 1;
398e48d15efSToby Isaac         }
399e48d15efSToby Isaac         break;
400e48d15efSToby Isaac       case 2:
401e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1];
402e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1];
404e48d15efSToby Isaac         i2    -= 2;
405e48d15efSToby Isaac         idiag -= 4;
406e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
407e48d15efSToby Isaac           v  = aa + 4*(diag[i] + 1);
408e48d15efSToby Isaac           vi = aj + diag[i] + 1;
409e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
410e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1];
411e48d15efSToby Isaac           for (j=0; j<nz; j++) {
412e48d15efSToby Isaac             idx = 2*vi[j];
413e48d15efSToby Isaac             it  = 4*j;
414e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
415e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416e48d15efSToby Isaac           }
417e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418e48d15efSToby Isaac           x[i2]   = xw[0]; x[i2+1] = xw[1];
419e48d15efSToby Isaac           idiag  -= 4;
420e48d15efSToby Isaac           i2     -= 2;
421e48d15efSToby Isaac         }
422e48d15efSToby Isaac         break;
423e48d15efSToby Isaac       case 3:
424e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427e48d15efSToby Isaac         i2    -= 3;
428e48d15efSToby Isaac         idiag -= 9;
429e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
430e48d15efSToby Isaac           v  = aa + 9*(diag[i]+1);
431e48d15efSToby Isaac           vi = aj + diag[i] + 1;
432e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
433e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434e48d15efSToby Isaac           while (nz--) {
435e48d15efSToby Isaac             idx = 3*(*vi++);
436e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438e48d15efSToby Isaac             v  += 9;
439e48d15efSToby Isaac           }
440e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442e48d15efSToby Isaac           idiag  -= 9;
443e48d15efSToby Isaac           i2     -= 3;
444e48d15efSToby Isaac         }
445e48d15efSToby Isaac         break;
446e48d15efSToby Isaac       case 4:
447e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450e48d15efSToby Isaac         i2    -= 4;
451e48d15efSToby Isaac         idiag -= 16;
452e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
453e48d15efSToby Isaac           v  = aa + 16*(diag[i]+1);
454e48d15efSToby Isaac           vi = aj + diag[i] + 1;
455e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
456e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457e48d15efSToby Isaac           while (nz--) {
458e48d15efSToby Isaac             idx = 4*(*vi++);
459e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461e48d15efSToby Isaac             v  += 16;
462e48d15efSToby Isaac           }
463e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465e48d15efSToby Isaac           idiag  -= 16;
466e48d15efSToby Isaac           i2     -= 4;
467e48d15efSToby Isaac         }
468e48d15efSToby Isaac         break;
469e48d15efSToby Isaac       case 5:
470e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473e48d15efSToby Isaac         i2    -= 5;
474e48d15efSToby Isaac         idiag -= 25;
475e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
476e48d15efSToby Isaac           v  = aa + 25*(diag[i]+1);
477e48d15efSToby Isaac           vi = aj + diag[i] + 1;
478e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
479e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480e48d15efSToby Isaac           while (nz--) {
481e48d15efSToby Isaac             idx = 5*(*vi++);
482e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484e48d15efSToby Isaac             v  += 25;
485e48d15efSToby Isaac           }
486e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488e48d15efSToby Isaac           idiag  -= 25;
489e48d15efSToby Isaac           i2     -= 5;
490e48d15efSToby Isaac         }
491e48d15efSToby Isaac         break;
492e48d15efSToby Isaac       case 6:
493e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496e48d15efSToby Isaac         i2    -= 6;
497e48d15efSToby Isaac         idiag -= 36;
498e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
499e48d15efSToby Isaac           v  = aa + 36*(diag[i]+1);
500e48d15efSToby Isaac           vi = aj + diag[i] + 1;
501e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
502e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503e48d15efSToby Isaac           while (nz--) {
504e48d15efSToby Isaac             idx = 6*(*vi++);
505e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
506e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508e48d15efSToby Isaac             v  += 36;
509e48d15efSToby Isaac           }
510e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512e48d15efSToby Isaac           idiag  -= 36;
513e48d15efSToby Isaac           i2     -= 6;
514e48d15efSToby Isaac         }
515e48d15efSToby Isaac         break;
516e48d15efSToby Isaac       case 7:
517e48d15efSToby Isaac         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
518e48d15efSToby Isaac         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520e48d15efSToby Isaac         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521e48d15efSToby Isaac         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522e48d15efSToby Isaac         i2    -= 7;
523e48d15efSToby Isaac         idiag -= 49;
524e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
525e48d15efSToby Isaac           v  = aa + 49*(diag[i]+1);
526e48d15efSToby Isaac           vi = aj + diag[i] + 1;
527e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
528e48d15efSToby Isaac           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
529e48d15efSToby Isaac           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530e48d15efSToby Isaac           while (nz--) {
531e48d15efSToby Isaac             idx = 7*(*vi++);
532e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
533e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535e48d15efSToby Isaac             v  += 49;
536e48d15efSToby Isaac           }
537e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538e48d15efSToby Isaac           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539e48d15efSToby Isaac           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540e48d15efSToby Isaac           idiag  -= 49;
541e48d15efSToby Isaac           i2     -= 7;
542e48d15efSToby Isaac         }
543e48d15efSToby Isaac         break;
544e48d15efSToby Isaac       default:
545580bdb30SBarry Smith         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
54696b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547de80f912SBarry Smith         i2    -= bs;
548e48d15efSToby Isaac         idiag -= bs2;
549de80f912SBarry Smith         for (i=m-2; i>=0; i--) {
550de80f912SBarry Smith           v  = aa + bs2*(diag[i]+1);
551de80f912SBarry Smith           vi = aj + diag[i] + 1;
552de80f912SBarry Smith           nz = ai[i+1] - diag[i] - 1;
553de80f912SBarry Smith 
554580bdb30SBarry Smith           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
555de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
556de80f912SBarry Smith           workt = work;
557de80f912SBarry Smith           for (j=0; j<nz; j++) {
558580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
559de80f912SBarry Smith             workt += bs;
560de80f912SBarry Smith           }
56196b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
56296b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
563e48d15efSToby Isaac 
564de80f912SBarry Smith           idiag -= bs2;
565de80f912SBarry Smith           i2    -= bs;
566de80f912SBarry Smith         }
567e48d15efSToby Isaac         break;
568e48d15efSToby Isaac       }
569de80f912SBarry Smith       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
570de80f912SBarry Smith     }
571e48d15efSToby Isaac     its--;
572e48d15efSToby Isaac   }
573e48d15efSToby Isaac   while (its--) {
574e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575e48d15efSToby Isaac       idiag = a->idiag;
576e48d15efSToby Isaac       i2 = 0;
577e48d15efSToby Isaac       switch (bs) {
578e48d15efSToby Isaac       case 1:
579e48d15efSToby Isaac         for (i=0; i<m; i++) {
580e48d15efSToby Isaac           v  = aa + ai[i];
581e48d15efSToby Isaac           vi = aj + ai[i];
582e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
583e48d15efSToby Isaac           s[0] = b[i2];
584e48d15efSToby Isaac           for (j=0; j<nz; j++) {
585e48d15efSToby Isaac             xw[0] = x[vi[j]];
586e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587e48d15efSToby Isaac           }
588e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589e48d15efSToby Isaac           x[i2] += xw[0];
590e48d15efSToby Isaac           idiag += 1;
591e48d15efSToby Isaac           i2    += 1;
592e48d15efSToby Isaac         }
593e48d15efSToby Isaac         break;
594e48d15efSToby Isaac       case 2:
595e48d15efSToby Isaac         for (i=0; i<m; i++) {
596e48d15efSToby Isaac           v  = aa + 4*ai[i];
597e48d15efSToby Isaac           vi = aj + ai[i];
598e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
599e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
600e48d15efSToby Isaac           for (j=0; j<nz; j++) {
601e48d15efSToby Isaac             idx = 2*vi[j];
602e48d15efSToby Isaac             it  = 4*j;
603e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
604e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605e48d15efSToby Isaac           }
606e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607e48d15efSToby Isaac           x[i2]  += xw[0]; x[i2+1] += xw[1];
608e48d15efSToby Isaac           idiag  += 4;
609e48d15efSToby Isaac           i2     += 2;
610e48d15efSToby Isaac         }
611e48d15efSToby Isaac         break;
612e48d15efSToby Isaac       case 3:
613e48d15efSToby Isaac         for (i=0; i<m; i++) {
614e48d15efSToby Isaac           v  = aa + 9*ai[i];
615e48d15efSToby Isaac           vi = aj + ai[i];
616e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
617e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618e48d15efSToby Isaac           while (nz--) {
619e48d15efSToby Isaac             idx = 3*(*vi++);
620e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622e48d15efSToby Isaac             v  += 9;
623e48d15efSToby Isaac           }
624e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626e48d15efSToby Isaac           idiag  += 9;
627e48d15efSToby Isaac           i2     += 3;
628e48d15efSToby Isaac         }
629e48d15efSToby Isaac         break;
630e48d15efSToby Isaac       case 4:
631e48d15efSToby Isaac         for (i=0; i<m; i++) {
632e48d15efSToby Isaac           v  = aa + 16*ai[i];
633e48d15efSToby Isaac           vi = aj + ai[i];
634e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
635e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636e48d15efSToby Isaac           while (nz--) {
637e48d15efSToby Isaac             idx = 4*(*vi++);
638e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640e48d15efSToby Isaac             v  += 16;
641e48d15efSToby Isaac           }
642e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644e48d15efSToby Isaac           idiag  += 16;
645e48d15efSToby Isaac           i2     += 4;
646e48d15efSToby Isaac         }
647e48d15efSToby Isaac         break;
648e48d15efSToby Isaac       case 5:
649e48d15efSToby Isaac         for (i=0; i<m; i++) {
650e48d15efSToby Isaac           v  = aa + 25*ai[i];
651e48d15efSToby Isaac           vi = aj + ai[i];
652e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
653e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654e48d15efSToby Isaac           while (nz--) {
655e48d15efSToby Isaac             idx = 5*(*vi++);
656e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658e48d15efSToby Isaac             v  += 25;
659e48d15efSToby Isaac           }
660e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662e48d15efSToby Isaac           idiag  += 25;
663e48d15efSToby Isaac           i2     += 5;
664e48d15efSToby Isaac         }
665e48d15efSToby Isaac         break;
666e48d15efSToby Isaac       case 6:
667e48d15efSToby Isaac         for (i=0; i<m; i++) {
668e48d15efSToby Isaac           v  = aa + 36*ai[i];
669e48d15efSToby Isaac           vi = aj + ai[i];
670e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
671e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672e48d15efSToby Isaac           while (nz--) {
673e48d15efSToby Isaac             idx = 6*(*vi++);
674e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
675e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677e48d15efSToby Isaac             v  += 36;
678e48d15efSToby Isaac           }
679e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682e48d15efSToby Isaac           idiag  += 36;
683e48d15efSToby Isaac           i2     += 6;
684e48d15efSToby Isaac         }
685e48d15efSToby Isaac         break;
686e48d15efSToby Isaac       case 7:
687e48d15efSToby Isaac         for (i=0; i<m; i++) {
688e48d15efSToby Isaac           v  = aa + 49*ai[i];
689e48d15efSToby Isaac           vi = aj + ai[i];
690e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
691e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
692e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693e48d15efSToby Isaac           while (nz--) {
694e48d15efSToby Isaac             idx = 7*(*vi++);
695e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
696e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698e48d15efSToby Isaac             v  += 49;
699e48d15efSToby Isaac           }
700e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701e48d15efSToby Isaac           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703e48d15efSToby Isaac           idiag  += 49;
704e48d15efSToby Isaac           i2     += 7;
705e48d15efSToby Isaac         }
706e48d15efSToby Isaac         break;
707e48d15efSToby Isaac       default:
708e48d15efSToby Isaac         for (i=0; i<m; i++) {
709e48d15efSToby Isaac           v  = aa + bs2*ai[i];
710e48d15efSToby Isaac           vi = aj + ai[i];
711e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
712e48d15efSToby Isaac 
713580bdb30SBarry Smith           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
714e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
715e48d15efSToby Isaac           workt = work;
716e48d15efSToby Isaac           for (j=0; j<nz; j++) {
717580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
718e48d15efSToby Isaac             workt += bs;
719e48d15efSToby Isaac           }
720e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
722e48d15efSToby Isaac 
723e48d15efSToby Isaac           idiag += bs2;
724e48d15efSToby Isaac           i2    += bs;
725e48d15efSToby Isaac         }
726e48d15efSToby Isaac         break;
727e48d15efSToby Isaac       }
728e48d15efSToby Isaac       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
729e48d15efSToby Isaac     }
730e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731e48d15efSToby Isaac       idiag = a->idiag+bs2*(a->mbs-1);
732e48d15efSToby Isaac       i2 = bs * (m-1);
733e48d15efSToby Isaac       switch (bs) {
734e48d15efSToby Isaac       case 1:
735e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
736e48d15efSToby Isaac           v  = aa + ai[i];
737e48d15efSToby Isaac           vi = aj + ai[i];
738e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
739e48d15efSToby Isaac           s[0] = b[i2];
740e48d15efSToby Isaac           for (j=0; j<nz; j++) {
741e48d15efSToby Isaac             xw[0] = x[vi[j]];
742e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743e48d15efSToby Isaac           }
744e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745e48d15efSToby Isaac           x[i2] += xw[0];
746e48d15efSToby Isaac           idiag -= 1;
747e48d15efSToby Isaac           i2    -= 1;
748e48d15efSToby Isaac         }
749e48d15efSToby Isaac         break;
750e48d15efSToby Isaac       case 2:
751e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
752e48d15efSToby Isaac           v  = aa + 4*ai[i];
753e48d15efSToby Isaac           vi = aj + ai[i];
754e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
755e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
756e48d15efSToby Isaac           for (j=0; j<nz; j++) {
757e48d15efSToby Isaac             idx = 2*vi[j];
758e48d15efSToby Isaac             it  = 4*j;
759e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
760e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761e48d15efSToby Isaac           }
762e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763e48d15efSToby Isaac           x[i2]  += xw[0]; x[i2+1] += xw[1];
764e48d15efSToby Isaac           idiag  -= 4;
765e48d15efSToby Isaac           i2     -= 2;
766e48d15efSToby Isaac         }
767e48d15efSToby Isaac         break;
768e48d15efSToby Isaac       case 3:
769e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
770e48d15efSToby Isaac           v  = aa + 9*ai[i];
771e48d15efSToby Isaac           vi = aj + ai[i];
772e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
773e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774e48d15efSToby Isaac           while (nz--) {
775e48d15efSToby Isaac             idx = 3*(*vi++);
776e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778e48d15efSToby Isaac             v  += 9;
779e48d15efSToby Isaac           }
780e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782e48d15efSToby Isaac           idiag  -= 9;
783e48d15efSToby Isaac           i2     -= 3;
784e48d15efSToby Isaac         }
785e48d15efSToby Isaac         break;
786e48d15efSToby Isaac       case 4:
787e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
788e48d15efSToby Isaac           v  = aa + 16*ai[i];
789e48d15efSToby Isaac           vi = aj + ai[i];
790e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
791e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792e48d15efSToby Isaac           while (nz--) {
793e48d15efSToby Isaac             idx = 4*(*vi++);
794e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796e48d15efSToby Isaac             v  += 16;
797e48d15efSToby Isaac           }
798e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800e48d15efSToby Isaac           idiag  -= 16;
801e48d15efSToby Isaac           i2     -= 4;
802e48d15efSToby Isaac         }
803e48d15efSToby Isaac         break;
804e48d15efSToby Isaac       case 5:
805e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
806e48d15efSToby Isaac           v  = aa + 25*ai[i];
807e48d15efSToby Isaac           vi = aj + ai[i];
808e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
809e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810e48d15efSToby Isaac           while (nz--) {
811e48d15efSToby Isaac             idx = 5*(*vi++);
812e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814e48d15efSToby Isaac             v  += 25;
815e48d15efSToby Isaac           }
816e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818e48d15efSToby Isaac           idiag  -= 25;
819e48d15efSToby Isaac           i2     -= 5;
820e48d15efSToby Isaac         }
821e48d15efSToby Isaac         break;
822e48d15efSToby Isaac       case 6:
823e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
824e48d15efSToby Isaac           v  = aa + 36*ai[i];
825e48d15efSToby Isaac           vi = aj + ai[i];
826e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
827e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828e48d15efSToby Isaac           while (nz--) {
829e48d15efSToby Isaac             idx = 6*(*vi++);
830e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
831e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833e48d15efSToby Isaac             v  += 36;
834e48d15efSToby Isaac           }
835e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838e48d15efSToby Isaac           idiag  -= 36;
839e48d15efSToby Isaac           i2     -= 6;
840e48d15efSToby Isaac         }
841e48d15efSToby Isaac         break;
842e48d15efSToby Isaac       case 7:
843e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
844e48d15efSToby Isaac           v  = aa + 49*ai[i];
845e48d15efSToby Isaac           vi = aj + ai[i];
846e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
847e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
848e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849e48d15efSToby Isaac           while (nz--) {
850e48d15efSToby Isaac             idx = 7*(*vi++);
851e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
852e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854e48d15efSToby Isaac             v  += 49;
855e48d15efSToby Isaac           }
856e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857e48d15efSToby Isaac           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859e48d15efSToby Isaac           idiag  -= 49;
860e48d15efSToby Isaac           i2     -= 7;
861e48d15efSToby Isaac         }
862e48d15efSToby Isaac         break;
863e48d15efSToby Isaac       default:
864e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
865e48d15efSToby Isaac           v  = aa + bs2*ai[i];
866e48d15efSToby Isaac           vi = aj + ai[i];
867e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
868e48d15efSToby Isaac 
869580bdb30SBarry Smith           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
870e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
871e48d15efSToby Isaac           workt = work;
872e48d15efSToby Isaac           for (j=0; j<nz; j++) {
873580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
874e48d15efSToby Isaac             workt += bs;
875e48d15efSToby Isaac           }
876e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
878e48d15efSToby Isaac 
879e48d15efSToby Isaac           idiag -= bs2;
880e48d15efSToby Isaac           i2    -= bs;
881e48d15efSToby Isaac         }
882e48d15efSToby Isaac         break;
883e48d15efSToby Isaac       }
884e48d15efSToby Isaac       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
885e48d15efSToby Isaac     }
886e48d15efSToby Isaac   }
887de80f912SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888de80f912SBarry Smith   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
889de80f912SBarry Smith   PetscFunctionReturn(0);
890de80f912SBarry Smith }
891de80f912SBarry Smith 
892af674e45SBarry Smith /*
89381824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
894af674e45SBarry Smith */
895af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
896af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
897af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
898af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4
899af674e45SBarry Smith #endif
900af674e45SBarry Smith 
9018cc058d9SJed Brown PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
902af674e45SBarry Smith {
903af674e45SBarry Smith   Mat               A  = *AA;
904af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
905c1ac3661SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
906c1ac3661SBarry Smith   PetscInt          *ai    =a->i,*ailen=a->ilen;
90717ec6a02SBarry Smith   PetscInt          *aj    =a->j,stepval,lastcol = -1;
908f15d580aSBarry Smith   const PetscScalar *value = v;
9094bb09213Spetsc   MatScalar         *ap,*aa = a->a,*bap;
91070990e77SSatish Balay   PetscErrorCode    ierr;
911af674e45SBarry Smith 
912af674e45SBarry Smith   PetscFunctionBegin;
913ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
914af674e45SBarry Smith   stepval = (n-1)*4;
915af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
916af674e45SBarry Smith     row  = im[k];
917af674e45SBarry Smith     rp   = aj + ai[row];
918af674e45SBarry Smith     ap   = aa + 16*ai[row];
919af674e45SBarry Smith     nrow = ailen[row];
920af674e45SBarry Smith     low  = 0;
92117ec6a02SBarry Smith     high = nrow;
922af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
923af674e45SBarry Smith       col = in[l];
924db4deed7SKarl Rupp       if (col <= lastcol)  low = 0;
925db4deed7SKarl Rupp       else                high = nrow;
92617ec6a02SBarry Smith       lastcol = col;
9271e3347e8SBarry Smith       value   = v + k*(stepval+4 + l)*4;
928af674e45SBarry Smith       while (high-low > 7) {
929af674e45SBarry Smith         t = (low+high)/2;
930af674e45SBarry Smith         if (rp[t] > col) high = t;
931af674e45SBarry Smith         else             low  = t;
932af674e45SBarry Smith       }
933af674e45SBarry Smith       for (i=low; i<high; i++) {
934af674e45SBarry Smith         if (rp[i] > col) break;
935af674e45SBarry Smith         if (rp[i] == col) {
936af674e45SBarry Smith           bap = ap +  16*i;
937af674e45SBarry Smith           for (ii=0; ii<4; ii++,value+=stepval) {
938af674e45SBarry Smith             for (jj=ii; jj<16; jj+=4) {
939af674e45SBarry Smith               bap[jj] += *value++;
940af674e45SBarry Smith             }
941af674e45SBarry Smith           }
942af674e45SBarry Smith           goto noinsert2;
943af674e45SBarry Smith         }
944af674e45SBarry Smith       }
945af674e45SBarry Smith       N = nrow++ - 1;
94617ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
947af674e45SBarry Smith       /* shift up all the later entries in this row */
948af674e45SBarry Smith       for (ii=N; ii>=i; ii--) {
949af674e45SBarry Smith         rp[ii+1] = rp[ii];
95070990e77SSatish Balay         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
951af674e45SBarry Smith       }
952af674e45SBarry Smith       if (N >= i) {
95370990e77SSatish Balay         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
954af674e45SBarry Smith       }
955af674e45SBarry Smith       rp[i] = col;
956af674e45SBarry Smith       bap   = ap +  16*i;
957af674e45SBarry Smith       for (ii=0; ii<4; ii++,value+=stepval) {
958af674e45SBarry Smith         for (jj=ii; jj<16; jj+=4) {
959af674e45SBarry Smith           bap[jj] = *value++;
960af674e45SBarry Smith         }
961af674e45SBarry Smith       }
962af674e45SBarry Smith       noinsert2:;
963af674e45SBarry Smith       low = i;
964af674e45SBarry Smith     }
965af674e45SBarry Smith     ailen[row] = nrow;
966af674e45SBarry Smith   }
967be1d678aSKris Buschelman   PetscFunctionReturnVoid();
968af674e45SBarry Smith }
969af674e45SBarry Smith 
970af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
971af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4
972af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
973af674e45SBarry Smith #define matsetvalues4_ matsetvalues4
974af674e45SBarry Smith #endif
975af674e45SBarry Smith 
9768cc058d9SJed Brown PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
977af674e45SBarry Smith {
978af674e45SBarry Smith   Mat         A  = *AA;
979af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
980580bdb30SBarry Smith   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
981c1ac3661SBarry Smith   PetscInt    *ai=a->i,*ailen=a->ilen;
982c1ac3661SBarry Smith   PetscInt    *aj=a->j,brow,bcol;
98317ec6a02SBarry Smith   PetscInt    ridx,cidx,lastcol = -1;
984af674e45SBarry Smith   MatScalar   *ap,value,*aa=a->a,*bap;
98570990e77SSatish Balay   PetscErrorCode ierr;
986af674e45SBarry Smith 
987af674e45SBarry Smith   PetscFunctionBegin;
988af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
989af674e45SBarry Smith     row  = im[k]; brow = row/4;
990af674e45SBarry Smith     rp   = aj + ai[brow];
991af674e45SBarry Smith     ap   = aa + 16*ai[brow];
992af674e45SBarry Smith     nrow = ailen[brow];
993af674e45SBarry Smith     low  = 0;
99417ec6a02SBarry Smith     high = nrow;
995af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
996af674e45SBarry Smith       col   = in[l]; bcol = col/4;
997af674e45SBarry Smith       ridx  = row % 4; cidx = col % 4;
998af674e45SBarry Smith       value = v[l + k*n];
999db4deed7SKarl Rupp       if (col <= lastcol)  low = 0;
1000db4deed7SKarl Rupp       else                high = nrow;
100117ec6a02SBarry Smith       lastcol = col;
1002af674e45SBarry Smith       while (high-low > 7) {
1003af674e45SBarry Smith         t = (low+high)/2;
1004af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1005af674e45SBarry Smith         else              low  = t;
1006af674e45SBarry Smith       }
1007af674e45SBarry Smith       for (i=low; i<high; i++) {
1008af674e45SBarry Smith         if (rp[i] > bcol) break;
1009af674e45SBarry Smith         if (rp[i] == bcol) {
1010af674e45SBarry Smith           bap   = ap +  16*i + 4*cidx + ridx;
1011af674e45SBarry Smith           *bap += value;
1012af674e45SBarry Smith           goto noinsert1;
1013af674e45SBarry Smith         }
1014af674e45SBarry Smith       }
1015af674e45SBarry Smith       N = nrow++ - 1;
101617ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1017af674e45SBarry Smith       /* shift up all the later entries in this row */
101870990e77SSatish Balay       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
101970990e77SSatish Balay       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
102070990e77SSatish Balay       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1021af674e45SBarry Smith       rp[i]                    = bcol;
1022af674e45SBarry Smith       ap[16*i + 4*cidx + ridx] = value;
1023af674e45SBarry Smith noinsert1:;
1024af674e45SBarry Smith       low = i;
1025af674e45SBarry Smith     }
1026af674e45SBarry Smith     ailen[brow] = nrow;
1027af674e45SBarry Smith   }
1028be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1029af674e45SBarry Smith }
1030af674e45SBarry Smith 
1031be5855fcSBarry Smith /*
1032be5855fcSBarry Smith      Checks for missing diagonals
1033be5855fcSBarry Smith */
1034ace3abfcSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035be5855fcSBarry Smith {
1036be5855fcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
10376849ba73SBarry Smith   PetscErrorCode ierr;
10387734d3b5SMatthew G. Knepley   PetscInt       *diag,*ii = a->i,i;
1039be5855fcSBarry Smith 
1040be5855fcSBarry Smith   PetscFunctionBegin;
1041c4992f7dSBarry Smith   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
10422af78befSBarry Smith   *missing = PETSC_FALSE;
10437734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
10442efa7f71SHong Zhang     *missing = PETSC_TRUE;
10452efa7f71SHong Zhang     if (d) *d = 0;
1046994fe344SLisandro Dalcin     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
10472efa7f71SHong Zhang   } else {
104801445905SHong Zhang     PetscInt n;
104901445905SHong Zhang     n = PetscMin(a->mbs, a->nbs);
1050883fce79SBarry Smith     diag = a->diag;
105101445905SHong Zhang     for (i=0; i<n; i++) {
10527734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i+1]) {
10532af78befSBarry Smith         *missing = PETSC_TRUE;
10542af78befSBarry Smith         if (d) *d = i;
1055994fe344SLisandro Dalcin         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1056358d2f5dSShri Abhyankar         break;
10572efa7f71SHong Zhang       }
1058be5855fcSBarry Smith     }
1059be5855fcSBarry Smith   }
1060be5855fcSBarry Smith   PetscFunctionReturn(0);
1061be5855fcSBarry Smith }
1062be5855fcSBarry Smith 
1063dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064de6a44a3SBarry Smith {
1065de6a44a3SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
10666849ba73SBarry Smith   PetscErrorCode ierr;
106709f38230SBarry Smith   PetscInt       i,j,m = a->mbs;
1068de6a44a3SBarry Smith 
10693a40ed3dSBarry Smith   PetscFunctionBegin;
107009f38230SBarry Smith   if (!a->diag) {
1071785e854fSJed Brown     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
10723bb1ff40SBarry Smith     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
10734fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
107409f38230SBarry Smith   }
10757fc0212eSBarry Smith   for (i=0; i<m; i++) {
107609f38230SBarry Smith     a->diag[i] = a->i[i+1];
1077de6a44a3SBarry Smith     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078de6a44a3SBarry Smith       if (a->j[j] == i) {
107909f38230SBarry Smith         a->diag[i] = j;
1080de6a44a3SBarry Smith         break;
1081de6a44a3SBarry Smith       }
1082de6a44a3SBarry Smith     }
1083de6a44a3SBarry Smith   }
10843a40ed3dSBarry Smith   PetscFunctionReturn(0);
1085de6a44a3SBarry Smith }
10862593348eSBarry Smith 
10871a83f524SJed Brown static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
10883b2fbd54SBarry Smith {
10893b2fbd54SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1090dfbe8321SBarry Smith   PetscErrorCode ierr;
10911a83f524SJed Brown   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
10921a83f524SJed Brown   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
10933b2fbd54SBarry Smith 
10943a40ed3dSBarry Smith   PetscFunctionBegin;
10953b2fbd54SBarry Smith   *nn = n;
10963a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
10973b2fbd54SBarry Smith   if (symmetric) {
10982462f5fdSStefano Zampini     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1099553b3c51SBarry Smith     nz   = tia[n];
11003b2fbd54SBarry Smith   } else {
11018f7157efSSatish Balay     tia = a->i; tja = a->j;
11023b2fbd54SBarry Smith   }
11033b2fbd54SBarry Smith 
1104ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1105ecc77c7aSBarry Smith     (*nn) *= bs;
11068f7157efSSatish Balay     /* malloc & create the natural set of indices */
1107785e854fSJed Brown     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
11089985e31cSBarry Smith     if (n) {
11092462f5fdSStefano Zampini       (*ia)[0] = oshift;
1110ecc77c7aSBarry Smith       for (j=1; j<bs; j++) {
1111ecc77c7aSBarry Smith         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1112ecc77c7aSBarry Smith       }
11139985e31cSBarry Smith     }
1114ecc77c7aSBarry Smith 
1115ecc77c7aSBarry Smith     for (i=1; i<n; i++) {
1116ecc77c7aSBarry Smith       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1117ecc77c7aSBarry Smith       for (j=1; j<bs; j++) {
1118ecc77c7aSBarry Smith         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
11198f7157efSSatish Balay       }
11208f7157efSSatish Balay     }
11219985e31cSBarry Smith     if (n) {
1122ecc77c7aSBarry Smith       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
11239985e31cSBarry Smith     }
1124ecc77c7aSBarry Smith 
11251a83f524SJed Brown     if (inja) {
1126785e854fSJed Brown       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
11279985e31cSBarry Smith       cnt = 0;
11289985e31cSBarry Smith       for (i=0; i<n; i++) {
11299985e31cSBarry Smith         for (j=0; j<bs; j++) {
11309985e31cSBarry Smith           for (k=tia[i]; k<tia[i+1]; k++) {
11319985e31cSBarry Smith             for (l=0; l<bs; l++) {
11329985e31cSBarry Smith               (*ja)[cnt++] = bs*tja[k] + l;
11339985e31cSBarry Smith             }
11349985e31cSBarry Smith           }
11359985e31cSBarry Smith         }
11369985e31cSBarry Smith       }
11379985e31cSBarry Smith     }
11389985e31cSBarry Smith 
11398f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
11408f7157efSSatish Balay       ierr = PetscFree(tia);CHKERRQ(ierr);
11418f7157efSSatish Balay       ierr = PetscFree(tja);CHKERRQ(ierr);
11428f7157efSSatish Balay     }
1143f6d58c54SBarry Smith   } else if (oshift == 1) {
1144715a17b5SBarry Smith     if (symmetric) {
1145a2ea699eSBarry Smith       nz = tia[A->rmap->n/bs];
1146715a17b5SBarry Smith       /*  add 1 to i and j indices */
1147715a17b5SBarry Smith       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1148715a17b5SBarry Smith       *ia = tia;
1149715a17b5SBarry Smith       if (ja) {
1150715a17b5SBarry Smith         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1151715a17b5SBarry Smith         *ja = tja;
1152715a17b5SBarry Smith       }
1153715a17b5SBarry Smith     } else {
1154a2ea699eSBarry Smith       nz = a->i[A->rmap->n/bs];
1155f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
1156854ce69bSBarry Smith       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1157f6d58c54SBarry Smith       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1158f6d58c54SBarry Smith       if (ja) {
1159785e854fSJed Brown         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1160f6d58c54SBarry Smith         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1161f6d58c54SBarry Smith       }
1162715a17b5SBarry Smith     }
11638f7157efSSatish Balay   } else {
11648f7157efSSatish Balay     *ia = tia;
1165ecc77c7aSBarry Smith     if (ja) *ja = tja;
11668f7157efSSatish Balay   }
11673a40ed3dSBarry Smith   PetscFunctionReturn(0);
11683b2fbd54SBarry Smith }
11693b2fbd54SBarry Smith 
11701a83f524SJed Brown static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
11713b2fbd54SBarry Smith {
11726849ba73SBarry Smith   PetscErrorCode ierr;
11733b2fbd54SBarry Smith 
11743a40ed3dSBarry Smith   PetscFunctionBegin;
11753a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
1176715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1177606d414cSSatish Balay     ierr = PetscFree(*ia);CHKERRQ(ierr);
11789985e31cSBarry Smith     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
11793b2fbd54SBarry Smith   }
11803a40ed3dSBarry Smith   PetscFunctionReturn(0);
11813b2fbd54SBarry Smith }
11823b2fbd54SBarry Smith 
1183dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
11842d61bbb3SSatish Balay {
11852d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1186dfbe8321SBarry Smith   PetscErrorCode ierr;
11872d61bbb3SSatish Balay 
1188433994e6SBarry Smith   PetscFunctionBegin;
1189aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1190d0f46423SBarry Smith   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
11912d61bbb3SSatish Balay #endif
1192e6b907acSBarry Smith   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
11936bf464f9SBarry Smith   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
11946bf464f9SBarry Smith   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
11954fd072dbSBarry Smith   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
119605b42c5fSBarry Smith   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
11974fd072dbSBarry Smith   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
119805b42c5fSBarry Smith   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
119905b42c5fSBarry Smith   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1200f361c04dSBarry Smith   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1201de80f912SBarry Smith   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
12026bf464f9SBarry Smith   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
120305b42c5fSBarry Smith   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1204cd6b891eSBarry Smith   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1205c4319e64SHong Zhang 
12066bf464f9SBarry Smith   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
12076bf464f9SBarry Smith   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1208bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
1209901853e0SKris Buschelman 
1210f4259b30SLisandro Dalcin   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1211cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1212cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1213bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1214bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1215bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1216bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1217bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1218bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1219bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1220bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1221bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1222bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
12237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
1224c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
12257ea3e4caSstefano_zampini #endif
1226c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
12272d61bbb3SSatish Balay   PetscFunctionReturn(0);
12282d61bbb3SSatish Balay }
12292d61bbb3SSatish Balay 
1230ace3abfcSBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
12312d61bbb3SSatish Balay {
12322d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
123363ba0a88SBarry Smith   PetscErrorCode ierr;
12342d61bbb3SSatish Balay 
12352d61bbb3SSatish Balay   PetscFunctionBegin;
1236aa275fccSKris Buschelman   switch (op) {
1237aa275fccSKris Buschelman   case MAT_ROW_ORIENTED:
12384e0d8c25SBarry Smith     a->roworiented = flg;
1239aa275fccSKris Buschelman     break;
1240a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1241a9817697SBarry Smith     a->keepnonzeropattern = flg;
1242aa275fccSKris Buschelman     break;
1243512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1244512a5fc5SBarry Smith     a->nonew = (flg ? 0 : 1);
1245aa275fccSKris Buschelman     break;
1246aa275fccSKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
12474e0d8c25SBarry Smith     a->nonew = (flg ? -1 : 0);
1248aa275fccSKris Buschelman     break;
1249aa275fccSKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
12504e0d8c25SBarry Smith     a->nonew = (flg ? -2 : 0);
1251aa275fccSKris Buschelman     break;
125228b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
125328b2fa4aSMatthew Knepley     a->nounused = (flg ? -1 : 0);
125428b2fa4aSMatthew Knepley     break;
12558c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1256aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1257aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
1258071fcb05SBarry Smith   case MAT_SORTED_FULL:
1259290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1260aa275fccSKris Buschelman     break;
12615021d80fSJed Brown   case MAT_SPD:
126277e54ba9SKris Buschelman   case MAT_SYMMETRIC:
126377e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
12649a4540c5SBarry Smith   case MAT_HERMITIAN:
12659a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1266c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1267672ba085SHong Zhang   case MAT_STRUCTURE_ONLY:
12685021d80fSJed Brown     /* These options are handled directly by MatSetOption() */
126977e54ba9SKris Buschelman     break;
1270aa275fccSKris Buschelman   default:
1271e32f2f54SBarry Smith     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
12722d61bbb3SSatish Balay   }
12732d61bbb3SSatish Balay   PetscFunctionReturn(0);
12742d61bbb3SSatish Balay }
12752d61bbb3SSatish Balay 
127652768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
127752768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
12782d61bbb3SSatish Balay {
12796849ba73SBarry Smith   PetscErrorCode ierr;
128052768537SHong Zhang   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
128152768537SHong Zhang   MatScalar      *aa_i;
128287828ca2SBarry Smith   PetscScalar    *v_i;
12832d61bbb3SSatish Balay 
12842d61bbb3SSatish Balay   PetscFunctionBegin;
1285d0f46423SBarry Smith   bs  = A->rmap->bs;
128652768537SHong Zhang   bs2 = bs*bs;
1287e32f2f54SBarry Smith   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
12882d61bbb3SSatish Balay 
12892d61bbb3SSatish Balay   bn  = row/bs;   /* Block number */
12902d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
12912d61bbb3SSatish Balay   M   = ai[bn+1] - ai[bn];
12922d61bbb3SSatish Balay   *nz = bs*M;
12932d61bbb3SSatish Balay 
12942d61bbb3SSatish Balay   if (v) {
1295f4259b30SLisandro Dalcin     *v = NULL;
12962d61bbb3SSatish Balay     if (*nz) {
1297854ce69bSBarry Smith       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
12982d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
12992d61bbb3SSatish Balay         v_i  = *v + i*bs;
13002d61bbb3SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
130126fbe8dcSKarl Rupp         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
13022d61bbb3SSatish Balay       }
13032d61bbb3SSatish Balay     }
13042d61bbb3SSatish Balay   }
13052d61bbb3SSatish Balay 
13062d61bbb3SSatish Balay   if (idx) {
1307f4259b30SLisandro Dalcin     *idx = NULL;
13082d61bbb3SSatish Balay     if (*nz) {
1309854ce69bSBarry Smith       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
13102d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
13112d61bbb3SSatish Balay         idx_i = *idx + i*bs;
13122d61bbb3SSatish Balay         itmp  = bs*aj[ai[bn] + i];
131326fbe8dcSKarl Rupp         for (j=0; j<bs; j++) idx_i[j] = itmp++;
13142d61bbb3SSatish Balay       }
13152d61bbb3SSatish Balay     }
13162d61bbb3SSatish Balay   }
13172d61bbb3SSatish Balay   PetscFunctionReturn(0);
13182d61bbb3SSatish Balay }
13192d61bbb3SSatish Balay 
132052768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
132152768537SHong Zhang {
132252768537SHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
132352768537SHong Zhang   PetscErrorCode ierr;
132452768537SHong Zhang 
132552768537SHong Zhang   PetscFunctionBegin;
132652768537SHong Zhang   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
132752768537SHong Zhang   PetscFunctionReturn(0);
132852768537SHong Zhang }
132952768537SHong Zhang 
1330c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
13312d61bbb3SSatish Balay {
1332dfbe8321SBarry Smith   PetscErrorCode ierr;
1333606d414cSSatish Balay 
13342d61bbb3SSatish Balay   PetscFunctionBegin;
1335cb4a9cd9SHong Zhang   if (nz)  *nz = 0;
133605b42c5fSBarry Smith   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
133705b42c5fSBarry Smith   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
13382d61bbb3SSatish Balay   PetscFunctionReturn(0);
13392d61bbb3SSatish Balay }
13402d61bbb3SSatish Balay 
1341fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
13422d61bbb3SSatish Balay {
134320e84f26SHong Zhang   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
13442d61bbb3SSatish Balay   Mat            C;
13456849ba73SBarry Smith   PetscErrorCode ierr;
134620e84f26SHong Zhang   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
134720e84f26SHong Zhang   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
134820e84f26SHong Zhang   MatScalar      *ata,*aa=a->a;
13492d61bbb3SSatish Balay 
13502d61bbb3SSatish Balay   PetscFunctionBegin;
135120e84f26SHong Zhang   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1352cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
135320e84f26SHong Zhang     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
13542d61bbb3SSatish Balay 
1355ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1356d0f46423SBarry Smith     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
13577adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
135820e84f26SHong Zhang     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
135920e84f26SHong Zhang 
136020e84f26SHong Zhang     at  = (Mat_SeqBAIJ*)C->data;
136120e84f26SHong Zhang     ati = at->i;
136220e84f26SHong Zhang     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1363fc4dec0aSBarry Smith   } else {
1364fc4dec0aSBarry Smith     C = *B;
136520e84f26SHong Zhang     at = (Mat_SeqBAIJ*)C->data;
136620e84f26SHong Zhang     ati = at->i;
1367fc4dec0aSBarry Smith   }
1368fc4dec0aSBarry Smith 
136920e84f26SHong Zhang   atj = at->j;
137020e84f26SHong Zhang   ata = at->a;
137120e84f26SHong Zhang 
137220e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
1373580bdb30SBarry Smith   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
137420e84f26SHong Zhang 
137520e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
13762d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
137720e84f26SHong Zhang     anzj = ai[i+1] - ai[i];
137820e84f26SHong Zhang     for (j=0; j<anzj; j++) {
137920e84f26SHong Zhang       atj[atfill[*aj]] = i;
138020e84f26SHong Zhang       for (kr=0; kr<bs; kr++) {
138120e84f26SHong Zhang         for (k=0; k<bs; k++) {
138220e84f26SHong Zhang           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
13832d61bbb3SSatish Balay         }
13842d61bbb3SSatish Balay       }
138520e84f26SHong Zhang       atfill[*aj++] += 1;
138620e84f26SHong Zhang     }
138720e84f26SHong Zhang   }
13882d61bbb3SSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
13892d61bbb3SSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
13902d61bbb3SSatish Balay 
139120e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
139220e84f26SHong Zhang   ierr = PetscFree(atfill);CHKERRQ(ierr);
139320e84f26SHong Zhang 
1394cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
139520e84f26SHong Zhang     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
13962d61bbb3SSatish Balay     *B = C;
13972d61bbb3SSatish Balay   } else {
139828be2f97SBarry Smith     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
13992d61bbb3SSatish Balay   }
14002d61bbb3SSatish Balay   PetscFunctionReturn(0);
14012d61bbb3SSatish Balay }
14022d61bbb3SSatish Balay 
1403453d3561SHong Zhang PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1404453d3561SHong Zhang {
1405453d3561SHong Zhang   PetscErrorCode ierr;
1406453d3561SHong Zhang   Mat            Btrans;
1407453d3561SHong Zhang 
1408453d3561SHong Zhang   PetscFunctionBegin;
1409453d3561SHong Zhang   *f   = PETSC_FALSE;
1410453d3561SHong Zhang   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1411453d3561SHong Zhang   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1412453d3561SHong Zhang   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1413453d3561SHong Zhang   PetscFunctionReturn(0);
1414453d3561SHong Zhang }
1415453d3561SHong Zhang 
1416618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1417b51a4376SLisandro Dalcin PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
14182593348eSBarry Smith {
1419b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1420b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1421b51a4376SLisandro Dalcin   PetscInt       *rowlens,*colidxs;
1422b51a4376SLisandro Dalcin   PetscScalar    *matvals;
14236849ba73SBarry Smith   PetscErrorCode ierr;
14242593348eSBarry Smith 
14253a40ed3dSBarry Smith   PetscFunctionBegin;
1426b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
14273b2fbd54SBarry Smith 
1428b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1429b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1430b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1431b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1432b51a4376SLisandro Dalcin   nz = bs*bs*A->nz;
14332593348eSBarry Smith 
1434b51a4376SLisandro Dalcin   /* write matrix header */
1435b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
1436b51a4376SLisandro Dalcin   header[1] = M; header[2] = N; header[3] = nz;
1437b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
14382593348eSBarry Smith 
1439b51a4376SLisandro Dalcin   /* store row lengths */
1440b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1441b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1442b51a4376SLisandro Dalcin     for (j=0; j<bs; j++)
1443b51a4376SLisandro Dalcin       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1444b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1445b51a4376SLisandro Dalcin   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1446b51a4376SLisandro Dalcin 
1447b51a4376SLisandro Dalcin   /* store column indices  */
1448b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1449b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1450b51a4376SLisandro Dalcin     for (k=0; k<bs; k++)
1451b51a4376SLisandro Dalcin       for (j=A->i[i]; j<A->i[i+1]; j++)
1452b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1453b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*A->j[j] + l;
1454b51a4376SLisandro Dalcin   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1455b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1456b51a4376SLisandro Dalcin   ierr = PetscFree(colidxs);CHKERRQ(ierr);
14572593348eSBarry Smith 
14582593348eSBarry Smith   /* store nonzero values */
1459b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1460b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1461b51a4376SLisandro Dalcin     for (k=0; k<bs; k++)
1462b51a4376SLisandro Dalcin       for (j=A->i[i]; j<A->i[i+1]; j++)
1463b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1464b51a4376SLisandro Dalcin           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1465b51a4376SLisandro Dalcin   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1466b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1467b51a4376SLisandro Dalcin   ierr = PetscFree(matvals);CHKERRQ(ierr);
1468ce6f0cecSBarry Smith 
1469b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
1470b51a4376SLisandro Dalcin   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
14713a40ed3dSBarry Smith   PetscFunctionReturn(0);
14722593348eSBarry Smith }
14732593348eSBarry Smith 
14747dc0baabSHong Zhang static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
14757dc0baabSHong Zhang {
14767dc0baabSHong Zhang   PetscErrorCode ierr;
14777dc0baabSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
14787dc0baabSHong Zhang   PetscInt       i,bs = A->rmap->bs,k;
14797dc0baabSHong Zhang 
14807dc0baabSHong Zhang   PetscFunctionBegin;
14817dc0baabSHong Zhang   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
14827dc0baabSHong Zhang   for (i=0; i<a->mbs; i++) {
14837dc0baabSHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
14847dc0baabSHong Zhang     for (k=a->i[i]; k<a->i[i+1]; k++) {
14857dc0baabSHong Zhang       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
14867dc0baabSHong Zhang     }
14877dc0baabSHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
14887dc0baabSHong Zhang   }
14897dc0baabSHong Zhang   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
14907dc0baabSHong Zhang   PetscFunctionReturn(0);
14917dc0baabSHong Zhang }
14927dc0baabSHong Zhang 
14936849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
14942593348eSBarry Smith {
1495b6490206SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1496dfbe8321SBarry Smith   PetscErrorCode    ierr;
1497d0f46423SBarry Smith   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1498f3ef73ceSBarry Smith   PetscViewerFormat format;
14992593348eSBarry Smith 
15003a40ed3dSBarry Smith   PetscFunctionBegin;
15017dc0baabSHong Zhang   if (A->structure_only) {
15027dc0baabSHong Zhang     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
15037dc0baabSHong Zhang     PetscFunctionReturn(0);
15047dc0baabSHong Zhang   }
15057dc0baabSHong Zhang 
1506b0a32e0cSBarry Smith   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1507456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
150877431f27SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1509fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1510ade3a672SBarry Smith     const char *matname;
1511bcd9e38bSBarry Smith     Mat        aij;
1512ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1513ade3a672SBarry Smith     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1514ade3a672SBarry Smith     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1515bcd9e38bSBarry Smith     ierr = MatView(aij,viewer);CHKERRQ(ierr);
15166bf464f9SBarry Smith     ierr = MatDestroy(&aij);CHKERRQ(ierr);
151704929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
151804929863SHong Zhang       PetscFunctionReturn(0);
1519fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1520d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
152144cd7ae7SLois Curfman McInnes     for (i=0; i<a->mbs; i++) {
152244cd7ae7SLois Curfman McInnes       for (j=0; j<bs; j++) {
152377431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
152444cd7ae7SLois Curfman McInnes         for (k=a->i[i]; k<a->i[i+1]; k++) {
152544cd7ae7SLois Curfman McInnes           for (l=0; l<bs; l++) {
1526aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
15270e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
152857622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
152957622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15300e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
153157622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
153257622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15330e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
153457622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15350ef38995SBarry Smith             }
153644cd7ae7SLois Curfman McInnes #else
15370ef38995SBarry Smith             if (a->a[bs2*k + l*bs + j] != 0.0) {
153857622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
15390ef38995SBarry Smith             }
154044cd7ae7SLois Curfman McInnes #endif
154144cd7ae7SLois Curfman McInnes           }
154244cd7ae7SLois Curfman McInnes         }
1543b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
154444cd7ae7SLois Curfman McInnes       }
154544cd7ae7SLois Curfman McInnes     }
1546d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
15470ef38995SBarry Smith   } else {
1548d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1549b6490206SBarry Smith     for (i=0; i<a->mbs; i++) {
1550b6490206SBarry Smith       for (j=0; j<bs; j++) {
155177431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1552b6490206SBarry Smith         for (k=a->i[i]; k<a->i[i+1]; k++) {
1553b6490206SBarry Smith           for (l=0; l<bs; l++) {
1554aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
15550e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
155657622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
155757622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15580e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
155957622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
156057622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15610ef38995SBarry Smith             } else {
156257622a8eSBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
156388685aaeSLois Curfman McInnes             }
156488685aaeSLois Curfman McInnes #else
156557622a8eSBarry Smith             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
156688685aaeSLois Curfman McInnes #endif
15672593348eSBarry Smith           }
15682593348eSBarry Smith         }
1569b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
15702593348eSBarry Smith       }
15712593348eSBarry Smith     }
1572d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1573b6490206SBarry Smith   }
1574b0a32e0cSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
15753a40ed3dSBarry Smith   PetscFunctionReturn(0);
15762593348eSBarry Smith }
15772593348eSBarry Smith 
15789804daf3SBarry Smith #include <petscdraw.h>
15796849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
15803270192aSSatish Balay {
158177ed5343SBarry Smith   Mat               A = (Mat) Aa;
15823270192aSSatish Balay   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
15836849ba73SBarry Smith   PetscErrorCode    ierr;
1584d0f46423SBarry Smith   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
15850e6d2581SBarry Smith   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
15863f1db9ecSBarry Smith   MatScalar         *aa;
1587b0a32e0cSBarry Smith   PetscViewer       viewer;
1588b3e7f47fSJed Brown   PetscViewerFormat format;
15893270192aSSatish Balay 
15903a40ed3dSBarry Smith   PetscFunctionBegin;
159177ed5343SBarry Smith   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1592b3e7f47fSJed Brown   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1593b0a32e0cSBarry Smith   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
159477ed5343SBarry Smith 
15953270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1596b3e7f47fSJed Brown 
1597b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1598383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1599383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1600b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
16013270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16023270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1603d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16043270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16053270192aSSatish Balay         aa  = a->a + j*bs2;
16063270192aSSatish Balay         for (k=0; k<bs; k++) {
16073270192aSSatish Balay           for (l=0; l<bs; l++) {
16080e6d2581SBarry Smith             if (PetscRealPart(*aa++) >=  0.) continue;
1609b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16103270192aSSatish Balay           }
16113270192aSSatish Balay         }
16123270192aSSatish Balay       }
16133270192aSSatish Balay     }
1614b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
16153270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16163270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1617d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16183270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16193270192aSSatish Balay         aa  = a->a + j*bs2;
16203270192aSSatish Balay         for (k=0; k<bs; k++) {
16213270192aSSatish Balay           for (l=0; l<bs; l++) {
16220e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
1623b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16243270192aSSatish Balay           }
16253270192aSSatish Balay         }
16263270192aSSatish Balay       }
16273270192aSSatish Balay     }
1628b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
16293270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16303270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1631d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16323270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16333270192aSSatish Balay         aa  = a->a + j*bs2;
16343270192aSSatish Balay         for (k=0; k<bs; k++) {
16353270192aSSatish Balay           for (l=0; l<bs; l++) {
16360e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
1637b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16383270192aSSatish Balay           }
16393270192aSSatish Balay         }
16403270192aSSatish Balay       }
16413270192aSSatish Balay     }
1642383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1643b3e7f47fSJed Brown   } else {
1644b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1645b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1646b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
1647b3e7f47fSJed Brown     PetscDraw popup;
1648b3e7f47fSJed Brown 
1649b3e7f47fSJed Brown     for (i=0; i<a->nz*a->bs2; i++) {
1650b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1651b3e7f47fSJed Brown     }
1652383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1653b3e7f47fSJed Brown     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
165445f3bb6eSLisandro Dalcin     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1655383922c3SLisandro Dalcin 
1656383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1657b3e7f47fSJed Brown     for (i=0,row=0; i<mbs; i++,row+=bs) {
1658b3e7f47fSJed Brown       for (j=a->i[i]; j<a->i[i+1]; j++) {
1659b3e7f47fSJed Brown         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1660b3e7f47fSJed Brown         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1661b3e7f47fSJed Brown         aa  = a->a + j*bs2;
1662b3e7f47fSJed Brown         for (k=0; k<bs; k++) {
1663b3e7f47fSJed Brown           for (l=0; l<bs; l++) {
1664383922c3SLisandro Dalcin             MatScalar v = *aa++;
1665383922c3SLisandro Dalcin             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1666b3e7f47fSJed Brown             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1667b3e7f47fSJed Brown           }
1668b3e7f47fSJed Brown         }
1669b3e7f47fSJed Brown       }
1670b3e7f47fSJed Brown     }
1671383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1672b3e7f47fSJed Brown   }
167377ed5343SBarry Smith   PetscFunctionReturn(0);
167477ed5343SBarry Smith }
16753270192aSSatish Balay 
16766849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
167777ed5343SBarry Smith {
1678dfbe8321SBarry Smith   PetscErrorCode ierr;
16790e6d2581SBarry Smith   PetscReal      xl,yl,xr,yr,w,h;
1680b0a32e0cSBarry Smith   PetscDraw      draw;
1681ace3abfcSBarry Smith   PetscBool      isnull;
16823270192aSSatish Balay 
168377ed5343SBarry Smith   PetscFunctionBegin;
1684b0a32e0cSBarry Smith   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
168545f3bb6eSLisandro Dalcin   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
168645f3bb6eSLisandro Dalcin   if (isnull) PetscFunctionReturn(0);
168777ed5343SBarry Smith 
1688d0f46423SBarry Smith   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
168977ed5343SBarry Smith   xr  += w;          yr += h;        xl = -w;     yl = -h;
1690b0a32e0cSBarry Smith   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1691832b7cebSLisandro Dalcin   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1692b0a32e0cSBarry Smith   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
16930298fd71SBarry Smith   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1694832b7cebSLisandro Dalcin   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
16953a40ed3dSBarry Smith   PetscFunctionReturn(0);
16963270192aSSatish Balay }
16973270192aSSatish Balay 
1698dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
16992593348eSBarry Smith {
1700dfbe8321SBarry Smith   PetscErrorCode ierr;
1701ace3abfcSBarry Smith   PetscBool      iascii,isbinary,isdraw;
17022593348eSBarry Smith 
17033a40ed3dSBarry Smith   PetscFunctionBegin;
1704251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1705251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1706251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
170732077d6dSBarry Smith   if (iascii) {
17083a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
17090f5bd95cSBarry Smith   } else if (isbinary) {
17103a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
17110f5bd95cSBarry Smith   } else if (isdraw) {
17123a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
17135cd90555SBarry Smith   } else {
1714a5e6ed63SBarry Smith     Mat B;
1715ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1716a5e6ed63SBarry Smith     ierr = MatView(B,viewer);CHKERRQ(ierr);
17176bf464f9SBarry Smith     ierr = MatDestroy(&B);CHKERRQ(ierr);
17182593348eSBarry Smith   }
17193a40ed3dSBarry Smith   PetscFunctionReturn(0);
17202593348eSBarry Smith }
1721b6490206SBarry Smith 
1722c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1723cd0e1443SSatish Balay {
1724cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1725c1ac3661SBarry Smith   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1726c1ac3661SBarry Smith   PetscInt    *ai = a->i,*ailen = a->ilen;
1727d0f46423SBarry Smith   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
172897e567efSBarry Smith   MatScalar   *ap,*aa = a->a;
1729cd0e1443SSatish Balay 
17303a40ed3dSBarry Smith   PetscFunctionBegin;
17312d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over rows */
1732cd0e1443SSatish Balay     row = im[k]; brow = row/bs;
1733e32f2f54SBarry Smith     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1734e32f2f54SBarry Smith     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
17352d61bbb3SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
17362c3acbe9SBarry Smith     nrow = ailen[brow];
17372d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over columns */
1738e32f2f54SBarry Smith       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1739e32f2f54SBarry Smith       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
17402d61bbb3SSatish Balay       col  = in[l];
17412d61bbb3SSatish Balay       bcol = col/bs;
17422d61bbb3SSatish Balay       cidx = col%bs;
17432d61bbb3SSatish Balay       ridx = row%bs;
17442d61bbb3SSatish Balay       high = nrow;
17452d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
17462d61bbb3SSatish Balay       while (high-low > 5) {
1747cd0e1443SSatish Balay         t = (low+high)/2;
1748cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
1749cd0e1443SSatish Balay         else             low  = t;
1750cd0e1443SSatish Balay       }
1751cd0e1443SSatish Balay       for (i=low; i<high; i++) {
1752cd0e1443SSatish Balay         if (rp[i] > bcol) break;
1753cd0e1443SSatish Balay         if (rp[i] == bcol) {
17542d61bbb3SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
17552d61bbb3SSatish Balay           goto finished;
1756cd0e1443SSatish Balay         }
1757cd0e1443SSatish Balay       }
175897e567efSBarry Smith       *v++ = 0.0;
17592d61bbb3SSatish Balay finished:;
1760cd0e1443SSatish Balay     }
1761cd0e1443SSatish Balay   }
17623a40ed3dSBarry Smith   PetscFunctionReturn(0);
1763cd0e1443SSatish Balay }
1764cd0e1443SSatish Balay 
1765dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
176692c4ed94SBarry Smith {
176792c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1768e2ee6c50SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1769c1ac3661SBarry Smith   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
17706849ba73SBarry Smith   PetscErrorCode    ierr;
1771d0f46423SBarry Smith   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1772ace3abfcSBarry Smith   PetscBool         roworiented=a->roworiented;
1773dd6ea824SBarry Smith   const PetscScalar *value     = v;
17749d243f67SHong Zhang   MatScalar         *ap=NULL,*aa = a->a,*bap;
177592c4ed94SBarry Smith 
17763a40ed3dSBarry Smith   PetscFunctionBegin;
17770e324ae4SSatish Balay   if (roworiented) {
17780e324ae4SSatish Balay     stepval = (n-1)*bs;
17790e324ae4SSatish Balay   } else {
17800e324ae4SSatish Balay     stepval = (m-1)*bs;
17810e324ae4SSatish Balay   }
178292c4ed94SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
178392c4ed94SBarry Smith     row = im[k];
17845ef9f2a5SBarry Smith     if (row < 0) continue;
1785cf9c20a2SJed Brown     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
178692c4ed94SBarry Smith     rp   = aj + ai[row];
17877dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2*ai[row];
178892c4ed94SBarry Smith     rmax = imax[row];
178992c4ed94SBarry Smith     nrow = ailen[row];
179092c4ed94SBarry Smith     low  = 0;
1791c71e6ed7SBarry Smith     high = nrow;
179292c4ed94SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
17935ef9f2a5SBarry Smith       if (in[l] < 0) continue;
1794cf9c20a2SJed Brown       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
179592c4ed94SBarry Smith       col = in[l];
17967dc0baabSHong Zhang       if (!A->structure_only) {
179792c4ed94SBarry Smith         if (roworiented) {
179853ef36baSBarry Smith           value = v + (k*(stepval+bs) + l)*bs;
17990e324ae4SSatish Balay         } else {
180053ef36baSBarry Smith           value = v + (l*(stepval+bs) + k)*bs;
180192c4ed94SBarry Smith         }
18027dc0baabSHong Zhang       }
180326fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
180426fbe8dcSKarl Rupp       else high = nrow;
1805e2ee6c50SBarry Smith       lastcol = col;
180692c4ed94SBarry Smith       while (high-low > 7) {
180792c4ed94SBarry Smith         t = (low+high)/2;
180892c4ed94SBarry Smith         if (rp[t] > col) high = t;
180992c4ed94SBarry Smith         else             low  = t;
181092c4ed94SBarry Smith       }
181192c4ed94SBarry Smith       for (i=low; i<high; i++) {
181292c4ed94SBarry Smith         if (rp[i] > col) break;
181392c4ed94SBarry Smith         if (rp[i] == col) {
18147dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
18158a84c255SSatish Balay           bap = ap +  bs2*i;
18160e324ae4SSatish Balay           if (roworiented) {
18178a84c255SSatish Balay             if (is == ADD_VALUES) {
1818dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1819dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
18208a84c255SSatish Balay                   bap[jj] += *value++;
1821dd9472c6SBarry Smith                 }
1822dd9472c6SBarry Smith               }
18230e324ae4SSatish Balay             } else {
1824dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1825dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
18260e324ae4SSatish Balay                   bap[jj] = *value++;
18278a84c255SSatish Balay                 }
1828dd9472c6SBarry Smith               }
1829dd9472c6SBarry Smith             }
18300e324ae4SSatish Balay           } else {
18310e324ae4SSatish Balay             if (is == ADD_VALUES) {
183253ef36baSBarry Smith               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1833dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
183453ef36baSBarry Smith                   bap[jj] += value[jj];
1835dd9472c6SBarry Smith                 }
183653ef36baSBarry Smith                 bap += bs;
1837dd9472c6SBarry Smith               }
18380e324ae4SSatish Balay             } else {
183953ef36baSBarry Smith               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1840dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
184153ef36baSBarry Smith                   bap[jj]  = value[jj];
18420e324ae4SSatish Balay                 }
184353ef36baSBarry Smith                 bap += bs;
18448a84c255SSatish Balay               }
1845dd9472c6SBarry Smith             }
1846dd9472c6SBarry Smith           }
1847f1241b54SBarry Smith           goto noinsert2;
184892c4ed94SBarry Smith         }
184992c4ed94SBarry Smith       }
185089280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
18512f7d4af7SBarry Smith       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
18527dc0baabSHong Zhang       if (A->structure_only) {
18537dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
18547dc0baabSHong Zhang       } else {
1855fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
18567dc0baabSHong Zhang       }
1857c03d1d03SSatish Balay       N = nrow++ - 1; high++;
185892c4ed94SBarry Smith       /* shift up all the later entries in this row */
1859580bdb30SBarry Smith       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
186092c4ed94SBarry Smith       rp[i] = col;
18617dc0baabSHong Zhang       if (!A->structure_only) {
1862580bdb30SBarry Smith         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
18638a84c255SSatish Balay         bap   = ap +  bs2*i;
18640e324ae4SSatish Balay         if (roworiented) {
1865dd9472c6SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
1866dd9472c6SBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
18670e324ae4SSatish Balay               bap[jj] = *value++;
1868dd9472c6SBarry Smith             }
1869dd9472c6SBarry Smith           }
18700e324ae4SSatish Balay         } else {
1871dd9472c6SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
1872dd9472c6SBarry Smith             for (jj=0; jj<bs; jj++) {
18730e324ae4SSatish Balay               *bap++ = *value++;
18740e324ae4SSatish Balay             }
1875dd9472c6SBarry Smith           }
1876dd9472c6SBarry Smith         }
18777dc0baabSHong Zhang       }
1878f1241b54SBarry Smith noinsert2:;
187992c4ed94SBarry Smith       low = i;
188092c4ed94SBarry Smith     }
188192c4ed94SBarry Smith     ailen[row] = nrow;
188292c4ed94SBarry Smith   }
18833a40ed3dSBarry Smith   PetscFunctionReturn(0);
188492c4ed94SBarry Smith }
188526e093fcSHong Zhang 
1886dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1887584200bdSSatish Balay {
1888584200bdSSatish Balay   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1889580bdb30SBarry Smith   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1890d0f46423SBarry Smith   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
18916849ba73SBarry Smith   PetscErrorCode ierr;
1892c1ac3661SBarry Smith   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
18933f1db9ecSBarry Smith   MatScalar      *aa  = a->a,*ap;
18943447b6efSHong Zhang   PetscReal      ratio=0.6;
1895584200bdSSatish Balay 
18963a40ed3dSBarry Smith   PetscFunctionBegin;
18973a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1898584200bdSSatish Balay 
189943ee02c3SBarry Smith   if (m) rmax = ailen[0];
1900584200bdSSatish Balay   for (i=1; i<mbs; i++) {
1901584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
1902584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
1903d402145bSBarry Smith     rmax    = PetscMax(rmax,ailen[i]);
1904584200bdSSatish Balay     if (fshift) {
1905580bdb30SBarry Smith       ip = aj + ai[i];
1906580bdb30SBarry Smith       ap = aa + bs2*ai[i];
1907584200bdSSatish Balay       N  = ailen[i];
1908580bdb30SBarry Smith       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1909672ba085SHong Zhang       if (!A->structure_only) {
1910580bdb30SBarry Smith         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1911584200bdSSatish Balay       }
1912672ba085SHong Zhang     }
1913584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
1914584200bdSSatish Balay   }
1915584200bdSSatish Balay   if (mbs) {
1916584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
1917584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1918584200bdSSatish Balay   }
19197c565772SBarry Smith 
1920584200bdSSatish Balay   /* reset ilen and imax for each row */
19217c565772SBarry Smith   a->nonzerorowcnt = 0;
1922672ba085SHong Zhang   if (A->structure_only) {
1923672ba085SHong Zhang     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1924672ba085SHong Zhang   } else { /* !A->structure_only */
1925584200bdSSatish Balay     for (i=0; i<mbs; i++) {
1926584200bdSSatish Balay       ailen[i] = imax[i] = ai[i+1] - ai[i];
19277c565772SBarry Smith       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1928584200bdSSatish Balay     }
1929672ba085SHong Zhang   }
1930a7c10996SSatish Balay   a->nz = ai[mbs];
1931584200bdSSatish Balay 
1932584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
1933b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
1934584200bdSSatish Balay   if (fshift && a->diag) {
1935606d414cSSatish Balay     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
19363bb1ff40SBarry Smith     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1937f4259b30SLisandro Dalcin     a->diag = NULL;
1938584200bdSSatish Balay   }
193965e19b50SBarry Smith   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1940d0f46423SBarry Smith   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1941ae15b995SBarry Smith   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1942ae15b995SBarry Smith   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
194326fbe8dcSKarl Rupp 
19448e58a170SBarry Smith   A->info.mallocs    += a->reallocs;
1945e2f3b5e9SSatish Balay   a->reallocs         = 0;
19460e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1947647a6520SHong Zhang   a->rmax             = rmax;
1948cf4441caSHong Zhang 
1949672ba085SHong Zhang   if (!A->structure_only) {
195011e456e1SBarry Smith     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1951672ba085SHong Zhang   }
19523a40ed3dSBarry Smith   PetscFunctionReturn(0);
1953584200bdSSatish Balay }
1954584200bdSSatish Balay 
1955bea157c4SSatish Balay /*
1956bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
1957bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1958bea157c4SSatish Balay    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1959bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
1960bea157c4SSatish Balay */
1961c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1962d9b7c43dSSatish Balay {
1963c1ac3661SBarry Smith   PetscInt  i,j,k,row;
1964ace3abfcSBarry Smith   PetscBool flg;
19653a40ed3dSBarry Smith 
1966433994e6SBarry Smith   PetscFunctionBegin;
1967bea157c4SSatish Balay   for (i=0,j=0; i<n; j++) {
1968bea157c4SSatish Balay     row = idx[i];
1969bea157c4SSatish Balay     if (row%bs!=0) { /* Not the begining of a block */
1970bea157c4SSatish Balay       sizes[j] = 1;
1971bea157c4SSatish Balay       i++;
1972e4fda26cSSatish Balay     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1973bea157c4SSatish Balay       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1974bea157c4SSatish Balay       i++;
1975bea157c4SSatish Balay     } else { /* Begining of the block, so check if the complete block exists */
1976bea157c4SSatish Balay       flg = PETSC_TRUE;
1977bea157c4SSatish Balay       for (k=1; k<bs; k++) {
1978bea157c4SSatish Balay         if (row+k != idx[i+k]) { /* break in the block */
1979bea157c4SSatish Balay           flg = PETSC_FALSE;
1980bea157c4SSatish Balay           break;
1981d9b7c43dSSatish Balay         }
1982bea157c4SSatish Balay       }
1983abc0a331SBarry Smith       if (flg) { /* No break in the bs */
1984bea157c4SSatish Balay         sizes[j] = bs;
1985bea157c4SSatish Balay         i       += bs;
1986bea157c4SSatish Balay       } else {
1987bea157c4SSatish Balay         sizes[j] = 1;
1988bea157c4SSatish Balay         i++;
1989bea157c4SSatish Balay       }
1990bea157c4SSatish Balay     }
1991bea157c4SSatish Balay   }
1992bea157c4SSatish Balay   *bs_max = j;
19933a40ed3dSBarry Smith   PetscFunctionReturn(0);
1994d9b7c43dSSatish Balay }
1995d9b7c43dSSatish Balay 
19962b40b63fSBarry Smith PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1997d9b7c43dSSatish Balay {
1998d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1999dfbe8321SBarry Smith   PetscErrorCode    ierr;
2000f4df32b1SMatthew Knepley   PetscInt          i,j,k,count,*rows;
2001d0f46423SBarry Smith   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
200287828ca2SBarry Smith   PetscScalar       zero = 0.0;
20033f1db9ecSBarry Smith   MatScalar         *aa;
200497b48c8fSBarry Smith   const PetscScalar *xx;
200597b48c8fSBarry Smith   PetscScalar       *bb;
2006d9b7c43dSSatish Balay 
20073a40ed3dSBarry Smith   PetscFunctionBegin;
200897b48c8fSBarry Smith   /* fix right hand side if needed */
200997b48c8fSBarry Smith   if (x && b) {
201097b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
201197b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
201297b48c8fSBarry Smith     for (i=0; i<is_n; i++) {
201397b48c8fSBarry Smith       bb[is_idx[i]] = diag*xx[is_idx[i]];
201497b48c8fSBarry Smith     }
201597b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
201697b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
201797b48c8fSBarry Smith   }
201897b48c8fSBarry Smith 
2019d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2020bea157c4SSatish Balay   /* allocate memory for rows,sizes */
2021dcca6d9dSJed Brown   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2022bea157c4SSatish Balay 
2023563b5814SBarry Smith   /* copy IS values to rows, and sort them */
202426fbe8dcSKarl Rupp   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2025bea157c4SSatish Balay   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
202697b48c8fSBarry Smith 
2027a9817697SBarry Smith   if (baij->keepnonzeropattern) {
202826fbe8dcSKarl Rupp     for (i=0; i<is_n; i++) sizes[i] = 1;
2029dffd3267SBarry Smith     bs_max          = is_n;
2030dffd3267SBarry Smith   } else {
2031bea157c4SSatish Balay     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2032e56f5c9eSBarry Smith     A->nonzerostate++;
2033dffd3267SBarry Smith   }
2034bea157c4SSatish Balay 
2035bea157c4SSatish Balay   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2036bea157c4SSatish Balay     row = rows[j];
2037e32f2f54SBarry Smith     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2038bea157c4SSatish Balay     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2039b31fbe3bSSatish Balay     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2040a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2041d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2042bea157c4SSatish Balay         if (baij->ilen[row/bs] > 0) {
2043bea157c4SSatish Balay           baij->ilen[row/bs]       = 1;
2044bea157c4SSatish Balay           baij->j[baij->i[row/bs]] = row/bs;
204526fbe8dcSKarl Rupp 
2046580bdb30SBarry Smith           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2047a07cd24cSSatish Balay         }
2048563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
2049bea157c4SSatish Balay         for (k=0; k<bs; k++) {
2050f4df32b1SMatthew Knepley           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2051bea157c4SSatish Balay         }
2052f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2053bea157c4SSatish Balay         baij->ilen[row/bs] = 0;
2054f4df32b1SMatthew Knepley       } /* end (diag == 0.0) */
2055bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
2056cf9c20a2SJed Brown       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2057bea157c4SSatish Balay       for (k=0; k<count; k++) {
2058d9b7c43dSSatish Balay         aa[0] =  zero;
2059d9b7c43dSSatish Balay         aa   += bs;
2060d9b7c43dSSatish Balay       }
2061d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2062f4df32b1SMatthew Knepley         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2063d9b7c43dSSatish Balay       }
2064d9b7c43dSSatish Balay     }
2065bea157c4SSatish Balay   }
2066bea157c4SSatish Balay 
2067fca92195SBarry Smith   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
20689a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
20693a40ed3dSBarry Smith   PetscFunctionReturn(0);
2070d9b7c43dSSatish Balay }
20711c351548SSatish Balay 
207297b48c8fSBarry Smith PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
207397b48c8fSBarry Smith {
207497b48c8fSBarry Smith   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
207597b48c8fSBarry Smith   PetscErrorCode    ierr;
207697b48c8fSBarry Smith   PetscInt          i,j,k,count;
207797b48c8fSBarry Smith   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
207897b48c8fSBarry Smith   PetscScalar       zero = 0.0;
207997b48c8fSBarry Smith   MatScalar         *aa;
208097b48c8fSBarry Smith   const PetscScalar *xx;
208197b48c8fSBarry Smith   PetscScalar       *bb;
208256777dd2SBarry Smith   PetscBool         *zeroed,vecs = PETSC_FALSE;
208397b48c8fSBarry Smith 
208497b48c8fSBarry Smith   PetscFunctionBegin;
208597b48c8fSBarry Smith   /* fix right hand side if needed */
208697b48c8fSBarry Smith   if (x && b) {
208797b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
208897b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
208956777dd2SBarry Smith     vecs = PETSC_TRUE;
209097b48c8fSBarry Smith   }
209197b48c8fSBarry Smith 
209297b48c8fSBarry Smith   /* zero the columns */
20931795a4d1SJed Brown   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
209497b48c8fSBarry Smith   for (i=0; i<is_n; i++) {
209597b48c8fSBarry Smith     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
209697b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
209797b48c8fSBarry Smith   }
209897b48c8fSBarry Smith   for (i=0; i<A->rmap->N; i++) {
209997b48c8fSBarry Smith     if (!zeroed[i]) {
210097b48c8fSBarry Smith       row = i/bs;
210197b48c8fSBarry Smith       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
210297b48c8fSBarry Smith         for (k=0; k<bs; k++) {
210397b48c8fSBarry Smith           col = bs*baij->j[j] + k;
210497b48c8fSBarry Smith           if (zeroed[col]) {
210597b48c8fSBarry Smith             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
210656777dd2SBarry Smith             if (vecs) bb[i] -= aa[0]*xx[col];
210797b48c8fSBarry Smith             aa[0] = 0.0;
210897b48c8fSBarry Smith           }
210997b48c8fSBarry Smith         }
211097b48c8fSBarry Smith       }
211156777dd2SBarry Smith     } else if (vecs) bb[i] = diag*xx[i];
211297b48c8fSBarry Smith   }
211397b48c8fSBarry Smith   ierr = PetscFree(zeroed);CHKERRQ(ierr);
211456777dd2SBarry Smith   if (vecs) {
211556777dd2SBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
211656777dd2SBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
211756777dd2SBarry Smith   }
211897b48c8fSBarry Smith 
211997b48c8fSBarry Smith   /* zero the rows */
212097b48c8fSBarry Smith   for (i=0; i<is_n; i++) {
212197b48c8fSBarry Smith     row   = is_idx[i];
212297b48c8fSBarry Smith     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
212397b48c8fSBarry Smith     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
212497b48c8fSBarry Smith     for (k=0; k<count; k++) {
212597b48c8fSBarry Smith       aa[0] =  zero;
212697b48c8fSBarry Smith       aa   += bs;
212797b48c8fSBarry Smith     }
2128d4a378daSJed Brown     if (diag != (PetscScalar)0.0) {
212997b48c8fSBarry Smith       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
213097b48c8fSBarry Smith     }
213197b48c8fSBarry Smith   }
213297b48c8fSBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
213397b48c8fSBarry Smith   PetscFunctionReturn(0);
213497b48c8fSBarry Smith }
213597b48c8fSBarry Smith 
2136c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
21372d61bbb3SSatish Balay {
21382d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2139e2ee6c50SBarry Smith   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2140c1ac3661SBarry Smith   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2141d0f46423SBarry Smith   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
21426849ba73SBarry Smith   PetscErrorCode ierr;
2143c1ac3661SBarry Smith   PetscInt       ridx,cidx,bs2=a->bs2;
2144ace3abfcSBarry Smith   PetscBool      roworiented=a->roworiented;
2145d8cdefa3SHong Zhang   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
21462d61bbb3SSatish Balay 
21472d61bbb3SSatish Balay   PetscFunctionBegin;
21482d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over added rows */
2149085a36d4SBarry Smith     row  = im[k];
2150085a36d4SBarry Smith     brow = row/bs;
21515ef9f2a5SBarry Smith     if (row < 0) continue;
2152cf9c20a2SJed Brown     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
21532d61bbb3SSatish Balay     rp   = aj + ai[brow];
2154672ba085SHong Zhang     if (!A->structure_only) ap = aa + bs2*ai[brow];
21552d61bbb3SSatish Balay     rmax = imax[brow];
21562d61bbb3SSatish Balay     nrow = ailen[brow];
21572d61bbb3SSatish Balay     low  = 0;
2158c71e6ed7SBarry Smith     high = nrow;
21592d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over added columns */
21605ef9f2a5SBarry Smith       if (in[l] < 0) continue;
2161cf9c20a2SJed Brown       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
21622d61bbb3SSatish Balay       col  = in[l]; bcol = col/bs;
21632d61bbb3SSatish Balay       ridx = row % bs; cidx = col % bs;
2164672ba085SHong Zhang       if (!A->structure_only) {
21652d61bbb3SSatish Balay         if (roworiented) {
21665ef9f2a5SBarry Smith           value = v[l + k*n];
21672d61bbb3SSatish Balay         } else {
21682d61bbb3SSatish Balay           value = v[k + l*m];
21692d61bbb3SSatish Balay         }
2170672ba085SHong Zhang       }
21717cd84e04SBarry Smith       if (col <= lastcol) low = 0; else high = nrow;
2172e2ee6c50SBarry Smith       lastcol = col;
21732d61bbb3SSatish Balay       while (high-low > 7) {
21742d61bbb3SSatish Balay         t = (low+high)/2;
21752d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
21762d61bbb3SSatish Balay         else              low  = t;
21772d61bbb3SSatish Balay       }
21782d61bbb3SSatish Balay       for (i=low; i<high; i++) {
21792d61bbb3SSatish Balay         if (rp[i] > bcol) break;
21802d61bbb3SSatish Balay         if (rp[i] == bcol) {
21812d61bbb3SSatish Balay           bap = ap +  bs2*i + bs*cidx + ridx;
2182672ba085SHong Zhang           if (!A->structure_only) {
21832d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
21842d61bbb3SSatish Balay             else                  *bap  = value;
2185672ba085SHong Zhang           }
21862d61bbb3SSatish Balay           goto noinsert1;
21872d61bbb3SSatish Balay         }
21882d61bbb3SSatish Balay       }
21892d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
2190e32f2f54SBarry Smith       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2191672ba085SHong Zhang       if (A->structure_only) {
2192672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2193672ba085SHong Zhang       } else {
2194fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2195672ba085SHong Zhang       }
2196c03d1d03SSatish Balay       N = nrow++ - 1; high++;
21972d61bbb3SSatish Balay       /* shift up all the later entries in this row */
2198580bdb30SBarry Smith       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
21992d61bbb3SSatish Balay       rp[i] = bcol;
2200580bdb30SBarry Smith       if (!A->structure_only) {
2201580bdb30SBarry Smith         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2202580bdb30SBarry Smith         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2203580bdb30SBarry Smith         ap[bs2*i + bs*cidx + ridx] = value;
2204580bdb30SBarry Smith       }
2205085a36d4SBarry Smith       a->nz++;
2206e56f5c9eSBarry Smith       A->nonzerostate++;
22072d61bbb3SSatish Balay noinsert1:;
22082d61bbb3SSatish Balay       low = i;
22092d61bbb3SSatish Balay     }
22102d61bbb3SSatish Balay     ailen[brow] = nrow;
22112d61bbb3SSatish Balay   }
22122d61bbb3SSatish Balay   PetscFunctionReturn(0);
22132d61bbb3SSatish Balay }
22142d61bbb3SSatish Balay 
22150481f469SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
22162d61bbb3SSatish Balay {
22172d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
22182d61bbb3SSatish Balay   Mat            outA;
2219dfbe8321SBarry Smith   PetscErrorCode ierr;
2220ace3abfcSBarry Smith   PetscBool      row_identity,col_identity;
22212d61bbb3SSatish Balay 
22222d61bbb3SSatish Balay   PetscFunctionBegin;
2223e32f2f54SBarry Smith   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2224667159a5SBarry Smith   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2225667159a5SBarry Smith   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2226f23aa3ddSBarry Smith   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
22272d61bbb3SSatish Balay 
22282d61bbb3SSatish Balay   outA            = inA;
2229d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
2230f6224b95SHong Zhang   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2231f6224b95SHong Zhang   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
22322d61bbb3SSatish Balay 
2233c4992f7dSBarry Smith   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2234cf242676SKris Buschelman 
2235c38d4ed2SBarry Smith   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
22366bf464f9SBarry Smith   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2237c3122656SLisandro Dalcin   a->row = row;
2238c38d4ed2SBarry Smith   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
22396bf464f9SBarry Smith   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2240c3122656SLisandro Dalcin   a->col = col;
2241c38d4ed2SBarry Smith 
2242c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
22436bf464f9SBarry Smith   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
22444c49b128SBarry Smith   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
22453bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2246c38d4ed2SBarry Smith 
2247ace3abfcSBarry Smith   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2248c38d4ed2SBarry Smith   if (!a->solve_work) {
2249854ce69bSBarry Smith     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
22503bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2251c38d4ed2SBarry Smith   }
2252719d5645SBarry Smith   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
22532d61bbb3SSatish Balay   PetscFunctionReturn(0);
22542d61bbb3SSatish Balay }
2255d9b7c43dSSatish Balay 
22567087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
225727a8da17SBarry Smith {
225827a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2259bdb1c0e1SJed Brown   PetscInt    i,nz,mbs;
226027a8da17SBarry Smith 
226127a8da17SBarry Smith   PetscFunctionBegin;
2262b32cb4a7SJed Brown   nz  = baij->maxnz;
2263bdb1c0e1SJed Brown   mbs = baij->mbs;
226427a8da17SBarry Smith   for (i=0; i<nz; i++) {
226527a8da17SBarry Smith     baij->j[i] = indices[i];
226627a8da17SBarry Smith   }
226727a8da17SBarry Smith   baij->nz = nz;
2268bdb1c0e1SJed Brown   for (i=0; i<mbs; i++) {
226927a8da17SBarry Smith     baij->ilen[i] = baij->imax[i];
227027a8da17SBarry Smith   }
227127a8da17SBarry Smith   PetscFunctionReturn(0);
227227a8da17SBarry Smith }
227327a8da17SBarry Smith 
227427a8da17SBarry Smith /*@
227527a8da17SBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
227627a8da17SBarry Smith        in the matrix.
227727a8da17SBarry Smith 
227827a8da17SBarry Smith   Input Parameters:
227927a8da17SBarry Smith +  mat - the SeqBAIJ matrix
228027a8da17SBarry Smith -  indices - the column indices
228127a8da17SBarry Smith 
228215091d37SBarry Smith   Level: advanced
228315091d37SBarry Smith 
228427a8da17SBarry Smith   Notes:
228527a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
228627a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
228727a8da17SBarry Smith   of the MatSetValues() operation.
228827a8da17SBarry Smith 
228927a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
2290d1be2dadSMatthew Knepley   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
229127a8da17SBarry Smith 
229227a8da17SBarry Smith     MUST be called before any calls to MatSetValues();
229327a8da17SBarry Smith 
229427a8da17SBarry Smith @*/
22957087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
229627a8da17SBarry Smith {
22974ac538c5SBarry Smith   PetscErrorCode ierr;
229827a8da17SBarry Smith 
229927a8da17SBarry Smith   PetscFunctionBegin;
23000700a824SBarry Smith   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
23014482741eSBarry Smith   PetscValidPointer(indices,2);
23024ac538c5SBarry Smith   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
230327a8da17SBarry Smith   PetscFunctionReturn(0);
230427a8da17SBarry Smith }
230527a8da17SBarry Smith 
2306985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2307273d9f13SBarry Smith {
2308273d9f13SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2309dfbe8321SBarry Smith   PetscErrorCode ierr;
2310c1ac3661SBarry Smith   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2311273d9f13SBarry Smith   PetscReal      atmp;
231287828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
2313273d9f13SBarry Smith   MatScalar      *aa;
2314c1ac3661SBarry Smith   PetscInt       ncols,brow,krow,kcol;
2315273d9f13SBarry Smith 
2316273d9f13SBarry Smith   PetscFunctionBegin;
2317e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2318d0f46423SBarry Smith   bs  = A->rmap->bs;
2319273d9f13SBarry Smith   aa  = a->a;
2320273d9f13SBarry Smith   ai  = a->i;
2321273d9f13SBarry Smith   aj  = a->j;
2322273d9f13SBarry Smith   mbs = a->mbs;
2323273d9f13SBarry Smith 
23242dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
23251ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2326273d9f13SBarry Smith   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2327e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2328273d9f13SBarry Smith   for (i=0; i<mbs; i++) {
2329273d9f13SBarry Smith     ncols = ai[1] - ai[0]; ai++;
2330273d9f13SBarry Smith     brow  = bs*i;
2331273d9f13SBarry Smith     for (j=0; j<ncols; j++) {
2332273d9f13SBarry Smith       for (kcol=0; kcol<bs; kcol++) {
2333273d9f13SBarry Smith         for (krow=0; krow<bs; krow++) {
2334273d9f13SBarry Smith           atmp = PetscAbsScalar(*aa);aa++;
2335273d9f13SBarry Smith           row  = brow + krow;   /* row index */
2336985db425SBarry Smith           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2337273d9f13SBarry Smith         }
2338273d9f13SBarry Smith       }
2339273d9f13SBarry Smith       aj++;
2340273d9f13SBarry Smith     }
2341273d9f13SBarry Smith   }
23421ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2343273d9f13SBarry Smith   PetscFunctionReturn(0);
2344273d9f13SBarry Smith }
2345273d9f13SBarry Smith 
23463c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
23473c896bc6SHong Zhang {
23483c896bc6SHong Zhang   PetscErrorCode ierr;
23493c896bc6SHong Zhang 
23503c896bc6SHong Zhang   PetscFunctionBegin;
23513c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
23523c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
23533c896bc6SHong Zhang     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
23543c896bc6SHong Zhang     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2355d88c0aacSHong Zhang     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
23563c896bc6SHong Zhang 
2357d88c0aacSHong Zhang     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2358d88c0aacSHong Zhang     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2359580bdb30SBarry Smith     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2360cdc753b6SBarry Smith     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
23613c896bc6SHong Zhang   } else {
23623c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
23633c896bc6SHong Zhang   }
23643c896bc6SHong Zhang   PetscFunctionReturn(0);
23653c896bc6SHong Zhang }
23663c896bc6SHong Zhang 
23674994cf47SJed Brown PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2368273d9f13SBarry Smith {
2369dfbe8321SBarry Smith   PetscErrorCode ierr;
2370273d9f13SBarry Smith 
2371273d9f13SBarry Smith   PetscFunctionBegin;
2372f4259b30SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2373273d9f13SBarry Smith   PetscFunctionReturn(0);
2374273d9f13SBarry Smith }
2375273d9f13SBarry Smith 
2376cda14afcSprj- static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2377f2a5309cSSatish Balay {
2378f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
23796e111a19SKarl Rupp 
2380f2a5309cSSatish Balay   PetscFunctionBegin;
2381f2a5309cSSatish Balay   *array = a->a;
2382f2a5309cSSatish Balay   PetscFunctionReturn(0);
2383f2a5309cSSatish Balay }
2384f2a5309cSSatish Balay 
2385cda14afcSprj- static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2386f2a5309cSSatish Balay {
2387f2a5309cSSatish Balay   PetscFunctionBegin;
2388cda14afcSprj-   *array = NULL;
2389f2a5309cSSatish Balay   PetscFunctionReturn(0);
2390f2a5309cSSatish Balay }
2391f2a5309cSSatish Balay 
239252768537SHong Zhang PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
239352768537SHong Zhang {
2394b264fe52SHong Zhang   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
239552768537SHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
239652768537SHong Zhang   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2397b264fe52SHong Zhang   PetscErrorCode ierr;
239852768537SHong Zhang 
239952768537SHong Zhang   PetscFunctionBegin;
240052768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
2401b264fe52SHong Zhang   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
240252768537SHong Zhang   PetscFunctionReturn(0);
240352768537SHong Zhang }
240452768537SHong Zhang 
2405f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
240642ee4b1aSHong Zhang {
240742ee4b1aSHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2408dfbe8321SBarry Smith   PetscErrorCode ierr;
240931ce2d13SHong Zhang   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2410e838b9e7SJed Brown   PetscBLASInt   one=1;
241142ee4b1aSHong Zhang 
241242ee4b1aSHong Zhang   PetscFunctionBegin;
241342ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2414f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2415c5df96a5SBarry Smith     PetscBLASInt bnz;
2416c5df96a5SBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
24178b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2418a3fa217bSJose E. Roman     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2419ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2420ab784542SHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
242142ee4b1aSHong Zhang   } else {
242252768537SHong Zhang     Mat      B;
242352768537SHong Zhang     PetscInt *nnz;
242452768537SHong Zhang     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
242552768537SHong Zhang     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
242652768537SHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
242752768537SHong Zhang     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
242852768537SHong Zhang     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
242952768537SHong Zhang     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
243052768537SHong Zhang     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
243152768537SHong Zhang     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
243252768537SHong Zhang     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
243352768537SHong Zhang     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
243428be2f97SBarry Smith     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
243552768537SHong Zhang     ierr = PetscFree(nnz);CHKERRQ(ierr);
243642ee4b1aSHong Zhang   }
243742ee4b1aSHong Zhang   PetscFunctionReturn(0);
243842ee4b1aSHong Zhang }
243942ee4b1aSHong Zhang 
24402726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
24412726fb6dSPierre Jolivet {
24422726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX)
24432726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
24442726fb6dSPierre Jolivet   PetscInt    i,nz = a->bs2*a->i[a->mbs];
24452726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
24462726fb6dSPierre Jolivet 
24472726fb6dSPierre Jolivet   PetscFunctionBegin;
24482726fb6dSPierre Jolivet   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
24492726fb6dSPierre Jolivet #else
24502726fb6dSPierre Jolivet   PetscFunctionBegin;
24512726fb6dSPierre Jolivet #endif
24522726fb6dSPierre Jolivet   PetscFunctionReturn(0);
24532726fb6dSPierre Jolivet }
24542726fb6dSPierre Jolivet 
245599cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
245699cafbc1SBarry Smith {
245799cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
245899cafbc1SBarry Smith   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2459dd6ea824SBarry Smith   MatScalar   *aa = a->a;
246099cafbc1SBarry Smith 
246199cafbc1SBarry Smith   PetscFunctionBegin;
246299cafbc1SBarry Smith   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
246399cafbc1SBarry Smith   PetscFunctionReturn(0);
246499cafbc1SBarry Smith }
246599cafbc1SBarry Smith 
246699cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
246799cafbc1SBarry Smith {
246899cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
246999cafbc1SBarry Smith   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2470dd6ea824SBarry Smith   MatScalar   *aa = a->a;
247199cafbc1SBarry Smith 
247299cafbc1SBarry Smith   PetscFunctionBegin;
247399cafbc1SBarry Smith   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
247499cafbc1SBarry Smith   PetscFunctionReturn(0);
247599cafbc1SBarry Smith }
247699cafbc1SBarry Smith 
24773acb8795SBarry Smith /*
24782479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
24793acb8795SBarry Smith */
24801a83f524SJed Brown PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
24813acb8795SBarry Smith {
24823acb8795SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
24833acb8795SBarry Smith   PetscErrorCode ierr;
24843acb8795SBarry Smith   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
24853acb8795SBarry Smith   PetscInt       nz = a->i[m],row,*jj,mr,col;
24863acb8795SBarry Smith 
24873acb8795SBarry Smith   PetscFunctionBegin;
24883acb8795SBarry Smith   *nn = n;
24893acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
2490e7e72b3dSBarry Smith   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2491e7e72b3dSBarry Smith   else {
2492b9e7e5c1SBarry Smith     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2493854ce69bSBarry Smith     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2494b9e7e5c1SBarry Smith     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
24953acb8795SBarry Smith     jj   = a->j;
24963acb8795SBarry Smith     for (i=0; i<nz; i++) {
24973acb8795SBarry Smith       collengths[jj[i]]++;
24983acb8795SBarry Smith     }
24993acb8795SBarry Smith     cia[0] = oshift;
25003acb8795SBarry Smith     for (i=0; i<n; i++) {
25013acb8795SBarry Smith       cia[i+1] = cia[i] + collengths[i];
25023acb8795SBarry Smith     }
2503580bdb30SBarry Smith     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
25043acb8795SBarry Smith     jj   = a->j;
25053acb8795SBarry Smith     for (row=0; row<m; row++) {
25063acb8795SBarry Smith       mr = a->i[row+1] - a->i[row];
25073acb8795SBarry Smith       for (i=0; i<mr; i++) {
25083acb8795SBarry Smith         col = *jj++;
250926fbe8dcSKarl Rupp 
25103acb8795SBarry Smith         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
25113acb8795SBarry Smith       }
25123acb8795SBarry Smith     }
25133acb8795SBarry Smith     ierr = PetscFree(collengths);CHKERRQ(ierr);
25143acb8795SBarry Smith     *ia  = cia; *ja = cja;
25153acb8795SBarry Smith   }
25163acb8795SBarry Smith   PetscFunctionReturn(0);
25173acb8795SBarry Smith }
25183acb8795SBarry Smith 
25191a83f524SJed Brown PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
25203acb8795SBarry Smith {
25213acb8795SBarry Smith   PetscErrorCode ierr;
25223acb8795SBarry Smith 
25233acb8795SBarry Smith   PetscFunctionBegin;
25243acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
25253acb8795SBarry Smith   ierr = PetscFree(*ia);CHKERRQ(ierr);
25263acb8795SBarry Smith   ierr = PetscFree(*ja);CHKERRQ(ierr);
25273acb8795SBarry Smith   PetscFunctionReturn(0);
25283acb8795SBarry Smith }
25293acb8795SBarry Smith 
2530525d23c0SHong Zhang /*
2531525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2532525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2533040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2534525d23c0SHong Zhang  */
2535525d23c0SHong Zhang PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2536f6d58c54SBarry Smith {
2537525d23c0SHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2538f6d58c54SBarry Smith   PetscErrorCode ierr;
2539c0349474SHong Zhang   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2540525d23c0SHong Zhang   PetscInt       nz = a->i[m],row,*jj,mr,col;
2541525d23c0SHong Zhang   PetscInt       *cspidx;
2542f6d58c54SBarry Smith 
2543f6d58c54SBarry Smith   PetscFunctionBegin;
2544525d23c0SHong Zhang   *nn = n;
2545525d23c0SHong Zhang   if (!ia) PetscFunctionReturn(0);
2546f6d58c54SBarry Smith 
2547b9e7e5c1SBarry Smith   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2548854ce69bSBarry Smith   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2549b9e7e5c1SBarry Smith   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2550b9e7e5c1SBarry Smith   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2551525d23c0SHong Zhang   jj   = a->j;
2552525d23c0SHong Zhang   for (i=0; i<nz; i++) {
2553525d23c0SHong Zhang     collengths[jj[i]]++;
2554f6d58c54SBarry Smith   }
2555525d23c0SHong Zhang   cia[0] = oshift;
2556525d23c0SHong Zhang   for (i=0; i<n; i++) {
2557525d23c0SHong Zhang     cia[i+1] = cia[i] + collengths[i];
2558525d23c0SHong Zhang   }
2559580bdb30SBarry Smith   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2560525d23c0SHong Zhang   jj   = a->j;
2561525d23c0SHong Zhang   for (row=0; row<m; row++) {
2562525d23c0SHong Zhang     mr = a->i[row+1] - a->i[row];
2563525d23c0SHong Zhang     for (i=0; i<mr; i++) {
2564525d23c0SHong Zhang       col = *jj++;
2565525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2566525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2567525d23c0SHong Zhang     }
2568525d23c0SHong Zhang   }
2569525d23c0SHong Zhang   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2570071fcb05SBarry Smith   *ia    = cia;
2571071fcb05SBarry Smith   *ja    = cja;
2572525d23c0SHong Zhang   *spidx = cspidx;
2573525d23c0SHong Zhang   PetscFunctionReturn(0);
2574f6d58c54SBarry Smith }
2575f6d58c54SBarry Smith 
2576525d23c0SHong Zhang PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2577525d23c0SHong Zhang {
2578525d23c0SHong Zhang   PetscErrorCode ierr;
2579f6d58c54SBarry Smith 
2580525d23c0SHong Zhang   PetscFunctionBegin;
2581525d23c0SHong Zhang   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2582525d23c0SHong Zhang   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2583f6d58c54SBarry Smith   PetscFunctionReturn(0);
2584f6d58c54SBarry Smith }
258599cafbc1SBarry Smith 
25867d68702bSBarry Smith PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
25877d68702bSBarry Smith {
25887d68702bSBarry Smith   PetscErrorCode ierr;
25897d68702bSBarry Smith   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
25907d68702bSBarry Smith 
25917d68702bSBarry Smith   PetscFunctionBegin;
25926f33a894SBarry Smith   if (!Y->preallocated || !aij->nz) {
25937d68702bSBarry Smith     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
25947d68702bSBarry Smith   }
25957d68702bSBarry Smith   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
25967d68702bSBarry Smith   PetscFunctionReturn(0);
25977d68702bSBarry Smith }
25987d68702bSBarry Smith 
25992593348eSBarry Smith /* -------------------------------------------------------------------*/
26003964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2601cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
2602cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
2603cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
260497304618SKris Buschelman                                /* 4*/  MatMultAdd_SeqBAIJ_N,
26057c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
26067c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
2607f4259b30SLisandro Dalcin                                        NULL,
2608f4259b30SLisandro Dalcin                                        NULL,
2609f4259b30SLisandro Dalcin                                        NULL,
2610f4259b30SLisandro Dalcin                                /* 10*/ NULL,
2611cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
2612f4259b30SLisandro Dalcin                                        NULL,
2613f4259b30SLisandro Dalcin                                        NULL,
2614f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
261597304618SKris Buschelman                                /* 15*/ MatGetInfo_SeqBAIJ,
2616cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
2617cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
2618cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
2619cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
2620f4259b30SLisandro Dalcin                                /* 20*/ NULL,
2621cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
2622cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
2623cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
2624d519adbfSMatthew Knepley                                /* 24*/ MatZeroRows_SeqBAIJ,
2625f4259b30SLisandro Dalcin                                        NULL,
2626f4259b30SLisandro Dalcin                                        NULL,
2627f4259b30SLisandro Dalcin                                        NULL,
2628f4259b30SLisandro Dalcin                                        NULL,
26294994cf47SJed Brown                                /* 29*/ MatSetUp_SeqBAIJ,
2630f4259b30SLisandro Dalcin                                        NULL,
2631f4259b30SLisandro Dalcin                                        NULL,
2632f4259b30SLisandro Dalcin                                        NULL,
2633f4259b30SLisandro Dalcin                                        NULL,
2634d519adbfSMatthew Knepley                                /* 34*/ MatDuplicate_SeqBAIJ,
2635f4259b30SLisandro Dalcin                                        NULL,
2636f4259b30SLisandro Dalcin                                        NULL,
2637cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
2638f4259b30SLisandro Dalcin                                        NULL,
2639d519adbfSMatthew Knepley                                /* 39*/ MatAXPY_SeqBAIJ,
26407dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
2641cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
2642cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
26433c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
2644f4259b30SLisandro Dalcin                                /* 44*/ NULL,
2645cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
26467d68702bSBarry Smith                                        MatShift_SeqBAIJ,
2647f4259b30SLisandro Dalcin                                        NULL,
264897b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
2649f4259b30SLisandro Dalcin                                /* 49*/ NULL,
26503b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
265192c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
26523acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
26533acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
265493dfae19SHong Zhang                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2655f4259b30SLisandro Dalcin                                        NULL,
2656f4259b30SLisandro Dalcin                                        NULL,
2657090001bdSToby Isaac                                        NULL,
2658d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
26597dae84e0SHong Zhang                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2660b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
2661b9b97703SBarry Smith                                        MatView_SeqBAIJ,
2662f4259b30SLisandro Dalcin                                        NULL,
2663f4259b30SLisandro Dalcin                                        NULL,
2664f4259b30SLisandro Dalcin                                /* 64*/ NULL,
2665f4259b30SLisandro Dalcin                                        NULL,
2666f4259b30SLisandro Dalcin                                        NULL,
2667f4259b30SLisandro Dalcin                                        NULL,
2668f4259b30SLisandro Dalcin                                        NULL,
2669d519adbfSMatthew Knepley                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2670f4259b30SLisandro Dalcin                                        NULL,
2671c87e5d42SMatthew Knepley                                        MatConvert_Basic,
2672f4259b30SLisandro Dalcin                                        NULL,
2673f4259b30SLisandro Dalcin                                        NULL,
2674f4259b30SLisandro Dalcin                                /* 74*/ NULL,
2675f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
2676f4259b30SLisandro Dalcin                                        NULL,
2677f4259b30SLisandro Dalcin                                        NULL,
2678f4259b30SLisandro Dalcin                                        NULL,
2679f4259b30SLisandro Dalcin                                /* 79*/ NULL,
2680f4259b30SLisandro Dalcin                                        NULL,
2681f4259b30SLisandro Dalcin                                        NULL,
2682f4259b30SLisandro Dalcin                                        NULL,
26835bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
2684f4259b30SLisandro Dalcin                                /* 84*/ NULL,
2685f4259b30SLisandro Dalcin                                        NULL,
2686f4259b30SLisandro Dalcin                                        NULL,
2687f4259b30SLisandro Dalcin                                        NULL,
2688f4259b30SLisandro Dalcin                                        NULL,
2689f4259b30SLisandro Dalcin                                /* 89*/ NULL,
2690f4259b30SLisandro Dalcin                                        NULL,
2691f4259b30SLisandro Dalcin                                        NULL,
2692f4259b30SLisandro Dalcin                                        NULL,
2693f4259b30SLisandro Dalcin                                        NULL,
2694f4259b30SLisandro Dalcin                                /* 94*/ NULL,
2695f4259b30SLisandro Dalcin                                        NULL,
2696f4259b30SLisandro Dalcin                                        NULL,
2697f4259b30SLisandro Dalcin                                        NULL,
2698f4259b30SLisandro Dalcin                                        NULL,
2699f4259b30SLisandro Dalcin                                /* 99*/ NULL,
2700f4259b30SLisandro Dalcin                                        NULL,
2701f4259b30SLisandro Dalcin                                        NULL,
27022726fb6dSPierre Jolivet                                        MatConjugate_SeqBAIJ,
2703f4259b30SLisandro Dalcin                                        NULL,
2704f4259b30SLisandro Dalcin                                /*104*/ NULL,
270599cafbc1SBarry Smith                                        MatRealPart_SeqBAIJ,
27062af78befSBarry Smith                                        MatImaginaryPart_SeqBAIJ,
2707f4259b30SLisandro Dalcin                                        NULL,
2708f4259b30SLisandro Dalcin                                        NULL,
2709f4259b30SLisandro Dalcin                                /*109*/ NULL,
2710f4259b30SLisandro Dalcin                                        NULL,
2711f4259b30SLisandro Dalcin                                        NULL,
2712f4259b30SLisandro Dalcin                                        NULL,
2713547795f9SHong Zhang                                        MatMissingDiagonal_SeqBAIJ,
2714f4259b30SLisandro Dalcin                                /*114*/ NULL,
2715f4259b30SLisandro Dalcin                                        NULL,
2716f4259b30SLisandro Dalcin                                        NULL,
2717f4259b30SLisandro Dalcin                                        NULL,
2718f4259b30SLisandro Dalcin                                        NULL,
2719f4259b30SLisandro Dalcin                                /*119*/ NULL,
2720f4259b30SLisandro Dalcin                                        NULL,
2721547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
2722d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2723f4259b30SLisandro Dalcin                                        NULL,
2724f4259b30SLisandro Dalcin                                /*124*/ NULL,
2725f4259b30SLisandro Dalcin                                        NULL,
27263964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
2727f4259b30SLisandro Dalcin                                        NULL,
2728f4259b30SLisandro Dalcin                                        NULL,
2729f4259b30SLisandro Dalcin                                /*129*/ NULL,
2730f4259b30SLisandro Dalcin                                        NULL,
2731f4259b30SLisandro Dalcin                                        NULL,
2732f4259b30SLisandro Dalcin                                        NULL,
2733f4259b30SLisandro Dalcin                                        NULL,
2734f4259b30SLisandro Dalcin                                /*134*/ NULL,
2735f4259b30SLisandro Dalcin                                        NULL,
2736f4259b30SLisandro Dalcin                                        NULL,
2737f4259b30SLisandro Dalcin                                        NULL,
2738f4259b30SLisandro Dalcin                                        NULL,
273946533700Sstefano_zampini                                /*139*/ MatSetBlockSizes_Default,
2740f4259b30SLisandro Dalcin                                        NULL,
2741f4259b30SLisandro Dalcin                                        NULL,
2742bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_SeqXAIJ,
2743f4259b30SLisandro Dalcin                                        NULL,
274486e85357SHong Zhang                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
274586e85357SHong Zhang                                        MatDestroySubMatrices_SeqBAIJ
274699cafbc1SBarry Smith };
27472593348eSBarry Smith 
27487087cfbeSBarry Smith PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
27493e90b805SBarry Smith {
27503e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
27518ece6314SShri Abhyankar   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2752dfbe8321SBarry Smith   PetscErrorCode ierr;
27533e90b805SBarry Smith 
27543e90b805SBarry Smith   PetscFunctionBegin;
2755e7e72b3dSBarry Smith   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
27563e90b805SBarry Smith 
27573e90b805SBarry Smith   /* allocate space for values if not already there */
27583e90b805SBarry Smith   if (!aij->saved_values) {
2759854ce69bSBarry Smith     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
27603bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
27613e90b805SBarry Smith   }
27623e90b805SBarry Smith 
27633e90b805SBarry Smith   /* copy values over */
2764580bdb30SBarry Smith   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
27653e90b805SBarry Smith   PetscFunctionReturn(0);
27663e90b805SBarry Smith }
27673e90b805SBarry Smith 
27687087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
27693e90b805SBarry Smith {
27703e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
27716849ba73SBarry Smith   PetscErrorCode ierr;
27728ece6314SShri Abhyankar   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
27733e90b805SBarry Smith 
27743e90b805SBarry Smith   PetscFunctionBegin;
2775e7e72b3dSBarry Smith   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2776e7e72b3dSBarry Smith   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
27773e90b805SBarry Smith 
27783e90b805SBarry Smith   /* copy values over */
2779580bdb30SBarry Smith   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
27803e90b805SBarry Smith   PetscFunctionReturn(0);
27813e90b805SBarry Smith }
27823e90b805SBarry Smith 
2783cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2784cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2785273d9f13SBarry Smith 
2786b5b72c8aSIrina Sokolova PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2787a23d5eceSKris Buschelman {
2788a23d5eceSKris Buschelman   Mat_SeqBAIJ    *b;
27896849ba73SBarry Smith   PetscErrorCode ierr;
2790535b19f3SBarry Smith   PetscInt       i,mbs,nbs,bs2;
27918afaa268SBarry Smith   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2792a23d5eceSKris Buschelman 
2793a23d5eceSKris Buschelman   PetscFunctionBegin;
27942576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2795ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
2796ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
2797ab93d7beSBarry Smith     nz             = 0;
2798ab93d7beSBarry Smith   }
27998c07d4e3SBarry Smith 
280033d57670SJed Brown   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
280126283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
280226283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2803e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2804899cda47SBarry Smith 
2805899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
2806899cda47SBarry Smith 
2807d0f46423SBarry Smith   mbs = B->rmap->n/bs;
2808d0f46423SBarry Smith   nbs = B->cmap->n/bs;
2809a23d5eceSKris Buschelman   bs2 = bs*bs;
2810a23d5eceSKris Buschelman 
281165e19b50SBarry Smith   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2812a23d5eceSKris Buschelman 
2813a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2814e32f2f54SBarry Smith   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2815a23d5eceSKris Buschelman   if (nnz) {
2816a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) {
2817e32f2f54SBarry Smith       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2818e32f2f54SBarry Smith       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2819a23d5eceSKris Buschelman     }
2820a23d5eceSKris Buschelman   }
2821a23d5eceSKris Buschelman 
2822a23d5eceSKris Buschelman   b    = (Mat_SeqBAIJ*)B->data;
2823ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
28248afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
28258c07d4e3SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
28268c07d4e3SBarry Smith 
2827a23d5eceSKris Buschelman   if (!flg) {
2828a23d5eceSKris Buschelman     switch (bs) {
2829a23d5eceSKris Buschelman     case 1:
2830a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
2831a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2832a23d5eceSKris Buschelman       break;
2833a23d5eceSKris Buschelman     case 2:
2834a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
2835a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2836a23d5eceSKris Buschelman       break;
2837a23d5eceSKris Buschelman     case 3:
2838a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
2839a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2840a23d5eceSKris Buschelman       break;
2841a23d5eceSKris Buschelman     case 4:
2842a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
2843a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2844a23d5eceSKris Buschelman       break;
2845a23d5eceSKris Buschelman     case 5:
2846a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
2847a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2848a23d5eceSKris Buschelman       break;
2849a23d5eceSKris Buschelman     case 6:
2850a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
2851a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2852a23d5eceSKris Buschelman       break;
2853a23d5eceSKris Buschelman     case 7:
2854a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
2855a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2856a23d5eceSKris Buschelman       break;
285796e086a2SDaniel Kokron     case 9:
28586679dcc1SBarry Smith     {
28596679dcc1SBarry Smith       PetscInt version = 1;
28606679dcc1SBarry Smith       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
28616679dcc1SBarry Smith       switch (version) {
28625f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
28636679dcc1SBarry Smith       case 1:
286496e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
286596e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2866*1e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
28676679dcc1SBarry Smith         break;
28686679dcc1SBarry Smith #endif
28696679dcc1SBarry Smith       default:
287096e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
287196e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2872*1e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
287396e086a2SDaniel Kokron         break;
28746679dcc1SBarry Smith       }
28756679dcc1SBarry Smith       break;
28766679dcc1SBarry Smith     }
2877ebada01fSBarry Smith     case 11:
2878ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
2879ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2880ebada01fSBarry Smith       break;
28816679dcc1SBarry Smith     case 12:
28826679dcc1SBarry Smith     {
28836679dcc1SBarry Smith       PetscInt version = 1;
28846679dcc1SBarry Smith       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
28856679dcc1SBarry Smith       switch (version) {
28866679dcc1SBarry Smith       case 1:
28876679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
28886679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2889*1e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
28908ab949d8SShri Abhyankar         break;
28916679dcc1SBarry Smith       case 2:
28926679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
28936679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2894*1e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
28956679dcc1SBarry Smith         break;
28966679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
28976679dcc1SBarry Smith       case 3:
28986679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
28996679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2900*1e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
29016679dcc1SBarry Smith         break;
29026679dcc1SBarry Smith #endif
2903a23d5eceSKris Buschelman       default:
2904a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
2905a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2906*1e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
29076679dcc1SBarry Smith         break;
29086679dcc1SBarry Smith       }
29096679dcc1SBarry Smith       break;
29106679dcc1SBarry Smith     }
29116679dcc1SBarry Smith     case 15:
29126679dcc1SBarry Smith     {
29136679dcc1SBarry Smith       PetscInt version = 1;
29146679dcc1SBarry Smith       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
29156679dcc1SBarry Smith       switch (version) {
29166679dcc1SBarry Smith       case 1:
29176679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2918*1e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29196679dcc1SBarry Smith         break;
29206679dcc1SBarry Smith       case 2:
29216679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2922*1e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29236679dcc1SBarry Smith         break;
29246679dcc1SBarry Smith       case 3:
29256679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2926*1e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29276679dcc1SBarry Smith         break;
29286679dcc1SBarry Smith       case 4:
29296679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2930*1e1ea65dSPierre Jolivet         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
29316679dcc1SBarry Smith         break;
29326679dcc1SBarry Smith       default:
29336679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_N;
2934*1e1ea65dSPierre Jolivet         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
29356679dcc1SBarry Smith         break;
29366679dcc1SBarry Smith       }
29376679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
29386679dcc1SBarry Smith       break;
29396679dcc1SBarry Smith     }
29406679dcc1SBarry Smith     default:
29416679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
29426679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2943*1e1ea65dSPierre Jolivet       ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2944a23d5eceSKris Buschelman       break;
2945a23d5eceSKris Buschelman     }
2946a23d5eceSKris Buschelman   }
2947e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
2948a23d5eceSKris Buschelman   b->mbs = mbs;
2949a23d5eceSKris Buschelman   b->nbs = nbs;
2950ab93d7beSBarry Smith   if (!skipallocation) {
29512ee49352SLisandro Dalcin     if (!b->imax) {
2952dcca6d9dSJed Brown       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
29533bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
295426fbe8dcSKarl Rupp 
29554fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
29562ee49352SLisandro Dalcin     }
2957ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
295826fbe8dcSKarl Rupp     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2959a23d5eceSKris Buschelman     if (!nnz) {
2960a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2961c62bd62aSJed Brown       else if (nz < 0) nz = 1;
29625d2a9ed1SStefano Zampini       nz = PetscMin(nz,nbs);
2963a23d5eceSKris Buschelman       for (i=0; i<mbs; i++) b->imax[i] = nz;
296461778c46SBarry Smith       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
2965a23d5eceSKris Buschelman     } else {
2966c73702f5SBarry Smith       PetscInt64 nz64 = 0;
2967c73702f5SBarry Smith       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2968c73702f5SBarry Smith       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
2969a23d5eceSKris Buschelman     }
2970a23d5eceSKris Buschelman 
2971a23d5eceSKris Buschelman     /* allocate the matrix space */
29722ee49352SLisandro Dalcin     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2973672ba085SHong Zhang     if (B->structure_only) {
2974672ba085SHong Zhang       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2975672ba085SHong Zhang       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2976672ba085SHong Zhang       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2977672ba085SHong Zhang     } else {
29786679dcc1SBarry Smith       PetscInt nzbs2 = 0;
29796679dcc1SBarry Smith       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
29806679dcc1SBarry Smith       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
29813bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2982580bdb30SBarry Smith       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2983672ba085SHong Zhang     }
2984580bdb30SBarry Smith     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
298526fbe8dcSKarl Rupp 
2986672ba085SHong Zhang     if (B->structure_only) {
2987672ba085SHong Zhang       b->singlemalloc = PETSC_FALSE;
2988672ba085SHong Zhang       b->free_a       = PETSC_FALSE;
2989672ba085SHong Zhang     } else {
2990a23d5eceSKris Buschelman       b->singlemalloc = PETSC_TRUE;
2991672ba085SHong Zhang       b->free_a       = PETSC_TRUE;
2992672ba085SHong Zhang     }
2993672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
2994672ba085SHong Zhang 
2995a23d5eceSKris Buschelman     b->i[0] = 0;
2996a23d5eceSKris Buschelman     for (i=1; i<mbs+1; i++) {
2997a23d5eceSKris Buschelman       b->i[i] = b->i[i-1] + b->imax[i-1];
2998a23d5eceSKris Buschelman     }
2999672ba085SHong Zhang 
3000e811da20SHong Zhang   } else {
3001e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3002e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3003ab93d7beSBarry Smith   }
3004a23d5eceSKris Buschelman 
3005a23d5eceSKris Buschelman   b->bs2              = bs2;
3006a23d5eceSKris Buschelman   b->mbs              = mbs;
3007a23d5eceSKris Buschelman   b->nz               = 0;
3008b32cb4a7SJed Brown   b->maxnz            = nz;
3009b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3010cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3011cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
30122576faa2SJed Brown   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3013a23d5eceSKris Buschelman   PetscFunctionReturn(0);
3014a23d5eceSKris Buschelman }
3015a23d5eceSKris Buschelman 
3016cf12db73SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3017725b52f3SLisandro Dalcin {
3018725b52f3SLisandro Dalcin   PetscInt       i,m,nz,nz_max=0,*nnz;
3019f4259b30SLisandro Dalcin   PetscScalar    *values=NULL;
3020d47bf9aaSJed Brown   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3021725b52f3SLisandro Dalcin   PetscErrorCode ierr;
3022725b52f3SLisandro Dalcin 
3023725b52f3SLisandro Dalcin   PetscFunctionBegin;
3024e32f2f54SBarry Smith   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
302526283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
302626283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
302726283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
302826283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3029e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3030d0f46423SBarry Smith   m    = B->rmap->n/bs;
3031725b52f3SLisandro Dalcin 
303226fbe8dcSKarl Rupp   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3033854ce69bSBarry Smith   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3034725b52f3SLisandro Dalcin   for (i=0; i<m; i++) {
3035cf12db73SBarry Smith     nz = ii[i+1]- ii[i];
303626fbe8dcSKarl Rupp     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3037725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3038725b52f3SLisandro Dalcin     nnz[i] = nz;
3039725b52f3SLisandro Dalcin   }
3040725b52f3SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3041725b52f3SLisandro Dalcin   ierr = PetscFree(nnz);CHKERRQ(ierr);
3042725b52f3SLisandro Dalcin 
3043725b52f3SLisandro Dalcin   values = (PetscScalar*)V;
3044725b52f3SLisandro Dalcin   if (!values) {
30451795a4d1SJed Brown     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3046725b52f3SLisandro Dalcin   }
3047725b52f3SLisandro Dalcin   for (i=0; i<m; i++) {
3048cf12db73SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
3049cf12db73SBarry Smith     const PetscInt    *icols = jj + ii[i];
3050bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3051cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3052725b52f3SLisandro Dalcin       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
30533adadaf3SJed Brown     } else {
30543adadaf3SJed Brown       PetscInt j;
30553adadaf3SJed Brown       for (j=0; j<ncols; j++) {
30563adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3057d47bf9aaSJed Brown         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
30583adadaf3SJed Brown       }
30593adadaf3SJed Brown     }
3060725b52f3SLisandro Dalcin   }
3061725b52f3SLisandro Dalcin   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3062725b52f3SLisandro Dalcin   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3063725b52f3SLisandro Dalcin   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
30647827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3065725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3066725b52f3SLisandro Dalcin }
3067725b52f3SLisandro Dalcin 
3068cda14afcSprj- /*@C
3069cda14afcSprj-    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3070cda14afcSprj- 
3071cda14afcSprj-    Not Collective
3072cda14afcSprj- 
3073cda14afcSprj-    Input Parameter:
3074cda14afcSprj- .  mat - a MATSEQBAIJ matrix
3075cda14afcSprj- 
3076cda14afcSprj-    Output Parameter:
3077cda14afcSprj- .   array - pointer to the data
3078cda14afcSprj- 
3079cda14afcSprj-    Level: intermediate
3080cda14afcSprj- 
3081cda14afcSprj- .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3082cda14afcSprj- @*/
3083cda14afcSprj- PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3084cda14afcSprj- {
3085cda14afcSprj-   PetscErrorCode ierr;
3086cda14afcSprj- 
3087cda14afcSprj-   PetscFunctionBegin;
3088cda14afcSprj-   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3089cda14afcSprj-   PetscFunctionReturn(0);
3090cda14afcSprj- }
3091cda14afcSprj- 
3092cda14afcSprj- /*@C
3093cda14afcSprj-    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3094cda14afcSprj- 
3095cda14afcSprj-    Not Collective
3096cda14afcSprj- 
3097cda14afcSprj-    Input Parameters:
3098cda14afcSprj- +  mat - a MATSEQBAIJ matrix
3099cda14afcSprj- -  array - pointer to the data
3100cda14afcSprj- 
3101cda14afcSprj-    Level: intermediate
3102cda14afcSprj- 
3103cda14afcSprj- .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3104cda14afcSprj- @*/
3105cda14afcSprj- PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3106cda14afcSprj- {
3107cda14afcSprj-   PetscErrorCode ierr;
3108cda14afcSprj- 
3109cda14afcSprj-   PetscFunctionBegin;
3110cda14afcSprj-   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3111cda14afcSprj-   PetscFunctionReturn(0);
3112cda14afcSprj- }
3113cda14afcSprj- 
31140bad9183SKris Buschelman /*MC
3115fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
31160bad9183SKris Buschelman    block sparse compressed row format.
31170bad9183SKris Buschelman 
31180bad9183SKris Buschelman    Options Database Keys:
31196679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
31206679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
31210bad9183SKris Buschelman 
31220bad9183SKris Buschelman    Level: beginner
31230cd7f59aSBarry Smith 
31240cd7f59aSBarry Smith    Notes:
31250cd7f59aSBarry Smith     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
31260cd7f59aSBarry Smith     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
31270bad9183SKris Buschelman 
31286679dcc1SBarry Smith    Run with -info to see what version of the matrix-vector product is being used
31296679dcc1SBarry Smith 
3130f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ()
31310bad9183SKris Buschelman M*/
31320bad9183SKris Buschelman 
3133cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3134b24902e0SBarry Smith 
31358cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
31362593348eSBarry Smith {
3137dfbe8321SBarry Smith   PetscErrorCode ierr;
3138c1ac3661SBarry Smith   PetscMPIInt    size;
3139b6490206SBarry Smith   Mat_SeqBAIJ    *b;
31403b2fbd54SBarry Smith 
31413a40ed3dSBarry Smith   PetscFunctionBegin;
3142ffc4695bSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3143e32f2f54SBarry Smith   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3144b6490206SBarry Smith 
3145b00a9115SJed Brown   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3146b0a32e0cSBarry Smith   B->data = (void*)b;
3147549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
314826fbe8dcSKarl Rupp 
3149f4259b30SLisandro Dalcin   b->row          = NULL;
3150f4259b30SLisandro Dalcin   b->col          = NULL;
3151f4259b30SLisandro Dalcin   b->icol         = NULL;
31522593348eSBarry Smith   b->reallocs     = 0;
3153f4259b30SLisandro Dalcin   b->saved_values = NULL;
31542593348eSBarry Smith 
3155c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
31562593348eSBarry Smith   b->nonew              = 0;
3157f4259b30SLisandro Dalcin   b->diag               = NULL;
3158f4259b30SLisandro Dalcin   B->spptr              = NULL;
3159b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3160a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
31614e220ebcSLois Curfman McInnes 
3162cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3163cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3164bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3165bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3166bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3167bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3168bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3169bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3170bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3171bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3172bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
31737ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
3174c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
31757ea3e4caSstefano_zampini #endif
3176c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
317717667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
31783a40ed3dSBarry Smith   PetscFunctionReturn(0);
31792593348eSBarry Smith }
31802593348eSBarry Smith 
3181ace3abfcSBarry Smith PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
31822593348eSBarry Smith {
3183b24902e0SBarry Smith   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
31846849ba73SBarry Smith   PetscErrorCode ierr;
3185a96a251dSBarry Smith   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3186de6a44a3SBarry Smith 
31873a40ed3dSBarry Smith   PetscFunctionBegin;
3188e32f2f54SBarry Smith   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
31892593348eSBarry Smith 
31904fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
31914fd072dbSBarry Smith     c->imax           = a->imax;
31924fd072dbSBarry Smith     c->ilen           = a->ilen;
31934fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
31944fd072dbSBarry Smith   } else {
3195dcca6d9dSJed Brown     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
31963bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3197b6490206SBarry Smith     for (i=0; i<mbs; i++) {
31982593348eSBarry Smith       c->imax[i] = a->imax[i];
31992593348eSBarry Smith       c->ilen[i] = a->ilen[i];
32002593348eSBarry Smith     }
32014fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
32024fd072dbSBarry Smith   }
32032593348eSBarry Smith 
32042593348eSBarry Smith   /* allocate the matrix space */
320516a2bf60SHong Zhang   if (mallocmatspace) {
32064fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
32071795a4d1SJed Brown       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
32083bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
320926fbe8dcSKarl Rupp 
32104fd072dbSBarry Smith       c->i            = a->i;
32114fd072dbSBarry Smith       c->j            = a->j;
3212379be0ddSLisandro Dalcin       c->singlemalloc = PETSC_FALSE;
3213379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
3214379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
32154fd072dbSBarry Smith       c->parent       = A;
32161e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
32171e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
321826fbe8dcSKarl Rupp 
32194fd072dbSBarry Smith       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
32204fd072dbSBarry Smith       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
32214fd072dbSBarry Smith       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
32224fd072dbSBarry Smith     } else {
3223dcca6d9dSJed Brown       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
32243bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
322526fbe8dcSKarl Rupp 
3226c4992f7dSBarry Smith       c->singlemalloc = PETSC_TRUE;
3227379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
32284fd072dbSBarry Smith       c->free_ij      = PETSC_TRUE;
322926fbe8dcSKarl Rupp 
3230580bdb30SBarry Smith       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3231b6490206SBarry Smith       if (mbs > 0) {
3232580bdb30SBarry Smith         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
32332e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
3234580bdb30SBarry Smith           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
32352e8a6d31SBarry Smith         } else {
3236580bdb30SBarry Smith           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
32372593348eSBarry Smith         }
32382593348eSBarry Smith       }
32391e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
32401e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
324116a2bf60SHong Zhang     }
32424fd072dbSBarry Smith   }
324316a2bf60SHong Zhang 
32442593348eSBarry Smith   c->roworiented = a->roworiented;
32452593348eSBarry Smith   c->nonew       = a->nonew;
324626fbe8dcSKarl Rupp 
32471e1e43feSBarry Smith   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
32481e1e43feSBarry Smith   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
324926fbe8dcSKarl Rupp 
32505c9eb25fSBarry Smith   c->bs2         = a->bs2;
32515c9eb25fSBarry Smith   c->mbs         = a->mbs;
32525c9eb25fSBarry Smith   c->nbs         = a->nbs;
32532593348eSBarry Smith 
32542593348eSBarry Smith   if (a->diag) {
32554fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
32564fd072dbSBarry Smith       c->diag      = a->diag;
32574fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
32584fd072dbSBarry Smith     } else {
3259854ce69bSBarry Smith       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
32603bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
326126fbe8dcSKarl Rupp       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
32624fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
32634fd072dbSBarry Smith     }
3264f4259b30SLisandro Dalcin   } else c->diag = NULL;
326526fbe8dcSKarl Rupp 
32662593348eSBarry Smith   c->nz         = a->nz;
3267f2cbd3d5SJed Brown   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3268f361c04dSBarry Smith   c->solve_work = NULL;
3269f361c04dSBarry Smith   c->mult_work  = NULL;
3270f361c04dSBarry Smith   c->sor_workt  = NULL;
3271f361c04dSBarry Smith   c->sor_work   = NULL;
327288e51ccdSHong Zhang 
327388e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
327488e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3275cd6b891eSBarry Smith   if (a->compressedrow.use) {
327688e51ccdSHong Zhang     i    = a->compressedrow.nrows;
3277dcca6d9dSJed Brown     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
32783bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3279580bdb30SBarry Smith     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3280580bdb30SBarry Smith     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
328188e51ccdSHong Zhang   } else {
328288e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
32830298fd71SBarry Smith     c->compressedrow.i      = NULL;
32840298fd71SBarry Smith     c->compressedrow.rindex = NULL;
328588e51ccdSHong Zhang   }
3286e56f5c9eSBarry Smith   C->nonzerostate = A->nonzerostate;
328726fbe8dcSKarl Rupp 
3288140e18c1SBarry Smith   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
32893a40ed3dSBarry Smith   PetscFunctionReturn(0);
32902593348eSBarry Smith }
32912593348eSBarry Smith 
3292b24902e0SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3293b24902e0SBarry Smith {
3294b24902e0SBarry Smith   PetscErrorCode ierr;
3295b24902e0SBarry Smith 
3296b24902e0SBarry Smith   PetscFunctionBegin;
3297ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3298d0f46423SBarry Smith   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
32995c9eb25fSBarry Smith   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
330098ad0f72SJed Brown   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3301b24902e0SBarry Smith   PetscFunctionReturn(0);
3302b24902e0SBarry Smith }
3303b24902e0SBarry Smith 
3304618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3305b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3306f501eaabSShri Abhyankar {
3307b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3308b51a4376SLisandro Dalcin   PetscInt       *rowidxs,*colidxs;
3309b51a4376SLisandro Dalcin   PetscScalar    *matvals;
3310f501eaabSShri Abhyankar   PetscErrorCode ierr;
3311b51a4376SLisandro Dalcin 
3312b51a4376SLisandro Dalcin   PetscFunctionBegin;
3313b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3314b51a4376SLisandro Dalcin 
3315b51a4376SLisandro Dalcin   /* read matrix header */
3316b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3317b51a4376SLisandro Dalcin   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3318b51a4376SLisandro Dalcin   M = header[1]; N = header[2]; nz = header[3];
3319b51a4376SLisandro Dalcin   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3320b51a4376SLisandro Dalcin   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3321b51a4376SLisandro Dalcin   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3322b51a4376SLisandro Dalcin 
3323b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
3324b51a4376SLisandro Dalcin   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3325b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3326b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3327b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3328b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3329b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
3330b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3331b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3332b51a4376SLisandro Dalcin 
3333b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
3334b51a4376SLisandro Dalcin   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3335b51a4376SLisandro Dalcin   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3336b51a4376SLisandro Dalcin   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3337b51a4376SLisandro Dalcin   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3338b51a4376SLisandro Dalcin   mbs = m/bs; nbs = n/bs;
3339b51a4376SLisandro Dalcin 
3340b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
3341b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3342b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3343b51a4376SLisandro Dalcin   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3344b51a4376SLisandro Dalcin   sum = rowidxs[m];
3345b51a4376SLisandro Dalcin   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3346b51a4376SLisandro Dalcin 
3347b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
3348b51a4376SLisandro Dalcin   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3349b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3350b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3351b51a4376SLisandro Dalcin 
3352b51a4376SLisandro Dalcin   { /* preallocate matrix storage */
3353b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3354b51a4376SLisandro Dalcin     PetscInt  *nnz;
3355618cc2edSLisandro Dalcin     PetscBool sbaij;
3356b51a4376SLisandro Dalcin 
3357b51a4376SLisandro Dalcin     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3358b51a4376SLisandro Dalcin     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3359618cc2edSLisandro Dalcin     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3360b51a4376SLisandro Dalcin     for (i=0; i<mbs; i++) {
3361b51a4376SLisandro Dalcin       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3362618cc2edSLisandro Dalcin       for (k=0; k<bs; k++) {
3363618cc2edSLisandro Dalcin         PetscInt row = bs*i + k;
3364618cc2edSLisandro Dalcin         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3365618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3366618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3367618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3368618cc2edSLisandro Dalcin         }
3369618cc2edSLisandro Dalcin       }
3370b51a4376SLisandro Dalcin     }
3371b51a4376SLisandro Dalcin     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3372b51a4376SLisandro Dalcin     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3373618cc2edSLisandro Dalcin     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3374b51a4376SLisandro Dalcin     ierr = PetscFree(nnz);CHKERRQ(ierr);
3375b51a4376SLisandro Dalcin   }
3376b51a4376SLisandro Dalcin 
3377b51a4376SLisandro Dalcin   /* store matrix values */
3378b51a4376SLisandro Dalcin   for (i=0; i<m; i++) {
3379b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3380618cc2edSLisandro Dalcin     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3381b51a4376SLisandro Dalcin   }
3382b51a4376SLisandro Dalcin 
3383b51a4376SLisandro Dalcin   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3384b51a4376SLisandro Dalcin   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3385b51a4376SLisandro Dalcin   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3386b51a4376SLisandro Dalcin   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3387b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3388b51a4376SLisandro Dalcin }
3389b51a4376SLisandro Dalcin 
3390b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3391b51a4376SLisandro Dalcin {
3392b51a4376SLisandro Dalcin   PetscErrorCode ierr;
33937f489da9SVaclav Hapla   PetscBool      isbinary;
3394f501eaabSShri Abhyankar 
3395f501eaabSShri Abhyankar   PetscFunctionBegin;
33967f489da9SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3397b51a4376SLisandro Dalcin   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3398b51a4376SLisandro Dalcin   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3399f501eaabSShri Abhyankar   PetscFunctionReturn(0);
3400f501eaabSShri Abhyankar }
3401f501eaabSShri Abhyankar 
3402273d9f13SBarry Smith /*@C
3403273d9f13SBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3404273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
3405273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3406273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3407273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
34082593348eSBarry Smith 
3409d083f849SBarry Smith    Collective
3410273d9f13SBarry Smith 
3411273d9f13SBarry Smith    Input Parameters:
3412273d9f13SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
3413bb7ae925SBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3414bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3415273d9f13SBarry Smith .  m - number of rows
3416273d9f13SBarry Smith .  n - number of columns
341735d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
341835d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
34190298fd71SBarry Smith          (possibly different for each block row) or NULL
3420273d9f13SBarry Smith 
3421273d9f13SBarry Smith    Output Parameter:
3422273d9f13SBarry Smith .  A - the matrix
3423273d9f13SBarry Smith 
3424175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3425f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3426175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3427175b88e8SBarry Smith 
3428273d9f13SBarry Smith    Options Database Keys:
3429a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
3430273d9f13SBarry Smith                      block calculations (much slower)
3431a2b725a8SWilliam Gropp -    -mat_block_size - size of the blocks to use
3432273d9f13SBarry Smith 
3433273d9f13SBarry Smith    Level: intermediate
3434273d9f13SBarry Smith 
3435273d9f13SBarry Smith    Notes:
3436d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
3437d1be2dadSMatthew Knepley 
343849a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
343949a6f317SBarry Smith 
344035d8aa7fSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
344135d8aa7fSBarry Smith 
3442273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3443273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3444273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3445273d9f13SBarry Smith 
3446273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
34470298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3448a7f22e61SSatish Balay    allocation.  See Users-Manual: ch_mat for details.
3449273d9f13SBarry Smith    matrices.
3450273d9f13SBarry Smith 
345169b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3452273d9f13SBarry Smith @*/
34537087cfbeSBarry Smith PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3454273d9f13SBarry Smith {
3455dfbe8321SBarry Smith   PetscErrorCode ierr;
3456273d9f13SBarry Smith 
3457273d9f13SBarry Smith   PetscFunctionBegin;
3458f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3459f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3460273d9f13SBarry Smith   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3461367daffbSBarry Smith   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3462273d9f13SBarry Smith   PetscFunctionReturn(0);
3463273d9f13SBarry Smith }
3464273d9f13SBarry Smith 
3465273d9f13SBarry Smith /*@C
3466273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3467273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
3468273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3469273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3470273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
3471273d9f13SBarry Smith 
3472d083f849SBarry Smith    Collective
3473273d9f13SBarry Smith 
3474273d9f13SBarry Smith    Input Parameters:
34751c4f3114SJed Brown +  B - the matrix
3476bb7ae925SBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3477bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3478273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
3479273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
34800298fd71SBarry Smith          (possibly different for each block row) or NULL
3481273d9f13SBarry Smith 
3482273d9f13SBarry Smith    Options Database Keys:
3483a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
3484273d9f13SBarry Smith                      block calculations (much slower)
3485a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3486273d9f13SBarry Smith 
3487273d9f13SBarry Smith    Level: intermediate
3488273d9f13SBarry Smith 
3489273d9f13SBarry Smith    Notes:
349049a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
349149a6f317SBarry Smith 
3492aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3493aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3494aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3495aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3496aa95bbe8SBarry Smith 
3497273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3498273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3499273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3500273d9f13SBarry Smith 
3501273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
35020298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3503a7f22e61SSatish Balay    allocation.  See Users-Manual: ch_mat for details.
3504273d9f13SBarry Smith 
350569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3506273d9f13SBarry Smith @*/
35077087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3508273d9f13SBarry Smith {
35094ac538c5SBarry Smith   PetscErrorCode ierr;
3510273d9f13SBarry Smith 
3511273d9f13SBarry Smith   PetscFunctionBegin;
35126ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
35136ba663aaSJed Brown   PetscValidType(B,1);
35146ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
35154ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3516273d9f13SBarry Smith   PetscFunctionReturn(0);
3517273d9f13SBarry Smith }
3518a1d92eedSBarry Smith 
3519725b52f3SLisandro Dalcin /*@C
3520664954b6SBarry Smith    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3521725b52f3SLisandro Dalcin 
3522d083f849SBarry Smith    Collective
3523725b52f3SLisandro Dalcin 
3524725b52f3SLisandro Dalcin    Input Parameters:
35251c4f3114SJed Brown +  B - the matrix
3526725b52f3SLisandro Dalcin .  i - the indices into j for the start of each local row (starts with zero)
3527725b52f3SLisandro Dalcin .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3528725b52f3SLisandro Dalcin -  v - optional values in the matrix
3529725b52f3SLisandro Dalcin 
3530664954b6SBarry Smith    Level: advanced
3531725b52f3SLisandro Dalcin 
35323adadaf3SJed Brown    Notes:
35333adadaf3SJed Brown    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
35343adadaf3SJed Brown    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
35353adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
35363adadaf3SJed Brown    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
35373adadaf3SJed Brown    block column and the second index is over columns within a block.
35383adadaf3SJed Brown 
3539664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3540664954b6SBarry Smith 
3541725b52f3SLisandro Dalcin .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3542725b52f3SLisandro Dalcin @*/
35437087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3544725b52f3SLisandro Dalcin {
35454ac538c5SBarry Smith   PetscErrorCode ierr;
3546725b52f3SLisandro Dalcin 
3547725b52f3SLisandro Dalcin   PetscFunctionBegin;
35486ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
35496ba663aaSJed Brown   PetscValidType(B,1);
35506ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
35514ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3552725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3553725b52f3SLisandro Dalcin }
3554725b52f3SLisandro Dalcin 
3555c75a6043SHong Zhang /*@
3556dfb205c3SBarry Smith      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3557c75a6043SHong Zhang 
3558d083f849SBarry Smith      Collective
3559c75a6043SHong Zhang 
3560c75a6043SHong Zhang    Input Parameters:
3561c75a6043SHong Zhang +  comm - must be an MPI communicator of size 1
3562c75a6043SHong Zhang .  bs - size of block
3563c75a6043SHong Zhang .  m - number of rows
3564c75a6043SHong Zhang .  n - number of columns
3565483a2f95SBarry Smith .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3566c75a6043SHong Zhang .  j - column indices
3567c75a6043SHong Zhang -  a - matrix values
3568c75a6043SHong Zhang 
3569c75a6043SHong Zhang    Output Parameter:
3570c75a6043SHong Zhang .  mat - the matrix
3571c75a6043SHong Zhang 
3572dfb205c3SBarry Smith    Level: advanced
3573c75a6043SHong Zhang 
3574c75a6043SHong Zhang    Notes:
3575c75a6043SHong Zhang        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3576c75a6043SHong Zhang     once the matrix is destroyed
3577c75a6043SHong Zhang 
3578c75a6043SHong Zhang        You cannot set new nonzero locations into this matrix, that will generate an error.
3579c75a6043SHong Zhang 
3580c75a6043SHong Zhang        The i and j indices are 0 based
3581c75a6043SHong Zhang 
3582dfb205c3SBarry Smith        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3583dfb205c3SBarry Smith 
35843adadaf3SJed Brown       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
35853adadaf3SJed Brown       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
35863adadaf3SJed Brown       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
35873adadaf3SJed Brown       with column-major ordering within blocks.
3588dfb205c3SBarry Smith 
358969b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3590c75a6043SHong Zhang 
3591c75a6043SHong Zhang @*/
3592c3c607ccSBarry Smith PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3593c75a6043SHong Zhang {
3594c75a6043SHong Zhang   PetscErrorCode ierr;
3595c75a6043SHong Zhang   PetscInt       ii;
3596c75a6043SHong Zhang   Mat_SeqBAIJ    *baij;
3597c75a6043SHong Zhang 
3598c75a6043SHong Zhang   PetscFunctionBegin;
3599e32f2f54SBarry Smith   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
360041096f02SStefano Zampini   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3601c75a6043SHong Zhang 
3602c75a6043SHong Zhang   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3603c75a6043SHong Zhang   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3604c75a6043SHong Zhang   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3605f4259b30SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3606c75a6043SHong Zhang   baij = (Mat_SeqBAIJ*)(*mat)->data;
3607dcca6d9dSJed Brown   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
36083bb1ff40SBarry Smith   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3609c75a6043SHong Zhang 
3610c75a6043SHong Zhang   baij->i = i;
3611c75a6043SHong Zhang   baij->j = j;
3612c75a6043SHong Zhang   baij->a = a;
361326fbe8dcSKarl Rupp 
3614c75a6043SHong Zhang   baij->singlemalloc = PETSC_FALSE;
3615c75a6043SHong Zhang   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3616e6b907acSBarry Smith   baij->free_a       = PETSC_FALSE;
3617e6b907acSBarry Smith   baij->free_ij      = PETSC_FALSE;
3618c75a6043SHong Zhang 
3619c75a6043SHong Zhang   for (ii=0; ii<m; ii++) {
3620c75a6043SHong Zhang     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3621cf9c20a2SJed Brown     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3622c75a6043SHong Zhang   }
362376bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3624c75a6043SHong Zhang     for (ii=0; ii<baij->i[m]; ii++) {
3625e32f2f54SBarry Smith       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3626e32f2f54SBarry Smith       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3627c75a6043SHong Zhang     }
362876bd3646SJed Brown   }
3629c75a6043SHong Zhang 
3630c75a6043SHong Zhang   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631c75a6043SHong Zhang   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632c75a6043SHong Zhang   PetscFunctionReturn(0);
3633c75a6043SHong Zhang }
3634bdf6f3fcSHong Zhang 
3635bdf6f3fcSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3636bdf6f3fcSHong Zhang {
3637bdf6f3fcSHong Zhang   PetscErrorCode ierr;
36388761c3d6SHong Zhang   PetscMPIInt    size;
3639bdf6f3fcSHong Zhang 
3640bdf6f3fcSHong Zhang   PetscFunctionBegin;
3641ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
36428761c3d6SHong Zhang   if (size == 1 && scall == MAT_REUSE_MATRIX) {
36438761c3d6SHong Zhang     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
36448761c3d6SHong Zhang   } else {
3645bdf6f3fcSHong Zhang     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
36468761c3d6SHong Zhang   }
3647bdf6f3fcSHong Zhang   PetscFunctionReturn(0);
3648bdf6f3fcSHong Zhang }
3649