xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d70f29a362ae60d541be4e9a72e9494be00f9e3d)
1be1d678aSKris Buschelman 
22593348eSBarry Smith /*
3b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
42593348eSBarry Smith   matrix storage format.
52593348eSBarry Smith */
6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7c6db04a5SJed Brown #include <petscblaslapack.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
1043516a2dSKris Buschelman 
117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
137ea3e4caSstefano_zampini #endif
147ea3e4caSstefano_zampini 
15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17b5b72c8aSIrina Sokolova #endif
18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19b5b72c8aSIrina Sokolova 
20857cbf51SRichard Tran Mills PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
219463ebdaSPierre Jolivet {
229463ebdaSPierre Jolivet   PetscErrorCode ierr;
239463ebdaSPierre Jolivet   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
24857cbf51SRichard Tran Mills   PetscInt       m,n,i;
259463ebdaSPierre Jolivet   PetscInt       ib,jb,bs = A->rmap->bs;
269463ebdaSPierre Jolivet   MatScalar      *a_val = a_aij->a;
279463ebdaSPierre Jolivet 
289463ebdaSPierre Jolivet   PetscFunctionBegin;
29857cbf51SRichard Tran Mills   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
30857cbf51SRichard Tran Mills   for (i=0; i<n; i++) reductions[i] = 0.0;
319463ebdaSPierre Jolivet   if (type == NORM_2) {
329463ebdaSPierre Jolivet     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
339463ebdaSPierre Jolivet       for (jb=0; jb<bs; jb++) {
349463ebdaSPierre Jolivet         for (ib=0; ib<bs; ib++) {
35857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
369463ebdaSPierre Jolivet           a_val++;
379463ebdaSPierre Jolivet         }
389463ebdaSPierre Jolivet       }
399463ebdaSPierre Jolivet     }
409463ebdaSPierre Jolivet   } else if (type == NORM_1) {
419463ebdaSPierre Jolivet     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
429463ebdaSPierre Jolivet       for (jb=0; jb<bs; jb++) {
439463ebdaSPierre Jolivet         for (ib=0; ib<bs; ib++) {
44857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
459463ebdaSPierre Jolivet           a_val++;
469463ebdaSPierre Jolivet         }
479463ebdaSPierre Jolivet       }
489463ebdaSPierre Jolivet     }
499463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
509463ebdaSPierre Jolivet     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
519463ebdaSPierre Jolivet       for (jb=0; jb<bs; jb++) {
529463ebdaSPierre Jolivet         for (ib=0; ib<bs; ib++) {
539463ebdaSPierre Jolivet           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
54857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
559463ebdaSPierre Jolivet           a_val++;
569463ebdaSPierre Jolivet         }
579463ebdaSPierre Jolivet       }
589463ebdaSPierre Jolivet     }
59857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
60857cbf51SRichard Tran Mills     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
61857cbf51SRichard Tran Mills       for (jb=0; jb<bs; jb++) {
62857cbf51SRichard Tran Mills         for (ib=0; ib<bs; ib++) {
63857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
64857cbf51SRichard Tran Mills           a_val++;
65857cbf51SRichard Tran Mills         }
66857cbf51SRichard Tran Mills       }
67857cbf51SRichard Tran Mills     }
68857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
69857cbf51SRichard Tran Mills     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
70857cbf51SRichard Tran Mills       for (jb=0; jb<bs; jb++) {
71857cbf51SRichard Tran Mills         for (ib=0; ib<bs; ib++) {
72857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
73857cbf51SRichard Tran Mills           a_val++;
74857cbf51SRichard Tran Mills         }
75857cbf51SRichard Tran Mills       }
76857cbf51SRichard Tran Mills     }
77857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
789463ebdaSPierre Jolivet   if (type == NORM_2) {
79857cbf51SRichard Tran Mills     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
80857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81857cbf51SRichard Tran Mills     for (i=0; i<n; i++) reductions[i] /= m;
829463ebdaSPierre Jolivet   }
839463ebdaSPierre Jolivet   PetscFunctionReturn(0);
849463ebdaSPierre Jolivet }
859463ebdaSPierre Jolivet 
86713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
87b01c7715SBarry Smith {
88b01c7715SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
896849ba73SBarry Smith   PetscErrorCode ierr;
90de80f912SBarry Smith   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
917f0c90edSBarry Smith   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
9262bba022SBarry Smith   PetscReal      shift = 0.0;
931a9391e3SHong Zhang   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
94b01c7715SBarry Smith 
95b01c7715SBarry Smith   PetscFunctionBegin;
96a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
97a455e926SHong Zhang 
989797317bSBarry Smith   if (a->idiagvalid) {
999797317bSBarry Smith     if (values) *values = a->idiag;
1009797317bSBarry Smith     PetscFunctionReturn(0);
1019797317bSBarry Smith   }
102b01c7715SBarry Smith   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
103b01c7715SBarry Smith   diag_offset = a->diag;
104b01c7715SBarry Smith   if (!a->idiag) {
1057f0c90edSBarry Smith     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
1067f0c90edSBarry Smith     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
107b01c7715SBarry Smith   }
108b01c7715SBarry Smith   diag  = a->idiag;
109bbead8a2SBarry Smith   if (values) *values = a->idiag;
110b01c7715SBarry Smith   /* factor and invert each block */
111521d7252SBarry Smith   switch (bs) {
112ab040260SJed Brown   case 1:
113ab040260SJed Brown     for (i=0; i<mbs; i++) {
114ab040260SJed Brown       odiag    = v + 1*diag_offset[i];
115ab040260SJed Brown       diag[0]  = odiag[0];
116ec1892c8SHong Zhang 
117ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
118ec1892c8SHong Zhang         if (allowzeropivot) {
1197b6c816cSBarry Smith           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1207b6c816cSBarry Smith           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1217b6c816cSBarry Smith           A->factorerror_zeropivot_row   = i;
1227d3de750SJacob Faibussowitsch           ierr = PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i);CHKERRQ(ierr);
12398921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
124ec1892c8SHong Zhang       }
125ec1892c8SHong Zhang 
126d4a378daSJed Brown       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
127ab040260SJed Brown       diag    += 1;
128ab040260SJed Brown     }
129ab040260SJed Brown     break;
130b01c7715SBarry Smith   case 2:
131b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
132b01c7715SBarry Smith       odiag    = v + 4*diag_offset[i];
133b01c7715SBarry Smith       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
134a455e926SHong Zhang       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1357b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136b01c7715SBarry Smith       diag    += 4;
137b01c7715SBarry Smith     }
138b01c7715SBarry Smith     break;
139b01c7715SBarry Smith   case 3:
140b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
141b01c7715SBarry Smith       odiag    = v + 9*diag_offset[i];
142b01c7715SBarry Smith       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
143b01c7715SBarry Smith       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
144b01c7715SBarry Smith       diag[8]  = odiag[8];
145a455e926SHong Zhang       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1467b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
147b01c7715SBarry Smith       diag    += 9;
148b01c7715SBarry Smith     }
149b01c7715SBarry Smith     break;
150b01c7715SBarry Smith   case 4:
151b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
152b01c7715SBarry Smith       odiag  = v + 16*diag_offset[i];
153580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
154a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1557b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
156b01c7715SBarry Smith       diag  += 16;
157b01c7715SBarry Smith     }
158b01c7715SBarry Smith     break;
159b01c7715SBarry Smith   case 5:
160b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
161b01c7715SBarry Smith       odiag  = v + 25*diag_offset[i];
162580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
163a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1647b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
165b01c7715SBarry Smith       diag  += 25;
166b01c7715SBarry Smith     }
167b01c7715SBarry Smith     break;
168d49b2adcSBarry Smith   case 6:
169d49b2adcSBarry Smith     for (i=0; i<mbs; i++) {
170d49b2adcSBarry Smith       odiag  = v + 36*diag_offset[i];
171580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
172a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1737b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
174d49b2adcSBarry Smith       diag  += 36;
175d49b2adcSBarry Smith     }
176d49b2adcSBarry Smith     break;
177de80f912SBarry Smith   case 7:
178de80f912SBarry Smith     for (i=0; i<mbs; i++) {
179de80f912SBarry Smith       odiag  = v + 49*diag_offset[i];
180580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
181a455e926SHong Zhang       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1827b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
183de80f912SBarry Smith       diag  += 49;
184de80f912SBarry Smith     }
185de80f912SBarry Smith     break;
186b01c7715SBarry Smith   default:
187dcca6d9dSJed Brown     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
188de80f912SBarry Smith     for (i=0; i<mbs; i++) {
189de80f912SBarry Smith       odiag  = v + bs2*diag_offset[i];
190580bdb30SBarry Smith       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
1915f8bbccaSHong Zhang       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1927b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
193de80f912SBarry Smith       diag  += bs2;
194de80f912SBarry Smith     }
195de80f912SBarry Smith     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
196b01c7715SBarry Smith   }
197b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
198b01c7715SBarry Smith   PetscFunctionReturn(0);
199b01c7715SBarry Smith }
200b01c7715SBarry Smith 
201e48d15efSToby Isaac PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2026d3beeddSMatthew Knepley {
2036d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
204e48d15efSToby Isaac   PetscScalar       *x,*work,*w,*workt,*t;
205e48d15efSToby Isaac   const MatScalar   *v,*aa = a->a, *idiag;
206e48d15efSToby Isaac   const PetscScalar *b,*xb;
2075455b99fSToby Isaac   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
2086d3beeddSMatthew Knepley   PetscErrorCode    ierr;
209e48d15efSToby Isaac   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
210c1ac3661SBarry Smith   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
211b01c7715SBarry Smith 
212b01c7715SBarry Smith   PetscFunctionBegin;
213b01c7715SBarry Smith   its = its*lits;
2142c71b3e2SJacob Faibussowitsch   PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
2152c71b3e2SJacob Faibussowitsch   PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
2162c71b3e2SJacob Faibussowitsch   PetscCheckFalse(fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
2172c71b3e2SJacob Faibussowitsch   PetscCheckFalse(omega != 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
2182c71b3e2SJacob Faibussowitsch   PetscCheckFalse((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
219b01c7715SBarry Smith 
2200298fd71SBarry Smith   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
221b01c7715SBarry Smith 
222b2ec919aSToby Isaac   if (!m) PetscFunctionReturn(0);
223b01c7715SBarry Smith   diag  = a->diag;
224b01c7715SBarry Smith   idiag = a->idiag;
225de80f912SBarry Smith   k    = PetscMax(A->rmap->n,A->cmap->n);
226e48d15efSToby Isaac   if (!a->mult_work) {
227f361c04dSBarry Smith     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
228de80f912SBarry Smith   }
2293475c22fSBarry Smith   if (!a->sor_workt) {
230f361c04dSBarry Smith     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
231de80f912SBarry Smith   }
232de80f912SBarry Smith   if (!a->sor_work) {
233785e854fSJed Brown     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
234de80f912SBarry Smith   }
2353475c22fSBarry Smith   work = a->mult_work;
2363475c22fSBarry Smith   t    = a->sor_workt;
237de80f912SBarry Smith   w    = a->sor_work;
238de80f912SBarry Smith 
239de80f912SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240de80f912SBarry Smith   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
241de80f912SBarry Smith 
242de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
243de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
244e48d15efSToby Isaac       switch (bs) {
245e48d15efSToby Isaac       case 1:
246e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
247e48d15efSToby Isaac         t[0] = b[0];
248e48d15efSToby Isaac         i2     = 1;
249e48d15efSToby Isaac         idiag += 1;
250e48d15efSToby Isaac         for (i=1; i<m; i++) {
251e48d15efSToby Isaac           v  = aa + ai[i];
252e48d15efSToby Isaac           vi = aj + ai[i];
253e48d15efSToby Isaac           nz = diag[i] - ai[i];
254e48d15efSToby Isaac           s[0] = b[i2];
255e48d15efSToby Isaac           for (j=0; j<nz; j++) {
256e48d15efSToby Isaac             xw[0] = x[vi[j]];
257e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
258e48d15efSToby Isaac           }
259e48d15efSToby Isaac           t[i2] = s[0];
260e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
261e48d15efSToby Isaac           x[i2]  = xw[0];
262e48d15efSToby Isaac           idiag += 1;
263e48d15efSToby Isaac           i2    += 1;
264e48d15efSToby Isaac         }
265e48d15efSToby Isaac         break;
266e48d15efSToby Isaac       case 2:
267e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
268e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1];
269e48d15efSToby Isaac         i2     = 2;
270e48d15efSToby Isaac         idiag += 4;
271e48d15efSToby Isaac         for (i=1; i<m; i++) {
272e48d15efSToby Isaac           v  = aa + 4*ai[i];
273e48d15efSToby Isaac           vi = aj + ai[i];
274e48d15efSToby Isaac           nz = diag[i] - ai[i];
275e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
276e48d15efSToby Isaac           for (j=0; j<nz; j++) {
277e48d15efSToby Isaac             idx = 2*vi[j];
278e48d15efSToby Isaac             it  = 4*j;
279e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
280e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
281e48d15efSToby Isaac           }
282e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1];
283e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
284e48d15efSToby Isaac           x[i2]   = xw[0]; x[i2+1] = xw[1];
285e48d15efSToby Isaac           idiag  += 4;
286e48d15efSToby Isaac           i2     += 2;
287e48d15efSToby Isaac         }
288e48d15efSToby Isaac         break;
289e48d15efSToby Isaac       case 3:
290e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
291e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
292e48d15efSToby Isaac         i2     = 3;
293e48d15efSToby Isaac         idiag += 9;
294e48d15efSToby Isaac         for (i=1; i<m; i++) {
295e48d15efSToby Isaac           v  = aa + 9*ai[i];
296e48d15efSToby Isaac           vi = aj + ai[i];
297e48d15efSToby Isaac           nz = diag[i] - ai[i];
298e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
299e48d15efSToby Isaac           while (nz--) {
300e48d15efSToby Isaac             idx = 3*(*vi++);
301e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
302e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
303e48d15efSToby Isaac             v  += 9;
304e48d15efSToby Isaac           }
305e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
306e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
307e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
308e48d15efSToby Isaac           idiag  += 9;
309e48d15efSToby Isaac           i2     += 3;
310e48d15efSToby Isaac         }
311e48d15efSToby Isaac         break;
312e48d15efSToby Isaac       case 4:
313e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
314e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
315e48d15efSToby Isaac         i2     = 4;
316e48d15efSToby Isaac         idiag += 16;
317e48d15efSToby Isaac         for (i=1; i<m; i++) {
318e48d15efSToby Isaac           v  = aa + 16*ai[i];
319e48d15efSToby Isaac           vi = aj + ai[i];
320e48d15efSToby Isaac           nz = diag[i] - ai[i];
321e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
322e48d15efSToby Isaac           while (nz--) {
323e48d15efSToby Isaac             idx = 4*(*vi++);
324e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
325e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
326e48d15efSToby Isaac             v  += 16;
327e48d15efSToby Isaac           }
328e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
329e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
330e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
331e48d15efSToby Isaac           idiag  += 16;
332e48d15efSToby Isaac           i2     += 4;
333e48d15efSToby Isaac         }
334e48d15efSToby Isaac         break;
335e48d15efSToby Isaac       case 5:
336e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
337e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
338e48d15efSToby Isaac         i2     = 5;
339e48d15efSToby Isaac         idiag += 25;
340e48d15efSToby Isaac         for (i=1; i<m; i++) {
341e48d15efSToby Isaac           v  = aa + 25*ai[i];
342e48d15efSToby Isaac           vi = aj + ai[i];
343e48d15efSToby Isaac           nz = diag[i] - ai[i];
344e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
345e48d15efSToby Isaac           while (nz--) {
346e48d15efSToby Isaac             idx = 5*(*vi++);
347e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
348e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
349e48d15efSToby Isaac             v  += 25;
350e48d15efSToby Isaac           }
351e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
352e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
353e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
354e48d15efSToby Isaac           idiag  += 25;
355e48d15efSToby Isaac           i2     += 5;
356e48d15efSToby Isaac         }
357e48d15efSToby Isaac         break;
358e48d15efSToby Isaac       case 6:
359e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
360e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
361e48d15efSToby Isaac         i2     = 6;
362e48d15efSToby Isaac         idiag += 36;
363e48d15efSToby Isaac         for (i=1; i<m; i++) {
364e48d15efSToby Isaac           v  = aa + 36*ai[i];
365e48d15efSToby Isaac           vi = aj + ai[i];
366e48d15efSToby Isaac           nz = diag[i] - ai[i];
367e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
368e48d15efSToby Isaac           while (nz--) {
369e48d15efSToby Isaac             idx = 6*(*vi++);
370e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
371e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
372e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
373e48d15efSToby Isaac             v  += 36;
374e48d15efSToby Isaac           }
375e48d15efSToby Isaac           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
376e48d15efSToby Isaac           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
377e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
378e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
379e48d15efSToby Isaac           idiag  += 36;
380e48d15efSToby Isaac           i2     += 6;
381e48d15efSToby Isaac         }
382e48d15efSToby Isaac         break;
383e48d15efSToby Isaac       case 7:
384e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
385e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
386e48d15efSToby Isaac         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
387e48d15efSToby Isaac         i2     = 7;
388e48d15efSToby Isaac         idiag += 49;
389e48d15efSToby Isaac         for (i=1; i<m; i++) {
390e48d15efSToby Isaac           v  = aa + 49*ai[i];
391e48d15efSToby Isaac           vi = aj + ai[i];
392e48d15efSToby Isaac           nz = diag[i] - ai[i];
393e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
394e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
395e48d15efSToby Isaac           while (nz--) {
396e48d15efSToby Isaac             idx = 7*(*vi++);
397e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
398e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
399e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
400e48d15efSToby Isaac             v  += 49;
401e48d15efSToby Isaac           }
402e48d15efSToby Isaac           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
403e48d15efSToby Isaac           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
404e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
405e48d15efSToby Isaac           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
406e48d15efSToby Isaac           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
407e48d15efSToby Isaac           idiag  += 49;
408e48d15efSToby Isaac           i2     += 7;
409e48d15efSToby Isaac         }
410e48d15efSToby Isaac         break;
411e48d15efSToby Isaac       default:
41296b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
413580bdb30SBarry Smith         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
414de80f912SBarry Smith         i2     = bs;
415de80f912SBarry Smith         idiag += bs2;
416de80f912SBarry Smith         for (i=1; i<m; i++) {
417de80f912SBarry Smith           v  = aa + bs2*ai[i];
418de80f912SBarry Smith           vi = aj + ai[i];
419de80f912SBarry Smith           nz = diag[i] - ai[i];
420de80f912SBarry Smith 
421580bdb30SBarry Smith           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
422de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
423de80f912SBarry Smith           workt = work;
424de80f912SBarry Smith           for (j=0; j<nz; j++) {
425580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
426de80f912SBarry Smith             workt += bs;
427de80f912SBarry Smith           }
42896b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
429580bdb30SBarry Smith           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
43096b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
431de80f912SBarry Smith 
432de80f912SBarry Smith           idiag += bs2;
433de80f912SBarry Smith           i2    += bs;
434de80f912SBarry Smith         }
435e48d15efSToby Isaac         break;
436e48d15efSToby Isaac       }
437de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
438e48d15efSToby Isaac       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
439e48d15efSToby Isaac       xb = t;
440de80f912SBarry Smith     }
441e48d15efSToby Isaac     else xb = b;
442de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
443e48d15efSToby Isaac       idiag = a->idiag+bs2*(a->mbs-1);
444e48d15efSToby Isaac       i2 = bs * (m-1);
445e48d15efSToby Isaac       switch (bs) {
446e48d15efSToby Isaac       case 1:
447e48d15efSToby Isaac         s[0]  = xb[i2];
448e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
449e48d15efSToby Isaac         x[i2] = xw[0];
450e48d15efSToby Isaac         i2   -= 1;
451e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
452e48d15efSToby Isaac           v  = aa + (diag[i]+1);
453e48d15efSToby Isaac           vi = aj + diag[i] + 1;
454e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
455e48d15efSToby Isaac           s[0] = xb[i2];
456e48d15efSToby Isaac           for (j=0; j<nz; j++) {
457e48d15efSToby Isaac             xw[0] = x[vi[j]];
458e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
459e48d15efSToby Isaac           }
460e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
461e48d15efSToby Isaac           x[i2]  = xw[0];
462e48d15efSToby Isaac           idiag -= 1;
463e48d15efSToby Isaac           i2    -= 1;
464e48d15efSToby Isaac         }
465e48d15efSToby Isaac         break;
466e48d15efSToby Isaac       case 2:
467e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1];
468e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
469e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1];
470e48d15efSToby Isaac         i2    -= 2;
471e48d15efSToby Isaac         idiag -= 4;
472e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
473e48d15efSToby Isaac           v  = aa + 4*(diag[i] + 1);
474e48d15efSToby Isaac           vi = aj + diag[i] + 1;
475e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
476e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1];
477e48d15efSToby Isaac           for (j=0; j<nz; j++) {
478e48d15efSToby Isaac             idx = 2*vi[j];
479e48d15efSToby Isaac             it  = 4*j;
480e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
481e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
482e48d15efSToby Isaac           }
483e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
484e48d15efSToby Isaac           x[i2]   = xw[0]; x[i2+1] = xw[1];
485e48d15efSToby Isaac           idiag  -= 4;
486e48d15efSToby Isaac           i2     -= 2;
487e48d15efSToby Isaac         }
488e48d15efSToby Isaac         break;
489e48d15efSToby Isaac       case 3:
490e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
491e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
492e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
493e48d15efSToby Isaac         i2    -= 3;
494e48d15efSToby Isaac         idiag -= 9;
495e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
496e48d15efSToby Isaac           v  = aa + 9*(diag[i]+1);
497e48d15efSToby Isaac           vi = aj + diag[i] + 1;
498e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
499e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
500e48d15efSToby Isaac           while (nz--) {
501e48d15efSToby Isaac             idx = 3*(*vi++);
502e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
503e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
504e48d15efSToby Isaac             v  += 9;
505e48d15efSToby Isaac           }
506e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
507e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
508e48d15efSToby Isaac           idiag  -= 9;
509e48d15efSToby Isaac           i2     -= 3;
510e48d15efSToby Isaac         }
511e48d15efSToby Isaac         break;
512e48d15efSToby Isaac       case 4:
513e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
514e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
515e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
516e48d15efSToby Isaac         i2    -= 4;
517e48d15efSToby Isaac         idiag -= 16;
518e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
519e48d15efSToby Isaac           v  = aa + 16*(diag[i]+1);
520e48d15efSToby Isaac           vi = aj + diag[i] + 1;
521e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
522e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
523e48d15efSToby Isaac           while (nz--) {
524e48d15efSToby Isaac             idx = 4*(*vi++);
525e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
526e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
527e48d15efSToby Isaac             v  += 16;
528e48d15efSToby Isaac           }
529e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
530e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
531e48d15efSToby Isaac           idiag  -= 16;
532e48d15efSToby Isaac           i2     -= 4;
533e48d15efSToby Isaac         }
534e48d15efSToby Isaac         break;
535e48d15efSToby Isaac       case 5:
536e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
537e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
538e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
539e48d15efSToby Isaac         i2    -= 5;
540e48d15efSToby Isaac         idiag -= 25;
541e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
542e48d15efSToby Isaac           v  = aa + 25*(diag[i]+1);
543e48d15efSToby Isaac           vi = aj + diag[i] + 1;
544e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
545e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
546e48d15efSToby Isaac           while (nz--) {
547e48d15efSToby Isaac             idx = 5*(*vi++);
548e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
549e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
550e48d15efSToby Isaac             v  += 25;
551e48d15efSToby Isaac           }
552e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
553e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
554e48d15efSToby Isaac           idiag  -= 25;
555e48d15efSToby Isaac           i2     -= 5;
556e48d15efSToby Isaac         }
557e48d15efSToby Isaac         break;
558e48d15efSToby Isaac       case 6:
559e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
560e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
561e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
562e48d15efSToby Isaac         i2    -= 6;
563e48d15efSToby Isaac         idiag -= 36;
564e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
565e48d15efSToby Isaac           v  = aa + 36*(diag[i]+1);
566e48d15efSToby Isaac           vi = aj + diag[i] + 1;
567e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
568e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
569e48d15efSToby Isaac           while (nz--) {
570e48d15efSToby Isaac             idx = 6*(*vi++);
571e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
572e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
573e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
574e48d15efSToby Isaac             v  += 36;
575e48d15efSToby Isaac           }
576e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
577e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
578e48d15efSToby Isaac           idiag  -= 36;
579e48d15efSToby Isaac           i2     -= 6;
580e48d15efSToby Isaac         }
581e48d15efSToby Isaac         break;
582e48d15efSToby Isaac       case 7:
583e48d15efSToby Isaac         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
584e48d15efSToby Isaac         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
585e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
586e48d15efSToby Isaac         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
587e48d15efSToby Isaac         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
588e48d15efSToby Isaac         i2    -= 7;
589e48d15efSToby Isaac         idiag -= 49;
590e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
591e48d15efSToby Isaac           v  = aa + 49*(diag[i]+1);
592e48d15efSToby Isaac           vi = aj + diag[i] + 1;
593e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
594e48d15efSToby Isaac           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
595e48d15efSToby Isaac           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
596e48d15efSToby Isaac           while (nz--) {
597e48d15efSToby Isaac             idx = 7*(*vi++);
598e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
599e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
600e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
601e48d15efSToby Isaac             v  += 49;
602e48d15efSToby Isaac           }
603e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
604e48d15efSToby Isaac           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
605e48d15efSToby Isaac           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
606e48d15efSToby Isaac           idiag  -= 49;
607e48d15efSToby Isaac           i2     -= 7;
608e48d15efSToby Isaac         }
609e48d15efSToby Isaac         break;
610e48d15efSToby Isaac       default:
611580bdb30SBarry Smith         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
61296b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
613de80f912SBarry Smith         i2    -= bs;
614e48d15efSToby Isaac         idiag -= bs2;
615de80f912SBarry Smith         for (i=m-2; i>=0; i--) {
616de80f912SBarry Smith           v  = aa + bs2*(diag[i]+1);
617de80f912SBarry Smith           vi = aj + diag[i] + 1;
618de80f912SBarry Smith           nz = ai[i+1] - diag[i] - 1;
619de80f912SBarry Smith 
620580bdb30SBarry Smith           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
621de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
622de80f912SBarry Smith           workt = work;
623de80f912SBarry Smith           for (j=0; j<nz; j++) {
624580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
625de80f912SBarry Smith             workt += bs;
626de80f912SBarry Smith           }
62796b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
62896b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
629e48d15efSToby Isaac 
630de80f912SBarry Smith           idiag -= bs2;
631de80f912SBarry Smith           i2    -= bs;
632de80f912SBarry Smith         }
633e48d15efSToby Isaac         break;
634e48d15efSToby Isaac       }
635de80f912SBarry Smith       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
636de80f912SBarry Smith     }
637e48d15efSToby Isaac     its--;
638e48d15efSToby Isaac   }
639e48d15efSToby Isaac   while (its--) {
640e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
641e48d15efSToby Isaac       idiag = a->idiag;
642e48d15efSToby Isaac       i2 = 0;
643e48d15efSToby Isaac       switch (bs) {
644e48d15efSToby Isaac       case 1:
645e48d15efSToby Isaac         for (i=0; i<m; i++) {
646e48d15efSToby Isaac           v  = aa + ai[i];
647e48d15efSToby Isaac           vi = aj + ai[i];
648e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
649e48d15efSToby Isaac           s[0] = b[i2];
650e48d15efSToby Isaac           for (j=0; j<nz; j++) {
651e48d15efSToby Isaac             xw[0] = x[vi[j]];
652e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
653e48d15efSToby Isaac           }
654e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
655e48d15efSToby Isaac           x[i2] += xw[0];
656e48d15efSToby Isaac           idiag += 1;
657e48d15efSToby Isaac           i2    += 1;
658e48d15efSToby Isaac         }
659e48d15efSToby Isaac         break;
660e48d15efSToby Isaac       case 2:
661e48d15efSToby Isaac         for (i=0; i<m; i++) {
662e48d15efSToby Isaac           v  = aa + 4*ai[i];
663e48d15efSToby Isaac           vi = aj + ai[i];
664e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
665e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
666e48d15efSToby Isaac           for (j=0; j<nz; j++) {
667e48d15efSToby Isaac             idx = 2*vi[j];
668e48d15efSToby Isaac             it  = 4*j;
669e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
670e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
671e48d15efSToby Isaac           }
672e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
673e48d15efSToby Isaac           x[i2]  += xw[0]; x[i2+1] += xw[1];
674e48d15efSToby Isaac           idiag  += 4;
675e48d15efSToby Isaac           i2     += 2;
676e48d15efSToby Isaac         }
677e48d15efSToby Isaac         break;
678e48d15efSToby Isaac       case 3:
679e48d15efSToby Isaac         for (i=0; i<m; i++) {
680e48d15efSToby Isaac           v  = aa + 9*ai[i];
681e48d15efSToby Isaac           vi = aj + ai[i];
682e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
683e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
684e48d15efSToby Isaac           while (nz--) {
685e48d15efSToby Isaac             idx = 3*(*vi++);
686e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
687e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
688e48d15efSToby Isaac             v  += 9;
689e48d15efSToby Isaac           }
690e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
691e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
692e48d15efSToby Isaac           idiag  += 9;
693e48d15efSToby Isaac           i2     += 3;
694e48d15efSToby Isaac         }
695e48d15efSToby Isaac         break;
696e48d15efSToby Isaac       case 4:
697e48d15efSToby Isaac         for (i=0; i<m; i++) {
698e48d15efSToby Isaac           v  = aa + 16*ai[i];
699e48d15efSToby Isaac           vi = aj + ai[i];
700e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
701e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
702e48d15efSToby Isaac           while (nz--) {
703e48d15efSToby Isaac             idx = 4*(*vi++);
704e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
705e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
706e48d15efSToby Isaac             v  += 16;
707e48d15efSToby Isaac           }
708e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
709e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
710e48d15efSToby Isaac           idiag  += 16;
711e48d15efSToby Isaac           i2     += 4;
712e48d15efSToby Isaac         }
713e48d15efSToby Isaac         break;
714e48d15efSToby Isaac       case 5:
715e48d15efSToby Isaac         for (i=0; i<m; i++) {
716e48d15efSToby Isaac           v  = aa + 25*ai[i];
717e48d15efSToby Isaac           vi = aj + ai[i];
718e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
719e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
720e48d15efSToby Isaac           while (nz--) {
721e48d15efSToby Isaac             idx = 5*(*vi++);
722e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
723e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
724e48d15efSToby Isaac             v  += 25;
725e48d15efSToby Isaac           }
726e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
727e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
728e48d15efSToby Isaac           idiag  += 25;
729e48d15efSToby Isaac           i2     += 5;
730e48d15efSToby Isaac         }
731e48d15efSToby Isaac         break;
732e48d15efSToby Isaac       case 6:
733e48d15efSToby Isaac         for (i=0; i<m; i++) {
734e48d15efSToby Isaac           v  = aa + 36*ai[i];
735e48d15efSToby Isaac           vi = aj + ai[i];
736e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
737e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
738e48d15efSToby Isaac           while (nz--) {
739e48d15efSToby Isaac             idx = 6*(*vi++);
740e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
741e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
742e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
743e48d15efSToby Isaac             v  += 36;
744e48d15efSToby Isaac           }
745e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
746e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
747e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
748e48d15efSToby Isaac           idiag  += 36;
749e48d15efSToby Isaac           i2     += 6;
750e48d15efSToby Isaac         }
751e48d15efSToby Isaac         break;
752e48d15efSToby Isaac       case 7:
753e48d15efSToby Isaac         for (i=0; i<m; i++) {
754e48d15efSToby Isaac           v  = aa + 49*ai[i];
755e48d15efSToby Isaac           vi = aj + ai[i];
756e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
757e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
758e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
759e48d15efSToby Isaac           while (nz--) {
760e48d15efSToby Isaac             idx = 7*(*vi++);
761e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
762e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
763e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
764e48d15efSToby Isaac             v  += 49;
765e48d15efSToby Isaac           }
766e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
767e48d15efSToby Isaac           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
768e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
769e48d15efSToby Isaac           idiag  += 49;
770e48d15efSToby Isaac           i2     += 7;
771e48d15efSToby Isaac         }
772e48d15efSToby Isaac         break;
773e48d15efSToby Isaac       default:
774e48d15efSToby Isaac         for (i=0; i<m; i++) {
775e48d15efSToby Isaac           v  = aa + bs2*ai[i];
776e48d15efSToby Isaac           vi = aj + ai[i];
777e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
778e48d15efSToby Isaac 
779580bdb30SBarry Smith           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
780e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
781e48d15efSToby Isaac           workt = work;
782e48d15efSToby Isaac           for (j=0; j<nz; j++) {
783580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
784e48d15efSToby Isaac             workt += bs;
785e48d15efSToby Isaac           }
786e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
787e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
788e48d15efSToby Isaac 
789e48d15efSToby Isaac           idiag += bs2;
790e48d15efSToby Isaac           i2    += bs;
791e48d15efSToby Isaac         }
792e48d15efSToby Isaac         break;
793e48d15efSToby Isaac       }
794e48d15efSToby Isaac       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
795e48d15efSToby Isaac     }
796e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
797e48d15efSToby Isaac       idiag = a->idiag+bs2*(a->mbs-1);
798e48d15efSToby Isaac       i2 = bs * (m-1);
799e48d15efSToby Isaac       switch (bs) {
800e48d15efSToby Isaac       case 1:
801e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
802e48d15efSToby Isaac           v  = aa + ai[i];
803e48d15efSToby Isaac           vi = aj + ai[i];
804e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
805e48d15efSToby Isaac           s[0] = b[i2];
806e48d15efSToby Isaac           for (j=0; j<nz; j++) {
807e48d15efSToby Isaac             xw[0] = x[vi[j]];
808e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
809e48d15efSToby Isaac           }
810e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
811e48d15efSToby Isaac           x[i2] += xw[0];
812e48d15efSToby Isaac           idiag -= 1;
813e48d15efSToby Isaac           i2    -= 1;
814e48d15efSToby Isaac         }
815e48d15efSToby Isaac         break;
816e48d15efSToby Isaac       case 2:
817e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
818e48d15efSToby Isaac           v  = aa + 4*ai[i];
819e48d15efSToby Isaac           vi = aj + ai[i];
820e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
821e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
822e48d15efSToby Isaac           for (j=0; j<nz; j++) {
823e48d15efSToby Isaac             idx = 2*vi[j];
824e48d15efSToby Isaac             it  = 4*j;
825e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
826e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
827e48d15efSToby Isaac           }
828e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
829e48d15efSToby Isaac           x[i2]  += xw[0]; x[i2+1] += xw[1];
830e48d15efSToby Isaac           idiag  -= 4;
831e48d15efSToby Isaac           i2     -= 2;
832e48d15efSToby Isaac         }
833e48d15efSToby Isaac         break;
834e48d15efSToby Isaac       case 3:
835e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
836e48d15efSToby Isaac           v  = aa + 9*ai[i];
837e48d15efSToby Isaac           vi = aj + ai[i];
838e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
839e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
840e48d15efSToby Isaac           while (nz--) {
841e48d15efSToby Isaac             idx = 3*(*vi++);
842e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
843e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
844e48d15efSToby Isaac             v  += 9;
845e48d15efSToby Isaac           }
846e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
847e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
848e48d15efSToby Isaac           idiag  -= 9;
849e48d15efSToby Isaac           i2     -= 3;
850e48d15efSToby Isaac         }
851e48d15efSToby Isaac         break;
852e48d15efSToby Isaac       case 4:
853e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
854e48d15efSToby Isaac           v  = aa + 16*ai[i];
855e48d15efSToby Isaac           vi = aj + ai[i];
856e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
857e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
858e48d15efSToby Isaac           while (nz--) {
859e48d15efSToby Isaac             idx = 4*(*vi++);
860e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
861e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
862e48d15efSToby Isaac             v  += 16;
863e48d15efSToby Isaac           }
864e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
865e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
866e48d15efSToby Isaac           idiag  -= 16;
867e48d15efSToby Isaac           i2     -= 4;
868e48d15efSToby Isaac         }
869e48d15efSToby Isaac         break;
870e48d15efSToby Isaac       case 5:
871e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
872e48d15efSToby Isaac           v  = aa + 25*ai[i];
873e48d15efSToby Isaac           vi = aj + ai[i];
874e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
875e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
876e48d15efSToby Isaac           while (nz--) {
877e48d15efSToby Isaac             idx = 5*(*vi++);
878e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
879e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
880e48d15efSToby Isaac             v  += 25;
881e48d15efSToby Isaac           }
882e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
883e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
884e48d15efSToby Isaac           idiag  -= 25;
885e48d15efSToby Isaac           i2     -= 5;
886e48d15efSToby Isaac         }
887e48d15efSToby Isaac         break;
888e48d15efSToby Isaac       case 6:
889e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
890e48d15efSToby Isaac           v  = aa + 36*ai[i];
891e48d15efSToby Isaac           vi = aj + ai[i];
892e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
893e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
894e48d15efSToby Isaac           while (nz--) {
895e48d15efSToby Isaac             idx = 6*(*vi++);
896e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
897e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
898e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
899e48d15efSToby Isaac             v  += 36;
900e48d15efSToby Isaac           }
901e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
902e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
903e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
904e48d15efSToby Isaac           idiag  -= 36;
905e48d15efSToby Isaac           i2     -= 6;
906e48d15efSToby Isaac         }
907e48d15efSToby Isaac         break;
908e48d15efSToby Isaac       case 7:
909e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
910e48d15efSToby Isaac           v  = aa + 49*ai[i];
911e48d15efSToby Isaac           vi = aj + ai[i];
912e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
913e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
914e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
915e48d15efSToby Isaac           while (nz--) {
916e48d15efSToby Isaac             idx = 7*(*vi++);
917e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
918e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
919e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
920e48d15efSToby Isaac             v  += 49;
921e48d15efSToby Isaac           }
922e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
923e48d15efSToby Isaac           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
924e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
925e48d15efSToby Isaac           idiag  -= 49;
926e48d15efSToby Isaac           i2     -= 7;
927e48d15efSToby Isaac         }
928e48d15efSToby Isaac         break;
929e48d15efSToby Isaac       default:
930e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
931e48d15efSToby Isaac           v  = aa + bs2*ai[i];
932e48d15efSToby Isaac           vi = aj + ai[i];
933e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
934e48d15efSToby Isaac 
935580bdb30SBarry Smith           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
936e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
937e48d15efSToby Isaac           workt = work;
938e48d15efSToby Isaac           for (j=0; j<nz; j++) {
939580bdb30SBarry Smith             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
940e48d15efSToby Isaac             workt += bs;
941e48d15efSToby Isaac           }
942e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
943e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
944e48d15efSToby Isaac 
945e48d15efSToby Isaac           idiag -= bs2;
946e48d15efSToby Isaac           i2    -= bs;
947e48d15efSToby Isaac         }
948e48d15efSToby Isaac         break;
949e48d15efSToby Isaac       }
950e48d15efSToby Isaac       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
951e48d15efSToby Isaac     }
952e48d15efSToby Isaac   }
953de80f912SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954de80f912SBarry Smith   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
955de80f912SBarry Smith   PetscFunctionReturn(0);
956de80f912SBarry Smith }
957de80f912SBarry Smith 
958af674e45SBarry Smith /*
95981824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
960af674e45SBarry Smith */
961af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
962af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
963af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
964af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4
965af674e45SBarry Smith #endif
966af674e45SBarry Smith 
9678cc058d9SJed Brown PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
968af674e45SBarry Smith {
969af674e45SBarry Smith   Mat               A  = *AA;
970af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
971c1ac3661SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
972c1ac3661SBarry Smith   PetscInt          *ai    =a->i,*ailen=a->ilen;
97317ec6a02SBarry Smith   PetscInt          *aj    =a->j,stepval,lastcol = -1;
974f15d580aSBarry Smith   const PetscScalar *value = v;
9754bb09213Spetsc   MatScalar         *ap,*aa = a->a,*bap;
97670990e77SSatish Balay   PetscErrorCode    ierr;
977af674e45SBarry Smith 
978af674e45SBarry Smith   PetscFunctionBegin;
979ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980af674e45SBarry Smith   stepval = (n-1)*4;
981af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
982af674e45SBarry Smith     row  = im[k];
983af674e45SBarry Smith     rp   = aj + ai[row];
984af674e45SBarry Smith     ap   = aa + 16*ai[row];
985af674e45SBarry Smith     nrow = ailen[row];
986af674e45SBarry Smith     low  = 0;
98717ec6a02SBarry Smith     high = nrow;
988af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
989af674e45SBarry Smith       col = in[l];
990db4deed7SKarl Rupp       if (col <= lastcol)  low = 0;
991db4deed7SKarl Rupp       else                high = nrow;
99217ec6a02SBarry Smith       lastcol = col;
9931e3347e8SBarry Smith       value   = v + k*(stepval+4 + l)*4;
994af674e45SBarry Smith       while (high-low > 7) {
995af674e45SBarry Smith         t = (low+high)/2;
996af674e45SBarry Smith         if (rp[t] > col) high = t;
997af674e45SBarry Smith         else             low  = t;
998af674e45SBarry Smith       }
999af674e45SBarry Smith       for (i=low; i<high; i++) {
1000af674e45SBarry Smith         if (rp[i] > col) break;
1001af674e45SBarry Smith         if (rp[i] == col) {
1002af674e45SBarry Smith           bap = ap +  16*i;
1003af674e45SBarry Smith           for (ii=0; ii<4; ii++,value+=stepval) {
1004af674e45SBarry Smith             for (jj=ii; jj<16; jj+=4) {
1005af674e45SBarry Smith               bap[jj] += *value++;
1006af674e45SBarry Smith             }
1007af674e45SBarry Smith           }
1008af674e45SBarry Smith           goto noinsert2;
1009af674e45SBarry Smith         }
1010af674e45SBarry Smith       }
1011af674e45SBarry Smith       N = nrow++ - 1;
101217ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1013af674e45SBarry Smith       /* shift up all the later entries in this row */
1014af674e45SBarry Smith       for (ii=N; ii>=i; ii--) {
1015af674e45SBarry Smith         rp[ii+1] = rp[ii];
101670990e77SSatish Balay         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
1017af674e45SBarry Smith       }
1018af674e45SBarry Smith       if (N >= i) {
101970990e77SSatish Balay         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1020af674e45SBarry Smith       }
1021af674e45SBarry Smith       rp[i] = col;
1022af674e45SBarry Smith       bap   = ap +  16*i;
1023af674e45SBarry Smith       for (ii=0; ii<4; ii++,value+=stepval) {
1024af674e45SBarry Smith         for (jj=ii; jj<16; jj+=4) {
1025af674e45SBarry Smith           bap[jj] = *value++;
1026af674e45SBarry Smith         }
1027af674e45SBarry Smith       }
1028af674e45SBarry Smith       noinsert2:;
1029af674e45SBarry Smith       low = i;
1030af674e45SBarry Smith     }
1031af674e45SBarry Smith     ailen[row] = nrow;
1032af674e45SBarry Smith   }
1033be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1034af674e45SBarry Smith }
1035af674e45SBarry Smith 
1036af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4
1038af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039af674e45SBarry Smith #define matsetvalues4_ matsetvalues4
1040af674e45SBarry Smith #endif
1041af674e45SBarry Smith 
10428cc058d9SJed Brown PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1043af674e45SBarry Smith {
1044af674e45SBarry Smith   Mat         A  = *AA;
1045af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1046580bdb30SBarry Smith   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1047c1ac3661SBarry Smith   PetscInt    *ai=a->i,*ailen=a->ilen;
1048c1ac3661SBarry Smith   PetscInt    *aj=a->j,brow,bcol;
104917ec6a02SBarry Smith   PetscInt    ridx,cidx,lastcol = -1;
1050af674e45SBarry Smith   MatScalar   *ap,value,*aa=a->a,*bap;
105170990e77SSatish Balay   PetscErrorCode ierr;
1052af674e45SBarry Smith 
1053af674e45SBarry Smith   PetscFunctionBegin;
1054af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
1055af674e45SBarry Smith     row  = im[k]; brow = row/4;
1056af674e45SBarry Smith     rp   = aj + ai[brow];
1057af674e45SBarry Smith     ap   = aa + 16*ai[brow];
1058af674e45SBarry Smith     nrow = ailen[brow];
1059af674e45SBarry Smith     low  = 0;
106017ec6a02SBarry Smith     high = nrow;
1061af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
1062af674e45SBarry Smith       col   = in[l]; bcol = col/4;
1063af674e45SBarry Smith       ridx  = row % 4; cidx = col % 4;
1064af674e45SBarry Smith       value = v[l + k*n];
1065db4deed7SKarl Rupp       if (col <= lastcol)  low = 0;
1066db4deed7SKarl Rupp       else                high = nrow;
106717ec6a02SBarry Smith       lastcol = col;
1068af674e45SBarry Smith       while (high-low > 7) {
1069af674e45SBarry Smith         t = (low+high)/2;
1070af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1071af674e45SBarry Smith         else              low  = t;
1072af674e45SBarry Smith       }
1073af674e45SBarry Smith       for (i=low; i<high; i++) {
1074af674e45SBarry Smith         if (rp[i] > bcol) break;
1075af674e45SBarry Smith         if (rp[i] == bcol) {
1076af674e45SBarry Smith           bap   = ap +  16*i + 4*cidx + ridx;
1077af674e45SBarry Smith           *bap += value;
1078af674e45SBarry Smith           goto noinsert1;
1079af674e45SBarry Smith         }
1080af674e45SBarry Smith       }
1081af674e45SBarry Smith       N = nrow++ - 1;
108217ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1083af674e45SBarry Smith       /* shift up all the later entries in this row */
108470990e77SSatish Balay       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
108570990e77SSatish Balay       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
108670990e77SSatish Balay       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1087af674e45SBarry Smith       rp[i]                    = bcol;
1088af674e45SBarry Smith       ap[16*i + 4*cidx + ridx] = value;
1089af674e45SBarry Smith noinsert1:;
1090af674e45SBarry Smith       low = i;
1091af674e45SBarry Smith     }
1092af674e45SBarry Smith     ailen[brow] = nrow;
1093af674e45SBarry Smith   }
1094be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1095af674e45SBarry Smith }
1096af674e45SBarry Smith 
1097be5855fcSBarry Smith /*
1098be5855fcSBarry Smith      Checks for missing diagonals
1099be5855fcSBarry Smith */
1100ace3abfcSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1101be5855fcSBarry Smith {
1102be5855fcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
11036849ba73SBarry Smith   PetscErrorCode ierr;
11047734d3b5SMatthew G. Knepley   PetscInt       *diag,*ii = a->i,i;
1105be5855fcSBarry Smith 
1106be5855fcSBarry Smith   PetscFunctionBegin;
1107c4992f7dSBarry Smith   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
11082af78befSBarry Smith   *missing = PETSC_FALSE;
11097734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
11102efa7f71SHong Zhang     *missing = PETSC_TRUE;
11112efa7f71SHong Zhang     if (d) *d = 0;
1112994fe344SLisandro Dalcin     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
11132efa7f71SHong Zhang   } else {
111401445905SHong Zhang     PetscInt n;
111501445905SHong Zhang     n = PetscMin(a->mbs, a->nbs);
1116883fce79SBarry Smith     diag = a->diag;
111701445905SHong Zhang     for (i=0; i<n; i++) {
11187734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i+1]) {
11192af78befSBarry Smith         *missing = PETSC_TRUE;
11202af78befSBarry Smith         if (d) *d = i;
11217d3de750SJacob Faibussowitsch         ierr = PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i);CHKERRQ(ierr);
1122358d2f5dSShri Abhyankar         break;
11232efa7f71SHong Zhang       }
1124be5855fcSBarry Smith     }
1125be5855fcSBarry Smith   }
1126be5855fcSBarry Smith   PetscFunctionReturn(0);
1127be5855fcSBarry Smith }
1128be5855fcSBarry Smith 
1129dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1130de6a44a3SBarry Smith {
1131de6a44a3SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
11326849ba73SBarry Smith   PetscErrorCode ierr;
113309f38230SBarry Smith   PetscInt       i,j,m = a->mbs;
1134de6a44a3SBarry Smith 
11353a40ed3dSBarry Smith   PetscFunctionBegin;
113609f38230SBarry Smith   if (!a->diag) {
1137785e854fSJed Brown     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
11383bb1ff40SBarry Smith     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
11394fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
114009f38230SBarry Smith   }
11417fc0212eSBarry Smith   for (i=0; i<m; i++) {
114209f38230SBarry Smith     a->diag[i] = a->i[i+1];
1143de6a44a3SBarry Smith     for (j=a->i[i]; j<a->i[i+1]; j++) {
1144de6a44a3SBarry Smith       if (a->j[j] == i) {
114509f38230SBarry Smith         a->diag[i] = j;
1146de6a44a3SBarry Smith         break;
1147de6a44a3SBarry Smith       }
1148de6a44a3SBarry Smith     }
1149de6a44a3SBarry Smith   }
11503a40ed3dSBarry Smith   PetscFunctionReturn(0);
1151de6a44a3SBarry Smith }
11522593348eSBarry Smith 
11531a83f524SJed Brown static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
11543b2fbd54SBarry Smith {
11553b2fbd54SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1156dfbe8321SBarry Smith   PetscErrorCode ierr;
11571a83f524SJed Brown   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
11581a83f524SJed Brown   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
11593b2fbd54SBarry Smith 
11603a40ed3dSBarry Smith   PetscFunctionBegin;
11613b2fbd54SBarry Smith   *nn = n;
11623a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
11633b2fbd54SBarry Smith   if (symmetric) {
11642462f5fdSStefano Zampini     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1165553b3c51SBarry Smith     nz   = tia[n];
11663b2fbd54SBarry Smith   } else {
11678f7157efSSatish Balay     tia = a->i; tja = a->j;
11683b2fbd54SBarry Smith   }
11693b2fbd54SBarry Smith 
1170ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1171ecc77c7aSBarry Smith     (*nn) *= bs;
11728f7157efSSatish Balay     /* malloc & create the natural set of indices */
1173785e854fSJed Brown     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
11749985e31cSBarry Smith     if (n) {
11752462f5fdSStefano Zampini       (*ia)[0] = oshift;
1176ecc77c7aSBarry Smith       for (j=1; j<bs; j++) {
1177ecc77c7aSBarry Smith         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1178ecc77c7aSBarry Smith       }
11799985e31cSBarry Smith     }
1180ecc77c7aSBarry Smith 
1181ecc77c7aSBarry Smith     for (i=1; i<n; i++) {
1182ecc77c7aSBarry Smith       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1183ecc77c7aSBarry Smith       for (j=1; j<bs; j++) {
1184ecc77c7aSBarry Smith         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
11858f7157efSSatish Balay       }
11868f7157efSSatish Balay     }
11879985e31cSBarry Smith     if (n) {
1188ecc77c7aSBarry Smith       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
11899985e31cSBarry Smith     }
1190ecc77c7aSBarry Smith 
11911a83f524SJed Brown     if (inja) {
1192785e854fSJed Brown       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
11939985e31cSBarry Smith       cnt = 0;
11949985e31cSBarry Smith       for (i=0; i<n; i++) {
11959985e31cSBarry Smith         for (j=0; j<bs; j++) {
11969985e31cSBarry Smith           for (k=tia[i]; k<tia[i+1]; k++) {
11979985e31cSBarry Smith             for (l=0; l<bs; l++) {
11989985e31cSBarry Smith               (*ja)[cnt++] = bs*tja[k] + l;
11999985e31cSBarry Smith             }
12009985e31cSBarry Smith           }
12019985e31cSBarry Smith         }
12029985e31cSBarry Smith       }
12039985e31cSBarry Smith     }
12049985e31cSBarry Smith 
12058f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
12068f7157efSSatish Balay       ierr = PetscFree(tia);CHKERRQ(ierr);
12078f7157efSSatish Balay       ierr = PetscFree(tja);CHKERRQ(ierr);
12088f7157efSSatish Balay     }
1209f6d58c54SBarry Smith   } else if (oshift == 1) {
1210715a17b5SBarry Smith     if (symmetric) {
1211a2ea699eSBarry Smith       nz = tia[A->rmap->n/bs];
1212715a17b5SBarry Smith       /*  add 1 to i and j indices */
1213715a17b5SBarry Smith       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1214715a17b5SBarry Smith       *ia = tia;
1215715a17b5SBarry Smith       if (ja) {
1216715a17b5SBarry Smith         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1217715a17b5SBarry Smith         *ja = tja;
1218715a17b5SBarry Smith       }
1219715a17b5SBarry Smith     } else {
1220a2ea699eSBarry Smith       nz = a->i[A->rmap->n/bs];
1221f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
1222854ce69bSBarry Smith       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1223f6d58c54SBarry Smith       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1224f6d58c54SBarry Smith       if (ja) {
1225785e854fSJed Brown         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1226f6d58c54SBarry Smith         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1227f6d58c54SBarry Smith       }
1228715a17b5SBarry Smith     }
12298f7157efSSatish Balay   } else {
12308f7157efSSatish Balay     *ia = tia;
1231ecc77c7aSBarry Smith     if (ja) *ja = tja;
12328f7157efSSatish Balay   }
12333a40ed3dSBarry Smith   PetscFunctionReturn(0);
12343b2fbd54SBarry Smith }
12353b2fbd54SBarry Smith 
12361a83f524SJed Brown static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
12373b2fbd54SBarry Smith {
12386849ba73SBarry Smith   PetscErrorCode ierr;
12393b2fbd54SBarry Smith 
12403a40ed3dSBarry Smith   PetscFunctionBegin;
12413a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
1242715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1243606d414cSSatish Balay     ierr = PetscFree(*ia);CHKERRQ(ierr);
12449985e31cSBarry Smith     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
12453b2fbd54SBarry Smith   }
12463a40ed3dSBarry Smith   PetscFunctionReturn(0);
12473b2fbd54SBarry Smith }
12483b2fbd54SBarry Smith 
1249dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
12502d61bbb3SSatish Balay {
12512d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252dfbe8321SBarry Smith   PetscErrorCode ierr;
12532d61bbb3SSatish Balay 
1254433994e6SBarry Smith   PetscFunctionBegin;
1255aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1256c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz);
12572d61bbb3SSatish Balay #endif
1258e6b907acSBarry Smith   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
12596bf464f9SBarry Smith   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
12606bf464f9SBarry Smith   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
12614fd072dbSBarry Smith   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
126205b42c5fSBarry Smith   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
12634fd072dbSBarry Smith   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
126405b42c5fSBarry Smith   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
126505b42c5fSBarry Smith   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1266f361c04dSBarry Smith   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1267de80f912SBarry Smith   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
12686bf464f9SBarry Smith   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
126905b42c5fSBarry Smith   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1270cd6b891eSBarry Smith   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1271c4319e64SHong Zhang 
12726bf464f9SBarry Smith   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
12736bf464f9SBarry Smith   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1274bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
1275901853e0SKris Buschelman 
1276f4259b30SLisandro Dalcin   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1277cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1278cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1279bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1280bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1281bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1282bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1283bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1284bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1285bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1286bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1287bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
12887ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
1289c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
12907ea3e4caSstefano_zampini #endif
1291c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
12922d61bbb3SSatish Balay   PetscFunctionReturn(0);
12932d61bbb3SSatish Balay }
12942d61bbb3SSatish Balay 
1295ace3abfcSBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
12962d61bbb3SSatish Balay {
12972d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
129863ba0a88SBarry Smith   PetscErrorCode ierr;
12992d61bbb3SSatish Balay 
13002d61bbb3SSatish Balay   PetscFunctionBegin;
1301aa275fccSKris Buschelman   switch (op) {
1302aa275fccSKris Buschelman   case MAT_ROW_ORIENTED:
13034e0d8c25SBarry Smith     a->roworiented = flg;
1304aa275fccSKris Buschelman     break;
1305a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1306a9817697SBarry Smith     a->keepnonzeropattern = flg;
1307aa275fccSKris Buschelman     break;
1308512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1309512a5fc5SBarry Smith     a->nonew = (flg ? 0 : 1);
1310aa275fccSKris Buschelman     break;
1311aa275fccSKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
13124e0d8c25SBarry Smith     a->nonew = (flg ? -1 : 0);
1313aa275fccSKris Buschelman     break;
1314aa275fccSKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
13154e0d8c25SBarry Smith     a->nonew = (flg ? -2 : 0);
1316aa275fccSKris Buschelman     break;
131728b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
131828b2fa4aSMatthew Knepley     a->nounused = (flg ? -1 : 0);
131928b2fa4aSMatthew Knepley     break;
13208c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1321aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1322aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
1323071fcb05SBarry Smith   case MAT_SORTED_FULL:
13247d3de750SJacob Faibussowitsch     ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1325aa275fccSKris Buschelman     break;
13265021d80fSJed Brown   case MAT_SPD:
132777e54ba9SKris Buschelman   case MAT_SYMMETRIC:
132877e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
13299a4540c5SBarry Smith   case MAT_HERMITIAN:
13309a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1331c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1332672ba085SHong Zhang   case MAT_STRUCTURE_ONLY:
13335021d80fSJed Brown     /* These options are handled directly by MatSetOption() */
133477e54ba9SKris Buschelman     break;
1335aa275fccSKris Buschelman   default:
133698921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
13372d61bbb3SSatish Balay   }
13382d61bbb3SSatish Balay   PetscFunctionReturn(0);
13392d61bbb3SSatish Balay }
13402d61bbb3SSatish Balay 
134152768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
134252768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
13432d61bbb3SSatish Balay {
13446849ba73SBarry Smith   PetscErrorCode ierr;
134552768537SHong Zhang   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
134652768537SHong Zhang   MatScalar      *aa_i;
134787828ca2SBarry Smith   PetscScalar    *v_i;
13482d61bbb3SSatish Balay 
13492d61bbb3SSatish Balay   PetscFunctionBegin;
1350d0f46423SBarry Smith   bs  = A->rmap->bs;
135152768537SHong Zhang   bs2 = bs*bs;
13522c71b3e2SJacob Faibussowitsch   PetscCheckFalse(row < 0 || row >= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row);
13532d61bbb3SSatish Balay 
13542d61bbb3SSatish Balay   bn  = row/bs;   /* Block number */
13552d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
13562d61bbb3SSatish Balay   M   = ai[bn+1] - ai[bn];
13572d61bbb3SSatish Balay   *nz = bs*M;
13582d61bbb3SSatish Balay 
13592d61bbb3SSatish Balay   if (v) {
1360f4259b30SLisandro Dalcin     *v = NULL;
13612d61bbb3SSatish Balay     if (*nz) {
1362854ce69bSBarry Smith       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
13632d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
13642d61bbb3SSatish Balay         v_i  = *v + i*bs;
13652d61bbb3SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
136626fbe8dcSKarl Rupp         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
13672d61bbb3SSatish Balay       }
13682d61bbb3SSatish Balay     }
13692d61bbb3SSatish Balay   }
13702d61bbb3SSatish Balay 
13712d61bbb3SSatish Balay   if (idx) {
1372f4259b30SLisandro Dalcin     *idx = NULL;
13732d61bbb3SSatish Balay     if (*nz) {
1374854ce69bSBarry Smith       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
13752d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
13762d61bbb3SSatish Balay         idx_i = *idx + i*bs;
13772d61bbb3SSatish Balay         itmp  = bs*aj[ai[bn] + i];
137826fbe8dcSKarl Rupp         for (j=0; j<bs; j++) idx_i[j] = itmp++;
13792d61bbb3SSatish Balay       }
13802d61bbb3SSatish Balay     }
13812d61bbb3SSatish Balay   }
13822d61bbb3SSatish Balay   PetscFunctionReturn(0);
13832d61bbb3SSatish Balay }
13842d61bbb3SSatish Balay 
138552768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
138652768537SHong Zhang {
138752768537SHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
138852768537SHong Zhang   PetscErrorCode ierr;
138952768537SHong Zhang 
139052768537SHong Zhang   PetscFunctionBegin;
139152768537SHong Zhang   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
139252768537SHong Zhang   PetscFunctionReturn(0);
139352768537SHong Zhang }
139452768537SHong Zhang 
1395c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
13962d61bbb3SSatish Balay {
1397dfbe8321SBarry Smith   PetscErrorCode ierr;
1398606d414cSSatish Balay 
13992d61bbb3SSatish Balay   PetscFunctionBegin;
1400cb4a9cd9SHong Zhang   if (nz)  *nz = 0;
140105b42c5fSBarry Smith   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
140205b42c5fSBarry Smith   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
14032d61bbb3SSatish Balay   PetscFunctionReturn(0);
14042d61bbb3SSatish Balay }
14052d61bbb3SSatish Balay 
1406fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
14072d61bbb3SSatish Balay {
140820e84f26SHong Zhang   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
14092d61bbb3SSatish Balay   Mat            C;
14106849ba73SBarry Smith   PetscErrorCode ierr;
141120e84f26SHong Zhang   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
141220e84f26SHong Zhang   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
141320e84f26SHong Zhang   MatScalar      *ata,*aa=a->a;
14142d61bbb3SSatish Balay 
14152d61bbb3SSatish Balay   PetscFunctionBegin;
141620e84f26SHong Zhang   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1417cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
141820e84f26SHong Zhang     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
14192d61bbb3SSatish Balay 
1420ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1421d0f46423SBarry Smith     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
14227adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
142320e84f26SHong Zhang     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
142420e84f26SHong Zhang 
142520e84f26SHong Zhang     at  = (Mat_SeqBAIJ*)C->data;
142620e84f26SHong Zhang     ati = at->i;
142720e84f26SHong Zhang     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1428fc4dec0aSBarry Smith   } else {
1429fc4dec0aSBarry Smith     C = *B;
143020e84f26SHong Zhang     at = (Mat_SeqBAIJ*)C->data;
143120e84f26SHong Zhang     ati = at->i;
1432fc4dec0aSBarry Smith   }
1433fc4dec0aSBarry Smith 
143420e84f26SHong Zhang   atj = at->j;
143520e84f26SHong Zhang   ata = at->a;
143620e84f26SHong Zhang 
143720e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
1438580bdb30SBarry Smith   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
143920e84f26SHong Zhang 
144020e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
14412d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
144220e84f26SHong Zhang     anzj = ai[i+1] - ai[i];
144320e84f26SHong Zhang     for (j=0; j<anzj; j++) {
144420e84f26SHong Zhang       atj[atfill[*aj]] = i;
144520e84f26SHong Zhang       for (kr=0; kr<bs; kr++) {
144620e84f26SHong Zhang         for (k=0; k<bs; k++) {
144720e84f26SHong Zhang           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
14482d61bbb3SSatish Balay         }
14492d61bbb3SSatish Balay       }
145020e84f26SHong Zhang       atfill[*aj++] += 1;
145120e84f26SHong Zhang     }
145220e84f26SHong Zhang   }
14532d61bbb3SSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
14542d61bbb3SSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
14552d61bbb3SSatish Balay 
145620e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
145720e84f26SHong Zhang   ierr = PetscFree(atfill);CHKERRQ(ierr);
145820e84f26SHong Zhang 
1459cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
146020e84f26SHong Zhang     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
14612d61bbb3SSatish Balay     *B = C;
14622d61bbb3SSatish Balay   } else {
146328be2f97SBarry Smith     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
14642d61bbb3SSatish Balay   }
14652d61bbb3SSatish Balay   PetscFunctionReturn(0);
14662d61bbb3SSatish Balay }
14672d61bbb3SSatish Balay 
1468453d3561SHong Zhang PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1469453d3561SHong Zhang {
1470453d3561SHong Zhang   PetscErrorCode ierr;
1471453d3561SHong Zhang   Mat            Btrans;
1472453d3561SHong Zhang 
1473453d3561SHong Zhang   PetscFunctionBegin;
1474453d3561SHong Zhang   *f   = PETSC_FALSE;
1475453d3561SHong Zhang   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1476453d3561SHong Zhang   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1477453d3561SHong Zhang   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1478453d3561SHong Zhang   PetscFunctionReturn(0);
1479453d3561SHong Zhang }
1480453d3561SHong Zhang 
1481618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1482b51a4376SLisandro Dalcin PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
14832593348eSBarry Smith {
1484b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1485b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1486b51a4376SLisandro Dalcin   PetscInt       *rowlens,*colidxs;
1487b51a4376SLisandro Dalcin   PetscScalar    *matvals;
14886849ba73SBarry Smith   PetscErrorCode ierr;
14892593348eSBarry Smith 
14903a40ed3dSBarry Smith   PetscFunctionBegin;
1491b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
14923b2fbd54SBarry Smith 
1493b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1494b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1495b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1496b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1497b51a4376SLisandro Dalcin   nz = bs*bs*A->nz;
14982593348eSBarry Smith 
1499b51a4376SLisandro Dalcin   /* write matrix header */
1500b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
1501b51a4376SLisandro Dalcin   header[1] = M; header[2] = N; header[3] = nz;
1502b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
15032593348eSBarry Smith 
1504b51a4376SLisandro Dalcin   /* store row lengths */
1505b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1506b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1507b51a4376SLisandro Dalcin     for (j=0; j<bs; j++)
1508b51a4376SLisandro Dalcin       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1509b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1510b51a4376SLisandro Dalcin   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1511b51a4376SLisandro Dalcin 
1512b51a4376SLisandro Dalcin   /* store column indices  */
1513b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1514b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1515b51a4376SLisandro Dalcin     for (k=0; k<bs; k++)
1516b51a4376SLisandro Dalcin       for (j=A->i[i]; j<A->i[i+1]; j++)
1517b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1518b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*A->j[j] + l;
15192c71b3e2SJacob Faibussowitsch   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1520b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1521b51a4376SLisandro Dalcin   ierr = PetscFree(colidxs);CHKERRQ(ierr);
15222593348eSBarry Smith 
15232593348eSBarry Smith   /* store nonzero values */
1524b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1525b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1526b51a4376SLisandro Dalcin     for (k=0; k<bs; k++)
1527b51a4376SLisandro Dalcin       for (j=A->i[i]; j<A->i[i+1]; j++)
1528b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1529b51a4376SLisandro Dalcin           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
15302c71b3e2SJacob Faibussowitsch   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1531b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1532b51a4376SLisandro Dalcin   ierr = PetscFree(matvals);CHKERRQ(ierr);
1533ce6f0cecSBarry Smith 
1534b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
1535b51a4376SLisandro Dalcin   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
15363a40ed3dSBarry Smith   PetscFunctionReturn(0);
15372593348eSBarry Smith }
15382593348eSBarry Smith 
15397dc0baabSHong Zhang static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
15407dc0baabSHong Zhang {
15417dc0baabSHong Zhang   PetscErrorCode ierr;
15427dc0baabSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
15437dc0baabSHong Zhang   PetscInt       i,bs = A->rmap->bs,k;
15447dc0baabSHong Zhang 
15457dc0baabSHong Zhang   PetscFunctionBegin;
15467dc0baabSHong Zhang   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
15477dc0baabSHong Zhang   for (i=0; i<a->mbs; i++) {
1548c0aa6a63SJacob Faibussowitsch     ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1);CHKERRQ(ierr);
15497dc0baabSHong Zhang     for (k=a->i[i]; k<a->i[i+1]; k++) {
1550c0aa6a63SJacob Faibussowitsch       ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
15517dc0baabSHong Zhang     }
15527dc0baabSHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
15537dc0baabSHong Zhang   }
15547dc0baabSHong Zhang   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
15557dc0baabSHong Zhang   PetscFunctionReturn(0);
15567dc0baabSHong Zhang }
15577dc0baabSHong Zhang 
15586849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
15592593348eSBarry Smith {
1560b6490206SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1561dfbe8321SBarry Smith   PetscErrorCode    ierr;
1562d0f46423SBarry Smith   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1563f3ef73ceSBarry Smith   PetscViewerFormat format;
15642593348eSBarry Smith 
15653a40ed3dSBarry Smith   PetscFunctionBegin;
15667dc0baabSHong Zhang   if (A->structure_only) {
15677dc0baabSHong Zhang     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
15687dc0baabSHong Zhang     PetscFunctionReturn(0);
15697dc0baabSHong Zhang   }
15707dc0baabSHong Zhang 
1571b0a32e0cSBarry Smith   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1572456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1573c0aa6a63SJacob Faibussowitsch     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
1574fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1575ade3a672SBarry Smith     const char *matname;
1576bcd9e38bSBarry Smith     Mat        aij;
1577ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1578ade3a672SBarry Smith     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1579ade3a672SBarry Smith     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1580bcd9e38bSBarry Smith     ierr = MatView(aij,viewer);CHKERRQ(ierr);
15816bf464f9SBarry Smith     ierr = MatDestroy(&aij);CHKERRQ(ierr);
158204929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
158304929863SHong Zhang       PetscFunctionReturn(0);
1584fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1585d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
158644cd7ae7SLois Curfman McInnes     for (i=0; i<a->mbs; i++) {
158744cd7ae7SLois Curfman McInnes       for (j=0; j<bs; j++) {
1588c0aa6a63SJacob Faibussowitsch         ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);CHKERRQ(ierr);
158944cd7ae7SLois Curfman McInnes         for (k=a->i[i]; k<a->i[i+1]; k++) {
159044cd7ae7SLois Curfman McInnes           for (l=0; l<bs; l++) {
1591aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
15920e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1593c0aa6a63SJacob Faibussowitsch               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l,
159457622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15950e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1596c0aa6a63SJacob Faibussowitsch               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l,
159757622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15980e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1599c0aa6a63SJacob Faibussowitsch               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
16000ef38995SBarry Smith             }
160144cd7ae7SLois Curfman McInnes #else
16020ef38995SBarry Smith             if (a->a[bs2*k + l*bs + j] != 0.0) {
1603c0aa6a63SJacob Faibussowitsch               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
16040ef38995SBarry Smith             }
160544cd7ae7SLois Curfman McInnes #endif
160644cd7ae7SLois Curfman McInnes           }
160744cd7ae7SLois Curfman McInnes         }
1608b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
160944cd7ae7SLois Curfman McInnes       }
161044cd7ae7SLois Curfman McInnes     }
1611d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
16120ef38995SBarry Smith   } else {
1613d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1614b6490206SBarry Smith     for (i=0; i<a->mbs; i++) {
1615b6490206SBarry Smith       for (j=0; j<bs; j++) {
1616c0aa6a63SJacob Faibussowitsch         ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);CHKERRQ(ierr);
1617b6490206SBarry Smith         for (k=a->i[i]; k<a->i[i+1]; k++) {
1618b6490206SBarry Smith           for (l=0; l<bs; l++) {
1619aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
16200e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1621c0aa6a63SJacob Faibussowitsch               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l,
162257622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
16230e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1624c0aa6a63SJacob Faibussowitsch               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l,
162557622a8eSBarry Smith                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
16260ef38995SBarry Smith             } else {
1627c0aa6a63SJacob Faibussowitsch               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
162888685aaeSLois Curfman McInnes             }
162988685aaeSLois Curfman McInnes #else
1630c0aa6a63SJacob Faibussowitsch             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
163188685aaeSLois Curfman McInnes #endif
16322593348eSBarry Smith           }
16332593348eSBarry Smith         }
1634b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
16352593348eSBarry Smith       }
16362593348eSBarry Smith     }
1637d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1638b6490206SBarry Smith   }
1639b0a32e0cSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
16403a40ed3dSBarry Smith   PetscFunctionReturn(0);
16412593348eSBarry Smith }
16422593348eSBarry Smith 
16439804daf3SBarry Smith #include <petscdraw.h>
16446849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
16453270192aSSatish Balay {
164677ed5343SBarry Smith   Mat               A = (Mat) Aa;
16473270192aSSatish Balay   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
16486849ba73SBarry Smith   PetscErrorCode    ierr;
1649d0f46423SBarry Smith   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
16500e6d2581SBarry Smith   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
16513f1db9ecSBarry Smith   MatScalar         *aa;
1652b0a32e0cSBarry Smith   PetscViewer       viewer;
1653b3e7f47fSJed Brown   PetscViewerFormat format;
16543270192aSSatish Balay 
16553a40ed3dSBarry Smith   PetscFunctionBegin;
165677ed5343SBarry Smith   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1657b3e7f47fSJed Brown   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1658b0a32e0cSBarry Smith   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
165977ed5343SBarry Smith 
16603270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1661b3e7f47fSJed Brown 
1662b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1663383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1664383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1665b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
16663270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16673270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1668d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16693270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16703270192aSSatish Balay         aa  = a->a + j*bs2;
16713270192aSSatish Balay         for (k=0; k<bs; k++) {
16723270192aSSatish Balay           for (l=0; l<bs; l++) {
16730e6d2581SBarry Smith             if (PetscRealPart(*aa++) >=  0.) continue;
1674b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16753270192aSSatish Balay           }
16763270192aSSatish Balay         }
16773270192aSSatish Balay       }
16783270192aSSatish Balay     }
1679b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
16803270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16813270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1682d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16833270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16843270192aSSatish Balay         aa  = a->a + j*bs2;
16853270192aSSatish Balay         for (k=0; k<bs; k++) {
16863270192aSSatish Balay           for (l=0; l<bs; l++) {
16870e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
1688b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16893270192aSSatish Balay           }
16903270192aSSatish Balay         }
16913270192aSSatish Balay       }
16923270192aSSatish Balay     }
1693b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
16943270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16953270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1696d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16973270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16983270192aSSatish Balay         aa  = a->a + j*bs2;
16993270192aSSatish Balay         for (k=0; k<bs; k++) {
17003270192aSSatish Balay           for (l=0; l<bs; l++) {
17010e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
1702b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
17033270192aSSatish Balay           }
17043270192aSSatish Balay         }
17053270192aSSatish Balay       }
17063270192aSSatish Balay     }
1707383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1708b3e7f47fSJed Brown   } else {
1709b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1710b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1711b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
1712b3e7f47fSJed Brown     PetscDraw popup;
1713b3e7f47fSJed Brown 
1714b3e7f47fSJed Brown     for (i=0; i<a->nz*a->bs2; i++) {
1715b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1716b3e7f47fSJed Brown     }
1717383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1718b3e7f47fSJed Brown     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
171945f3bb6eSLisandro Dalcin     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1720383922c3SLisandro Dalcin 
1721383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1722b3e7f47fSJed Brown     for (i=0,row=0; i<mbs; i++,row+=bs) {
1723b3e7f47fSJed Brown       for (j=a->i[i]; j<a->i[i+1]; j++) {
1724b3e7f47fSJed Brown         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1725b3e7f47fSJed Brown         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1726b3e7f47fSJed Brown         aa  = a->a + j*bs2;
1727b3e7f47fSJed Brown         for (k=0; k<bs; k++) {
1728b3e7f47fSJed Brown           for (l=0; l<bs; l++) {
1729383922c3SLisandro Dalcin             MatScalar v = *aa++;
1730383922c3SLisandro Dalcin             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1731b3e7f47fSJed Brown             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1732b3e7f47fSJed Brown           }
1733b3e7f47fSJed Brown         }
1734b3e7f47fSJed Brown       }
1735b3e7f47fSJed Brown     }
1736383922c3SLisandro Dalcin     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1737b3e7f47fSJed Brown   }
173877ed5343SBarry Smith   PetscFunctionReturn(0);
173977ed5343SBarry Smith }
17403270192aSSatish Balay 
17416849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
174277ed5343SBarry Smith {
1743dfbe8321SBarry Smith   PetscErrorCode ierr;
17440e6d2581SBarry Smith   PetscReal      xl,yl,xr,yr,w,h;
1745b0a32e0cSBarry Smith   PetscDraw      draw;
1746ace3abfcSBarry Smith   PetscBool      isnull;
17473270192aSSatish Balay 
174877ed5343SBarry Smith   PetscFunctionBegin;
1749b0a32e0cSBarry Smith   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
175045f3bb6eSLisandro Dalcin   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
175145f3bb6eSLisandro Dalcin   if (isnull) PetscFunctionReturn(0);
175277ed5343SBarry Smith 
1753d0f46423SBarry Smith   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
175477ed5343SBarry Smith   xr  += w;          yr += h;        xl = -w;     yl = -h;
1755b0a32e0cSBarry Smith   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1756832b7cebSLisandro Dalcin   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1757b0a32e0cSBarry Smith   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
17580298fd71SBarry Smith   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1759832b7cebSLisandro Dalcin   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
17603a40ed3dSBarry Smith   PetscFunctionReturn(0);
17613270192aSSatish Balay }
17623270192aSSatish Balay 
1763dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
17642593348eSBarry Smith {
1765dfbe8321SBarry Smith   PetscErrorCode ierr;
1766ace3abfcSBarry Smith   PetscBool      iascii,isbinary,isdraw;
17672593348eSBarry Smith 
17683a40ed3dSBarry Smith   PetscFunctionBegin;
1769251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1770251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1771251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
177232077d6dSBarry Smith   if (iascii) {
17733a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
17740f5bd95cSBarry Smith   } else if (isbinary) {
17753a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
17760f5bd95cSBarry Smith   } else if (isdraw) {
17773a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
17785cd90555SBarry Smith   } else {
1779a5e6ed63SBarry Smith     Mat B;
1780ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1781a5e6ed63SBarry Smith     ierr = MatView(B,viewer);CHKERRQ(ierr);
17826bf464f9SBarry Smith     ierr = MatDestroy(&B);CHKERRQ(ierr);
17832593348eSBarry Smith   }
17843a40ed3dSBarry Smith   PetscFunctionReturn(0);
17852593348eSBarry Smith }
1786b6490206SBarry Smith 
1787c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1788cd0e1443SSatish Balay {
1789cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1790c1ac3661SBarry Smith   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1791c1ac3661SBarry Smith   PetscInt    *ai = a->i,*ailen = a->ilen;
1792d0f46423SBarry Smith   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
179397e567efSBarry Smith   MatScalar   *ap,*aa = a->a;
1794cd0e1443SSatish Balay 
17953a40ed3dSBarry Smith   PetscFunctionBegin;
17962d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over rows */
1797cd0e1443SSatish Balay     row = im[k]; brow = row/bs;
179854c59aa7SJacob Faibussowitsch     if (row < 0) {v += n; continue;} /* negative row */
179954c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row);
18002d61bbb3SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
18012c3acbe9SBarry Smith     nrow = ailen[brow];
18022d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over columns */
180354c59aa7SJacob Faibussowitsch       if (in[l] < 0) {v++; continue;} /* negative column */
180454c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]);
18052d61bbb3SSatish Balay       col  = in[l];
18062d61bbb3SSatish Balay       bcol = col/bs;
18072d61bbb3SSatish Balay       cidx = col%bs;
18082d61bbb3SSatish Balay       ridx = row%bs;
18092d61bbb3SSatish Balay       high = nrow;
18102d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
18112d61bbb3SSatish Balay       while (high-low > 5) {
1812cd0e1443SSatish Balay         t = (low+high)/2;
1813cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
1814cd0e1443SSatish Balay         else             low  = t;
1815cd0e1443SSatish Balay       }
1816cd0e1443SSatish Balay       for (i=low; i<high; i++) {
1817cd0e1443SSatish Balay         if (rp[i] > bcol) break;
1818cd0e1443SSatish Balay         if (rp[i] == bcol) {
18192d61bbb3SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
18202d61bbb3SSatish Balay           goto finished;
1821cd0e1443SSatish Balay         }
1822cd0e1443SSatish Balay       }
182397e567efSBarry Smith       *v++ = 0.0;
18242d61bbb3SSatish Balay finished:;
1825cd0e1443SSatish Balay     }
1826cd0e1443SSatish Balay   }
18273a40ed3dSBarry Smith   PetscFunctionReturn(0);
1828cd0e1443SSatish Balay }
1829cd0e1443SSatish Balay 
/*
   MatSetValuesBlocked_SeqBAIJ - Inserts or adds an m-by-n array of bs-by-bs blocks into a
   SeqBAIJ matrix.

   Input Parameters:
+  A  - the matrix
.  m  - number of block rows
.  im - block row indices; negative entries are skipped, entries >= a->mbs are an error
.  n  - number of block columns
.  in - block column indices; negative entries are skipped, entries >= a->nbs are an error
.  v  - the values, read as one dense (m*bs)-by-(n*bs) array: row-major when a->roworiented
        is set, column-major otherwise (not read at all when A->structure_only is set)
-  is - ADD_VALUES accumulates into a block that is already stored; any other mode overwrites it

   Notes:
   Inside the matrix each block is stored column by column (entry (r,c) of a block lives at
   offset r + bs*c).

   When a block is not stored yet, a->nonew decides what happens: 1 silently drops the block,
   -1 reports an error, anything else inserts it.  A freshly inserted block is always assigned
   from v, whatever the value of is.

   NOTE(review): MatSeqXAIJReallocateAIJ and MatSeqXAIJReallocateAIJ_structure_only are macros
   defined outside this file section.  They are handed the local variables of this function and
   presumably enlarge the storage when the row is full, updating those variables in place; they
   may also rely on other locals by name.  Confirm against their definition before renaming or
   reordering anything here.
*/
1830dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
183192c4ed94SBarry Smith {
183292c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1833e2ee6c50SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1834c1ac3661SBarry Smith   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
18356849ba73SBarry Smith   PetscErrorCode    ierr;
1836d0f46423SBarry Smith   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1837ace3abfcSBarry Smith   PetscBool         roworiented=a->roworiented;
1838dd6ea824SBarry Smith   const PetscScalar *value     = v;
18399d243f67SHong Zhang   MatScalar         *ap=NULL,*aa = a->a,*bap;
184092c4ed94SBarry Smith 
18413a40ed3dSBarry Smith   PetscFunctionBegin;
  /* stepval: scalars to skip in v after reading bs values of one block line to reach the
     same block's next line (the full line of v holds n*bs resp. m*bs scalars) */
18420e324ae4SSatish Balay   if (roworiented) {
18430e324ae4SSatish Balay     stepval = (n-1)*bs;
18440e324ae4SSatish Balay   } else {
18450e324ae4SSatish Balay     stepval = (m-1)*bs;
18460e324ae4SSatish Balay   }
184792c4ed94SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
184892c4ed94SBarry Smith     row = im[k];
18495ef9f2a5SBarry Smith     if (row < 0) continue;
18506bdcaf15SBarry Smith     PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1);
    /* rp/ap: block column indices and values of this block row; rmax: slots allocated for
       the row; nrow: slots currently in use */
185192c4ed94SBarry Smith     rp   = aj + ai[row];
18527dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2*ai[row];
185392c4ed94SBarry Smith     rmax = imax[row];
185492c4ed94SBarry Smith     nrow = ailen[row];
185592c4ed94SBarry Smith     low  = 0;
1856c71e6ed7SBarry Smith     high = nrow;
185792c4ed94SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
18585ef9f2a5SBarry Smith       if (in[l] < 0) continue;
18596bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1);
186092c4ed94SBarry Smith       col = in[l];
      /* point value at the first scalar of block (k,l) inside v */
18617dc0baabSHong Zhang       if (!A->structure_only) {
186292c4ed94SBarry Smith         if (roworiented) {
186353ef36baSBarry Smith           value = v + (k*(stepval+bs) + l)*bs;
18640e324ae4SSatish Balay         } else {
186553ef36baSBarry Smith           value = v + (l*(stepval+bs) + k)*bs;
186692c4ed94SBarry Smith         }
18677dc0baabSHong Zhang       }
      /* the previous search position (kept in low) is only a valid lower bound when the
         columns arrive in increasing order; otherwise restart from the front of the row */
186826fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
186926fbe8dcSKarl Rupp       else high = nrow;
1870e2ee6c50SBarry Smith       lastcol = col;
      /* bisect until at most 7 candidates are left, then scan them linearly;
         presumably rp[] is kept sorted ascending (the insertion below preserves the order) */
187192c4ed94SBarry Smith       while (high-low > 7) {
187292c4ed94SBarry Smith         t = (low+high)/2;
187392c4ed94SBarry Smith         if (rp[t] > col) high = t;
187492c4ed94SBarry Smith         else             low  = t;
187592c4ed94SBarry Smith       }
187692c4ed94SBarry Smith       for (i=low; i<high; i++) {
187792c4ed94SBarry Smith         if (rp[i] > col) break;
187892c4ed94SBarry Smith         if (rp[i] == col) {
          /* block already stored: update it in place */
18797dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
18808a84c255SSatish Balay           bap = ap +  bs2*i;
18810e324ae4SSatish Balay           if (roworiented) {
            /* v is row-major, the block is column-major: line ii of v goes to
               offsets ii, ii+bs, ii+2*bs, ... of the block */
18828a84c255SSatish Balay             if (is == ADD_VALUES) {
1883dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1884dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
18858a84c255SSatish Balay                   bap[jj] += *value++;
1886dd9472c6SBarry Smith                 }
1887dd9472c6SBarry Smith               }
18880e324ae4SSatish Balay             } else {
1889dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1890dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
18910e324ae4SSatish Balay                   bap[jj] = *value++;
18928a84c255SSatish Balay                 }
1893dd9472c6SBarry Smith               }
1894dd9472c6SBarry Smith             }
18950e324ae4SSatish Balay           } else {
            /* v and the block are both column-major: copy bs contiguous scalars per column */
18960e324ae4SSatish Balay             if (is == ADD_VALUES) {
189753ef36baSBarry Smith               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1898dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
189953ef36baSBarry Smith                   bap[jj] += value[jj];
1900dd9472c6SBarry Smith                 }
190153ef36baSBarry Smith                 bap += bs;
1902dd9472c6SBarry Smith               }
19030e324ae4SSatish Balay             } else {
190453ef36baSBarry Smith               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1905dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
190653ef36baSBarry Smith                   bap[jj]  = value[jj];
19070e324ae4SSatish Balay                 }
190853ef36baSBarry Smith                 bap += bs;
19098a84c255SSatish Balay               }
1910dd9472c6SBarry Smith             }
1911dd9472c6SBarry Smith           }
1912f1241b54SBarry Smith           goto noinsert2;
191392c4ed94SBarry Smith         }
191492c4ed94SBarry Smith       }
      /* block (row,col) is not stored yet; i is the slot where it belongs */
191589280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
19162c71b3e2SJacob Faibussowitsch       PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* NOTE(review): macros defined elsewhere; presumably they grow the storage when
         nrow >= rmax and refresh aa/ai/aj/rp/ap/rmax - confirm */
19177dc0baabSHong Zhang       if (A->structure_only) {
19187dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
19197dc0baabSHong Zhang       } else {
1920fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
19217dc0baabSHong Zhang       }
      /* N: index of the last block in use before the insertion; the row (and the search
         window) grow by one */
1922c03d1d03SSatish Balay       N = nrow++ - 1; high++;
192392c4ed94SBarry Smith       /* shift up all the later entries in this row */
1924580bdb30SBarry Smith       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
192592c4ed94SBarry Smith       rp[i] = col;
19267dc0baabSHong Zhang       if (!A->structure_only) {
1927580bdb30SBarry Smith         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
19288a84c255SSatish Balay         bap   = ap +  bs2*i;
        /* a new block is always assigned, regardless of the insert mode */
19290e324ae4SSatish Balay         if (roworiented) {
1930dd9472c6SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
1931dd9472c6SBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
19320e324ae4SSatish Balay               bap[jj] = *value++;
1933dd9472c6SBarry Smith             }
1934dd9472c6SBarry Smith           }
19350e324ae4SSatish Balay         } else {
1936dd9472c6SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
1937dd9472c6SBarry Smith             for (jj=0; jj<bs; jj++) {
19380e324ae4SSatish Balay               *bap++ = *value++;
19390e324ae4SSatish Balay             }
1940dd9472c6SBarry Smith           }
1941dd9472c6SBarry Smith         }
19427dc0baabSHong Zhang       }
1943f1241b54SBarry Smith noinsert2:;
      /* remember where this column was found/inserted as the next search's lower bound */
194492c4ed94SBarry Smith       low = i;
194592c4ed94SBarry Smith     }
    /* publish the new number of blocks in use for this row */
194692c4ed94SBarry Smith     ailen[row] = nrow;
194792c4ed94SBarry Smith   }
19483a40ed3dSBarry Smith   PetscFunctionReturn(0);
194992c4ed94SBarry Smith }
195026e093fcSHong Zhang 
1951dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1952584200bdSSatish Balay {
1953584200bdSSatish Balay   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1954580bdb30SBarry Smith   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1955d0f46423SBarry Smith   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
19566849ba73SBarry Smith   PetscErrorCode ierr;
1957c1ac3661SBarry Smith   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
19583f1db9ecSBarry Smith   MatScalar      *aa  = a->a,*ap;
19593447b6efSHong Zhang   PetscReal      ratio=0.6;
1960584200bdSSatish Balay 
19613a40ed3dSBarry Smith   PetscFunctionBegin;
19623a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1963584200bdSSatish Balay 
196443ee02c3SBarry Smith   if (m) rmax = ailen[0];
1965584200bdSSatish Balay   for (i=1; i<mbs; i++) {
1966584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
1967584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
1968d402145bSBarry Smith     rmax    = PetscMax(rmax,ailen[i]);
1969584200bdSSatish Balay     if (fshift) {
1970580bdb30SBarry Smith       ip = aj + ai[i];
1971580bdb30SBarry Smith       ap = aa + bs2*ai[i];
1972584200bdSSatish Balay       N  = ailen[i];
1973580bdb30SBarry Smith       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1974672ba085SHong Zhang       if (!A->structure_only) {
1975580bdb30SBarry Smith         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1976584200bdSSatish Balay       }
1977672ba085SHong Zhang     }
1978584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
1979584200bdSSatish Balay   }
1980584200bdSSatish Balay   if (mbs) {
1981584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
1982584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1983584200bdSSatish Balay   }
19847c565772SBarry Smith 
1985584200bdSSatish Balay   /* reset ilen and imax for each row */
19867c565772SBarry Smith   a->nonzerorowcnt = 0;
1987672ba085SHong Zhang   if (A->structure_only) {
1988672ba085SHong Zhang     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1989672ba085SHong Zhang   } else { /* !A->structure_only */
1990584200bdSSatish Balay     for (i=0; i<mbs; i++) {
1991584200bdSSatish Balay       ailen[i] = imax[i] = ai[i+1] - ai[i];
19927c565772SBarry Smith       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1993584200bdSSatish Balay     }
1994672ba085SHong Zhang   }
1995a7c10996SSatish Balay   a->nz = ai[mbs];
1996584200bdSSatish Balay 
1997584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
1998b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
1999584200bdSSatish Balay   if (fshift && a->diag) {
2000606d414cSSatish Balay     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
20013bb1ff40SBarry Smith     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2002f4259b30SLisandro Dalcin     a->diag = NULL;
2003584200bdSSatish Balay   }
20042c71b3e2SJacob Faibussowitsch   PetscCheckFalse(fshift && a->nounused == -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
20057d3de750SJacob Faibussowitsch   ierr = PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
20067d3de750SJacob Faibussowitsch   ierr = PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs);CHKERRQ(ierr);
20077d3de750SJacob Faibussowitsch   ierr = PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax);CHKERRQ(ierr);
200826fbe8dcSKarl Rupp 
20098e58a170SBarry Smith   A->info.mallocs    += a->reallocs;
2010e2f3b5e9SSatish Balay   a->reallocs         = 0;
20110e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2012647a6520SHong Zhang   a->rmax             = rmax;
2013cf4441caSHong Zhang 
2014672ba085SHong Zhang   if (!A->structure_only) {
201511e456e1SBarry Smith     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2016672ba085SHong Zhang   }
20173a40ed3dSBarry Smith   PetscFunctionReturn(0);
2018584200bdSSatish Balay }
2019584200bdSSatish Balay 
2020bea157c4SSatish Balay /*
2021bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2022bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2023a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2024bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2025bea157c4SSatish Balay */
2026c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2027d9b7c43dSSatish Balay {
2028c1ac3661SBarry Smith   PetscInt  i,j,k,row;
2029ace3abfcSBarry Smith   PetscBool flg;
20303a40ed3dSBarry Smith 
2031433994e6SBarry Smith   PetscFunctionBegin;
2032bea157c4SSatish Balay   for (i=0,j=0; i<n; j++) {
2033bea157c4SSatish Balay     row = idx[i];
2034a5b23f4aSJose E. Roman     if (row%bs!=0) { /* Not the beginning of a block */
2035bea157c4SSatish Balay       sizes[j] = 1;
2036bea157c4SSatish Balay       i++;
2037e4fda26cSSatish Balay     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2038bea157c4SSatish Balay       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2039bea157c4SSatish Balay       i++;
2040bea157c4SSatish Balay     } else { /* Begining of the block, so check if the complete block exists */
2041bea157c4SSatish Balay       flg = PETSC_TRUE;
2042bea157c4SSatish Balay       for (k=1; k<bs; k++) {
2043bea157c4SSatish Balay         if (row+k != idx[i+k]) { /* break in the block */
2044bea157c4SSatish Balay           flg = PETSC_FALSE;
2045bea157c4SSatish Balay           break;
2046d9b7c43dSSatish Balay         }
2047bea157c4SSatish Balay       }
2048abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2049bea157c4SSatish Balay         sizes[j] = bs;
2050bea157c4SSatish Balay         i       += bs;
2051bea157c4SSatish Balay       } else {
2052bea157c4SSatish Balay         sizes[j] = 1;
2053bea157c4SSatish Balay         i++;
2054bea157c4SSatish Balay       }
2055bea157c4SSatish Balay     }
2056bea157c4SSatish Balay   }
2057bea157c4SSatish Balay   *bs_max = j;
20583a40ed3dSBarry Smith   PetscFunctionReturn(0);
2059d9b7c43dSSatish Balay }
2060d9b7c43dSSatish Balay 
20612b40b63fSBarry Smith PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2062d9b7c43dSSatish Balay {
2063d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2064dfbe8321SBarry Smith   PetscErrorCode    ierr;
2065f4df32b1SMatthew Knepley   PetscInt          i,j,k,count,*rows;
2066d0f46423SBarry Smith   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
206787828ca2SBarry Smith   PetscScalar       zero = 0.0;
20683f1db9ecSBarry Smith   MatScalar         *aa;
206997b48c8fSBarry Smith   const PetscScalar *xx;
207097b48c8fSBarry Smith   PetscScalar       *bb;
2071d9b7c43dSSatish Balay 
20723a40ed3dSBarry Smith   PetscFunctionBegin;
207397b48c8fSBarry Smith   /* fix right hand side if needed */
207497b48c8fSBarry Smith   if (x && b) {
207597b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
207697b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
207797b48c8fSBarry Smith     for (i=0; i<is_n; i++) {
207897b48c8fSBarry Smith       bb[is_idx[i]] = diag*xx[is_idx[i]];
207997b48c8fSBarry Smith     }
208097b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
208197b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
208297b48c8fSBarry Smith   }
208397b48c8fSBarry Smith 
2084d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2085bea157c4SSatish Balay   /* allocate memory for rows,sizes */
2086dcca6d9dSJed Brown   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2087bea157c4SSatish Balay 
2088563b5814SBarry Smith   /* copy IS values to rows, and sort them */
208926fbe8dcSKarl Rupp   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2090bea157c4SSatish Balay   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
209197b48c8fSBarry Smith 
2092a9817697SBarry Smith   if (baij->keepnonzeropattern) {
209326fbe8dcSKarl Rupp     for (i=0; i<is_n; i++) sizes[i] = 1;
2094dffd3267SBarry Smith     bs_max          = is_n;
2095dffd3267SBarry Smith   } else {
2096bea157c4SSatish Balay     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2097e56f5c9eSBarry Smith     A->nonzerostate++;
2098dffd3267SBarry Smith   }
2099bea157c4SSatish Balay 
2100bea157c4SSatish Balay   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2101bea157c4SSatish Balay     row = rows[j];
21022c71b3e2SJacob Faibussowitsch     PetscCheckFalse(row < 0 || row > A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row);
2103bea157c4SSatish Balay     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2104b31fbe3bSSatish Balay     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2105a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2106d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2107bea157c4SSatish Balay         if (baij->ilen[row/bs] > 0) {
2108bea157c4SSatish Balay           baij->ilen[row/bs]       = 1;
2109bea157c4SSatish Balay           baij->j[baij->i[row/bs]] = row/bs;
211026fbe8dcSKarl Rupp 
2111580bdb30SBarry Smith           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2112a07cd24cSSatish Balay         }
2113563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
2114bea157c4SSatish Balay         for (k=0; k<bs; k++) {
2115f4df32b1SMatthew Knepley           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2116bea157c4SSatish Balay         }
2117f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2118bea157c4SSatish Balay         baij->ilen[row/bs] = 0;
2119f4df32b1SMatthew Knepley       } /* end (diag == 0.0) */
2120bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
21216bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2122bea157c4SSatish Balay       for (k=0; k<count; k++) {
2123d9b7c43dSSatish Balay         aa[0] =  zero;
2124d9b7c43dSSatish Balay         aa   += bs;
2125d9b7c43dSSatish Balay       }
2126d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2127f4df32b1SMatthew Knepley         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2128d9b7c43dSSatish Balay       }
2129d9b7c43dSSatish Balay     }
2130bea157c4SSatish Balay   }
2131bea157c4SSatish Balay 
2132fca92195SBarry Smith   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
21339a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
21343a40ed3dSBarry Smith   PetscFunctionReturn(0);
2135d9b7c43dSSatish Balay }
21361c351548SSatish Balay 
213797b48c8fSBarry Smith PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
213897b48c8fSBarry Smith {
213997b48c8fSBarry Smith   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
214097b48c8fSBarry Smith   PetscErrorCode    ierr;
214197b48c8fSBarry Smith   PetscInt          i,j,k,count;
214297b48c8fSBarry Smith   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
214397b48c8fSBarry Smith   PetscScalar       zero = 0.0;
214497b48c8fSBarry Smith   MatScalar         *aa;
214597b48c8fSBarry Smith   const PetscScalar *xx;
214697b48c8fSBarry Smith   PetscScalar       *bb;
214756777dd2SBarry Smith   PetscBool         *zeroed,vecs = PETSC_FALSE;
214897b48c8fSBarry Smith 
214997b48c8fSBarry Smith   PetscFunctionBegin;
215097b48c8fSBarry Smith   /* fix right hand side if needed */
215197b48c8fSBarry Smith   if (x && b) {
215297b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
215397b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
215456777dd2SBarry Smith     vecs = PETSC_TRUE;
215597b48c8fSBarry Smith   }
215697b48c8fSBarry Smith 
215797b48c8fSBarry Smith   /* zero the columns */
21581795a4d1SJed Brown   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
215997b48c8fSBarry Smith   for (i=0; i<is_n; i++) {
21602c71b3e2SJacob Faibussowitsch     PetscCheckFalse(is_idx[i] < 0 || is_idx[i] >= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]);
216197b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
216297b48c8fSBarry Smith   }
216397b48c8fSBarry Smith   for (i=0; i<A->rmap->N; i++) {
216497b48c8fSBarry Smith     if (!zeroed[i]) {
216597b48c8fSBarry Smith       row = i/bs;
216697b48c8fSBarry Smith       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
216797b48c8fSBarry Smith         for (k=0; k<bs; k++) {
216897b48c8fSBarry Smith           col = bs*baij->j[j] + k;
216997b48c8fSBarry Smith           if (zeroed[col]) {
217097b48c8fSBarry Smith             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
217156777dd2SBarry Smith             if (vecs) bb[i] -= aa[0]*xx[col];
217297b48c8fSBarry Smith             aa[0] = 0.0;
217397b48c8fSBarry Smith           }
217497b48c8fSBarry Smith         }
217597b48c8fSBarry Smith       }
217656777dd2SBarry Smith     } else if (vecs) bb[i] = diag*xx[i];
217797b48c8fSBarry Smith   }
217897b48c8fSBarry Smith   ierr = PetscFree(zeroed);CHKERRQ(ierr);
217956777dd2SBarry Smith   if (vecs) {
218056777dd2SBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
218156777dd2SBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
218256777dd2SBarry Smith   }
218397b48c8fSBarry Smith 
218497b48c8fSBarry Smith   /* zero the rows */
218597b48c8fSBarry Smith   for (i=0; i<is_n; i++) {
218697b48c8fSBarry Smith     row   = is_idx[i];
218797b48c8fSBarry Smith     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
218897b48c8fSBarry Smith     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
218997b48c8fSBarry Smith     for (k=0; k<count; k++) {
219097b48c8fSBarry Smith       aa[0] =  zero;
219197b48c8fSBarry Smith       aa   += bs;
219297b48c8fSBarry Smith     }
2193d4a378daSJed Brown     if (diag != (PetscScalar)0.0) {
219497b48c8fSBarry Smith       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
219597b48c8fSBarry Smith     }
219697b48c8fSBarry Smith   }
219797b48c8fSBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
219897b48c8fSBarry Smith   PetscFunctionReturn(0);
219997b48c8fSBarry Smith }
220097b48c8fSBarry Smith 
/*
   MatSetValues_SeqBAIJ - Inserts or adds an m by n set of scalar (point) values into a
   SeqBAIJ matrix.

   Input Parameters:
+  A  - the matrix
.  m  - number of rows in the set
.  im - point row indices; negative entries are skipped
.  n  - number of columns in the set
.  in - point column indices; negative entries are skipped
.  v  - the values, read as v[l + k*n] when a->roworiented is set and as v[k + l*m] otherwise
        (k indexes im, l indexes in); not read when A->structure_only is set
-  is - ADD_VALUES adds to an existing entry, any other mode overwrites it

   A row index >= A->rmap->N or a column index >= A->cmap->n raises PETSC_ERR_ARG_OUTOFRANGE.
   When the block column is not yet stored in the block row: a->nonew == 1 silently drops
   the value, a->nonew == -1 raises an error, otherwise a new block is inserted in sorted
   position with all entries zero except the one being set.
*/
PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
{
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
  PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
  PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
  PetscErrorCode ierr;
  PetscInt       ridx,cidx,bs2=a->bs2;
  PetscBool      roworiented=a->roworiented;
  MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;

  PetscFunctionBegin;
  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row/bs;
    if (row < 0) continue; /* negative row indices are ignored */
    PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
    rp   = aj + ai[brow];                            /* block column indices of this block row */
    if (!A->structure_only) ap = aa + bs2*ai[brow];  /* values of this block row */
    rmax = imax[brow];                               /* passed to the reallocation macros below */
    nrow = ailen[brow];                              /* blocks currently in use in this block row */
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      if (in[l] < 0) continue; /* negative column indices are ignored */
      PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
      col  = in[l]; bcol = col/bs;
      ridx = row % bs; cidx = col % bs; /* position of the entry inside its block */
      if (!A->structure_only) {
        if (roworiented) {
          value = v[l + k*n];
        } else {
          value = v[k + l*m];
        }
      }
      /* reuse the previous search window: restart from the front when the column did not
         increase, otherwise keep low and only reset the upper end */
      if (col <= lastcol) low = 0; else high = nrow;
      lastcol = col;
      /* bisect until the window holds at most 7 blocks, then scan it linearly */
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else              low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break; /* passed the slot where bcol belongs: block not present */
        if (rp[i] == bcol) {
          /* block already stored: entry (ridx,cidx) lives at offset bs*cidx + ridx in the block */
          bap = ap +  bs2*i + bs*cidx + ridx;
          if (!A->structure_only) {
            if (is == ADD_VALUES) *bap += value;
            else                  *bap  = value;
          }
          goto noinsert1;
        }
      }
      /* block (brow,bcol) is not stored; i is the position where it would be inserted */
      if (nonew == 1) goto noinsert1;
      PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* NOTE(review): these macros are defined outside this file section; presumably they
         enlarge the storage when the row is full and update the variables passed to them - confirm */
      if (A->structure_only) {
        MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
      } else {
        MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
      }
      N = nrow++ - 1; high++; /* N is the last used position before the insertion */
      /* shift up all the later entries in this row */
      ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
      rp[i] = bcol;
      if (!A->structure_only) {
        ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
        ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); /* new block starts out all zero */
        ap[bs2*i + bs*cidx + ridx] = value;
      }
      a->nz++;
      A->nonzerostate++;
noinsert1:;
      low = i; /* the next search in this row starts from the position just handled */
    }
    ailen[brow] = nrow; /* record the (possibly grown) number of blocks in use */
  }
  PetscFunctionReturn(0);
}
22792d61bbb3SSatish Balay 
22800481f469SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
22812d61bbb3SSatish Balay {
22822d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
22832d61bbb3SSatish Balay   Mat            outA;
2284dfbe8321SBarry Smith   PetscErrorCode ierr;
2285ace3abfcSBarry Smith   PetscBool      row_identity,col_identity;
22862d61bbb3SSatish Balay 
22872d61bbb3SSatish Balay   PetscFunctionBegin;
22882c71b3e2SJacob Faibussowitsch   PetscCheckFalse(info->levels != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2289667159a5SBarry Smith   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2290667159a5SBarry Smith   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
22912c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!row_identity || !col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
22922d61bbb3SSatish Balay 
22932d61bbb3SSatish Balay   outA            = inA;
2294d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
2295f6224b95SHong Zhang   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2296f6224b95SHong Zhang   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
22972d61bbb3SSatish Balay 
2298c4992f7dSBarry Smith   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2299cf242676SKris Buschelman 
2300c38d4ed2SBarry Smith   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
23016bf464f9SBarry Smith   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2302c3122656SLisandro Dalcin   a->row = row;
2303c38d4ed2SBarry Smith   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
23046bf464f9SBarry Smith   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2305c3122656SLisandro Dalcin   a->col = col;
2306c38d4ed2SBarry Smith 
2307c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
23086bf464f9SBarry Smith   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
23094c49b128SBarry Smith   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
23103bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2311c38d4ed2SBarry Smith 
2312ace3abfcSBarry Smith   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2313c38d4ed2SBarry Smith   if (!a->solve_work) {
2314854ce69bSBarry Smith     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
23153bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2316c38d4ed2SBarry Smith   }
2317719d5645SBarry Smith   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
23182d61bbb3SSatish Balay   PetscFunctionReturn(0);
23192d61bbb3SSatish Balay }
2320d9b7c43dSSatish Balay 
23217087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
232227a8da17SBarry Smith {
232327a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2324bdb1c0e1SJed Brown   PetscInt    i,nz,mbs;
232527a8da17SBarry Smith 
232627a8da17SBarry Smith   PetscFunctionBegin;
2327b32cb4a7SJed Brown   nz  = baij->maxnz;
2328bdb1c0e1SJed Brown   mbs = baij->mbs;
232927a8da17SBarry Smith   for (i=0; i<nz; i++) {
233027a8da17SBarry Smith     baij->j[i] = indices[i];
233127a8da17SBarry Smith   }
233227a8da17SBarry Smith   baij->nz = nz;
2333bdb1c0e1SJed Brown   for (i=0; i<mbs; i++) {
233427a8da17SBarry Smith     baij->ilen[i] = baij->imax[i];
233527a8da17SBarry Smith   }
233627a8da17SBarry Smith   PetscFunctionReturn(0);
233727a8da17SBarry Smith }
233827a8da17SBarry Smith 
233927a8da17SBarry Smith /*@
234027a8da17SBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
234127a8da17SBarry Smith        in the matrix.
234227a8da17SBarry Smith 
234327a8da17SBarry Smith   Input Parameters:
234427a8da17SBarry Smith +  mat - the SeqBAIJ matrix
234527a8da17SBarry Smith -  indices - the column indices
234627a8da17SBarry Smith 
234715091d37SBarry Smith   Level: advanced
234815091d37SBarry Smith 
234927a8da17SBarry Smith   Notes:
235027a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
235127a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
235227a8da17SBarry Smith   of the MatSetValues() operation.
235327a8da17SBarry Smith 
235427a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
2355d1be2dadSMatthew Knepley   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
235627a8da17SBarry Smith 
235727a8da17SBarry Smith     MUST be called before any calls to MatSetValues();
235827a8da17SBarry Smith 
235927a8da17SBarry Smith @*/
23607087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
236127a8da17SBarry Smith {
23624ac538c5SBarry Smith   PetscErrorCode ierr;
236327a8da17SBarry Smith 
236427a8da17SBarry Smith   PetscFunctionBegin;
23650700a824SBarry Smith   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
23664482741eSBarry Smith   PetscValidPointer(indices,2);
23674ac538c5SBarry Smith   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
236827a8da17SBarry Smith   PetscFunctionReturn(0);
236927a8da17SBarry Smith }
237027a8da17SBarry Smith 
2371985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2372273d9f13SBarry Smith {
2373273d9f13SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2374dfbe8321SBarry Smith   PetscErrorCode ierr;
2375c1ac3661SBarry Smith   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2376273d9f13SBarry Smith   PetscReal      atmp;
237787828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
2378273d9f13SBarry Smith   MatScalar      *aa;
2379c1ac3661SBarry Smith   PetscInt       ncols,brow,krow,kcol;
2380273d9f13SBarry Smith 
2381273d9f13SBarry Smith   PetscFunctionBegin;
23822c71b3e2SJacob Faibussowitsch   PetscCheckFalse(A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2383d0f46423SBarry Smith   bs  = A->rmap->bs;
2384273d9f13SBarry Smith   aa  = a->a;
2385273d9f13SBarry Smith   ai  = a->i;
2386273d9f13SBarry Smith   aj  = a->j;
2387273d9f13SBarry Smith   mbs = a->mbs;
2388273d9f13SBarry Smith 
23892dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
23901ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2391273d9f13SBarry Smith   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
23922c71b3e2SJacob Faibussowitsch   PetscCheckFalse(n != A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2393273d9f13SBarry Smith   for (i=0; i<mbs; i++) {
2394273d9f13SBarry Smith     ncols = ai[1] - ai[0]; ai++;
2395273d9f13SBarry Smith     brow  = bs*i;
2396273d9f13SBarry Smith     for (j=0; j<ncols; j++) {
2397273d9f13SBarry Smith       for (kcol=0; kcol<bs; kcol++) {
2398273d9f13SBarry Smith         for (krow=0; krow<bs; krow++) {
2399273d9f13SBarry Smith           atmp = PetscAbsScalar(*aa);aa++;
2400273d9f13SBarry Smith           row  = brow + krow;   /* row index */
2401985db425SBarry Smith           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2402273d9f13SBarry Smith         }
2403273d9f13SBarry Smith       }
2404273d9f13SBarry Smith       aj++;
2405273d9f13SBarry Smith     }
2406273d9f13SBarry Smith   }
24071ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2408273d9f13SBarry Smith   PetscFunctionReturn(0);
2409273d9f13SBarry Smith }
2410273d9f13SBarry Smith 
24113c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
24123c896bc6SHong Zhang {
24133c896bc6SHong Zhang   PetscErrorCode ierr;
24143c896bc6SHong Zhang 
24153c896bc6SHong Zhang   PetscFunctionBegin;
24163c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
24173c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
24183c896bc6SHong Zhang     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
24193c896bc6SHong Zhang     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2420d88c0aacSHong Zhang     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
24213c896bc6SHong Zhang 
24222c71b3e2SJacob Faibussowitsch     PetscCheckFalse(a->i[ambs] != b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]);
24232c71b3e2SJacob Faibussowitsch     PetscCheckFalse(abs != bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs);
2424580bdb30SBarry Smith     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2425cdc753b6SBarry Smith     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
24263c896bc6SHong Zhang   } else {
24273c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
24283c896bc6SHong Zhang   }
24293c896bc6SHong Zhang   PetscFunctionReturn(0);
24303c896bc6SHong Zhang }
24313c896bc6SHong Zhang 
24324994cf47SJed Brown PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2433273d9f13SBarry Smith {
2434dfbe8321SBarry Smith   PetscErrorCode ierr;
2435273d9f13SBarry Smith 
2436273d9f13SBarry Smith   PetscFunctionBegin;
2437f4259b30SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2438273d9f13SBarry Smith   PetscFunctionReturn(0);
2439273d9f13SBarry Smith }
2440273d9f13SBarry Smith 
2441cda14afcSprj- static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2442f2a5309cSSatish Balay {
2443f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
24446e111a19SKarl Rupp 
2445f2a5309cSSatish Balay   PetscFunctionBegin;
2446f2a5309cSSatish Balay   *array = a->a;
2447f2a5309cSSatish Balay   PetscFunctionReturn(0);
2448f2a5309cSSatish Balay }
2449f2a5309cSSatish Balay 
2450cda14afcSprj- static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2451f2a5309cSSatish Balay {
2452f2a5309cSSatish Balay   PetscFunctionBegin;
2453cda14afcSprj-   *array = NULL;
2454f2a5309cSSatish Balay   PetscFunctionReturn(0);
2455f2a5309cSSatish Balay }
2456f2a5309cSSatish Balay 
245752768537SHong Zhang PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
245852768537SHong Zhang {
2459b264fe52SHong Zhang   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
246052768537SHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
246152768537SHong Zhang   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2462b264fe52SHong Zhang   PetscErrorCode ierr;
246352768537SHong Zhang 
246452768537SHong Zhang   PetscFunctionBegin;
246552768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
2466b264fe52SHong Zhang   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
246752768537SHong Zhang   PetscFunctionReturn(0);
246852768537SHong Zhang }
246952768537SHong Zhang 
2470f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
247142ee4b1aSHong Zhang {
247242ee4b1aSHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2473dfbe8321SBarry Smith   PetscErrorCode ierr;
247431ce2d13SHong Zhang   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2475e838b9e7SJed Brown   PetscBLASInt   one=1;
247642ee4b1aSHong Zhang 
247742ee4b1aSHong Zhang   PetscFunctionBegin;
2478134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2479134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2480134adf20SPierre Jolivet     if (e) {
2481134adf20SPierre Jolivet       ierr = PetscArraycmp(x->i,y->i,x->mbs+1,&e);CHKERRQ(ierr);
2482134adf20SPierre Jolivet       if (e) {
2483134adf20SPierre Jolivet         ierr = PetscArraycmp(x->j,y->j,x->i[x->mbs],&e);CHKERRQ(ierr);
2484134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2485134adf20SPierre Jolivet       }
2486134adf20SPierre Jolivet     }
248754c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN");
2488134adf20SPierre Jolivet   }
248942ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2490f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2491c5df96a5SBarry Smith     PetscBLASInt bnz;
2492c5df96a5SBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
24938b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2494a3fa217bSJose E. Roman     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2495ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2496ab784542SHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
249742ee4b1aSHong Zhang   } else {
249852768537SHong Zhang     Mat      B;
249952768537SHong Zhang     PetscInt *nnz;
250054c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
250152768537SHong Zhang     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
250252768537SHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
250352768537SHong Zhang     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
250452768537SHong Zhang     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
250552768537SHong Zhang     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
250652768537SHong Zhang     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
250752768537SHong Zhang     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
250852768537SHong Zhang     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
250952768537SHong Zhang     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
251079c2fd05SStefano Zampini     ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr);
251152768537SHong Zhang     ierr = PetscFree(nnz);CHKERRQ(ierr);
251242ee4b1aSHong Zhang   }
251342ee4b1aSHong Zhang   PetscFunctionReturn(0);
251442ee4b1aSHong Zhang }
251542ee4b1aSHong Zhang 
25162726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
25172726fb6dSPierre Jolivet {
25182726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX)
25192726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
25202726fb6dSPierre Jolivet   PetscInt    i,nz = a->bs2*a->i[a->mbs];
25212726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
25222726fb6dSPierre Jolivet 
25232726fb6dSPierre Jolivet   PetscFunctionBegin;
25242726fb6dSPierre Jolivet   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
25252726fb6dSPierre Jolivet #else
25262726fb6dSPierre Jolivet   PetscFunctionBegin;
25272726fb6dSPierre Jolivet #endif
25282726fb6dSPierre Jolivet   PetscFunctionReturn(0);
25292726fb6dSPierre Jolivet }
25302726fb6dSPierre Jolivet 
253199cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
253299cafbc1SBarry Smith {
253399cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
253499cafbc1SBarry Smith   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2535dd6ea824SBarry Smith   MatScalar   *aa = a->a;
253699cafbc1SBarry Smith 
253799cafbc1SBarry Smith   PetscFunctionBegin;
253899cafbc1SBarry Smith   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
253999cafbc1SBarry Smith   PetscFunctionReturn(0);
254099cafbc1SBarry Smith }
254199cafbc1SBarry Smith 
254299cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
254399cafbc1SBarry Smith {
254499cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
254599cafbc1SBarry Smith   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2546dd6ea824SBarry Smith   MatScalar   *aa = a->a;
254799cafbc1SBarry Smith 
254899cafbc1SBarry Smith   PetscFunctionBegin;
254999cafbc1SBarry Smith   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
255099cafbc1SBarry Smith   PetscFunctionReturn(0);
255199cafbc1SBarry Smith }
255299cafbc1SBarry Smith 
25533acb8795SBarry Smith /*
25542479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
25553acb8795SBarry Smith */
25561a83f524SJed Brown PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
25573acb8795SBarry Smith {
25583acb8795SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
25593acb8795SBarry Smith   PetscErrorCode ierr;
25603acb8795SBarry Smith   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
25613acb8795SBarry Smith   PetscInt       nz = a->i[m],row,*jj,mr,col;
25623acb8795SBarry Smith 
25633acb8795SBarry Smith   PetscFunctionBegin;
25643acb8795SBarry Smith   *nn = n;
25653acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
25662c71b3e2SJacob Faibussowitsch   PetscCheckFalse(symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2567e7e72b3dSBarry Smith   else {
2568b9e7e5c1SBarry Smith     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2569854ce69bSBarry Smith     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2570b9e7e5c1SBarry Smith     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
25713acb8795SBarry Smith     jj   = a->j;
25723acb8795SBarry Smith     for (i=0; i<nz; i++) {
25733acb8795SBarry Smith       collengths[jj[i]]++;
25743acb8795SBarry Smith     }
25753acb8795SBarry Smith     cia[0] = oshift;
25763acb8795SBarry Smith     for (i=0; i<n; i++) {
25773acb8795SBarry Smith       cia[i+1] = cia[i] + collengths[i];
25783acb8795SBarry Smith     }
2579580bdb30SBarry Smith     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
25803acb8795SBarry Smith     jj   = a->j;
25813acb8795SBarry Smith     for (row=0; row<m; row++) {
25823acb8795SBarry Smith       mr = a->i[row+1] - a->i[row];
25833acb8795SBarry Smith       for (i=0; i<mr; i++) {
25843acb8795SBarry Smith         col = *jj++;
258526fbe8dcSKarl Rupp 
25863acb8795SBarry Smith         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
25873acb8795SBarry Smith       }
25883acb8795SBarry Smith     }
25893acb8795SBarry Smith     ierr = PetscFree(collengths);CHKERRQ(ierr);
25903acb8795SBarry Smith     *ia  = cia; *ja = cja;
25913acb8795SBarry Smith   }
25923acb8795SBarry Smith   PetscFunctionReturn(0);
25933acb8795SBarry Smith }
25943acb8795SBarry Smith 
25951a83f524SJed Brown PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
25963acb8795SBarry Smith {
25973acb8795SBarry Smith   PetscErrorCode ierr;
25983acb8795SBarry Smith 
25993acb8795SBarry Smith   PetscFunctionBegin;
26003acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
26013acb8795SBarry Smith   ierr = PetscFree(*ia);CHKERRQ(ierr);
26023acb8795SBarry Smith   ierr = PetscFree(*ja);CHKERRQ(ierr);
26033acb8795SBarry Smith   PetscFunctionReturn(0);
26043acb8795SBarry Smith }
26053acb8795SBarry Smith 
2606525d23c0SHong Zhang /*
2607525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2608525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2609040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2610525d23c0SHong Zhang  */
2611525d23c0SHong Zhang PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2612f6d58c54SBarry Smith {
2613525d23c0SHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2614f6d58c54SBarry Smith   PetscErrorCode ierr;
2615c0349474SHong Zhang   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2616525d23c0SHong Zhang   PetscInt       nz = a->i[m],row,*jj,mr,col;
2617525d23c0SHong Zhang   PetscInt       *cspidx;
2618f6d58c54SBarry Smith 
2619f6d58c54SBarry Smith   PetscFunctionBegin;
2620525d23c0SHong Zhang   *nn = n;
2621525d23c0SHong Zhang   if (!ia) PetscFunctionReturn(0);
2622f6d58c54SBarry Smith 
2623b9e7e5c1SBarry Smith   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2624854ce69bSBarry Smith   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2625b9e7e5c1SBarry Smith   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2626b9e7e5c1SBarry Smith   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2627525d23c0SHong Zhang   jj   = a->j;
2628525d23c0SHong Zhang   for (i=0; i<nz; i++) {
2629525d23c0SHong Zhang     collengths[jj[i]]++;
2630f6d58c54SBarry Smith   }
2631525d23c0SHong Zhang   cia[0] = oshift;
2632525d23c0SHong Zhang   for (i=0; i<n; i++) {
2633525d23c0SHong Zhang     cia[i+1] = cia[i] + collengths[i];
2634525d23c0SHong Zhang   }
2635580bdb30SBarry Smith   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2636525d23c0SHong Zhang   jj   = a->j;
2637525d23c0SHong Zhang   for (row=0; row<m; row++) {
2638525d23c0SHong Zhang     mr = a->i[row+1] - a->i[row];
2639525d23c0SHong Zhang     for (i=0; i<mr; i++) {
2640525d23c0SHong Zhang       col = *jj++;
2641525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2642525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2643525d23c0SHong Zhang     }
2644525d23c0SHong Zhang   }
2645525d23c0SHong Zhang   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2646071fcb05SBarry Smith   *ia    = cia;
2647071fcb05SBarry Smith   *ja    = cja;
2648525d23c0SHong Zhang   *spidx = cspidx;
2649525d23c0SHong Zhang   PetscFunctionReturn(0);
2650f6d58c54SBarry Smith }
2651f6d58c54SBarry Smith 
2652525d23c0SHong Zhang PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2653525d23c0SHong Zhang {
2654525d23c0SHong Zhang   PetscErrorCode ierr;
2655f6d58c54SBarry Smith 
2656525d23c0SHong Zhang   PetscFunctionBegin;
2657525d23c0SHong Zhang   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2658525d23c0SHong Zhang   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2659f6d58c54SBarry Smith   PetscFunctionReturn(0);
2660f6d58c54SBarry Smith }
266199cafbc1SBarry Smith 
26627d68702bSBarry Smith PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
26637d68702bSBarry Smith {
26647d68702bSBarry Smith   PetscErrorCode ierr;
26657d68702bSBarry Smith   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
26667d68702bSBarry Smith 
26677d68702bSBarry Smith   PetscFunctionBegin;
26686f33a894SBarry Smith   if (!Y->preallocated || !aij->nz) {
26697d68702bSBarry Smith     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
26707d68702bSBarry Smith   }
26717d68702bSBarry Smith   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
26727d68702bSBarry Smith   PetscFunctionReturn(0);
26737d68702bSBarry Smith }
26747d68702bSBarry Smith 
26752593348eSBarry Smith /* -------------------------------------------------------------------*/
26763964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2677cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
2678cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
2679cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
268097304618SKris Buschelman                                /* 4*/  MatMultAdd_SeqBAIJ_N,
26817c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
26827c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
2683f4259b30SLisandro Dalcin                                        NULL,
2684f4259b30SLisandro Dalcin                                        NULL,
2685f4259b30SLisandro Dalcin                                        NULL,
2686f4259b30SLisandro Dalcin                                /* 10*/ NULL,
2687cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
2688f4259b30SLisandro Dalcin                                        NULL,
2689f4259b30SLisandro Dalcin                                        NULL,
2690f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
269197304618SKris Buschelman                                /* 15*/ MatGetInfo_SeqBAIJ,
2692cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
2693cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
2694cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
2695cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
2696f4259b30SLisandro Dalcin                                /* 20*/ NULL,
2697cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
2698cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
2699cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
2700d519adbfSMatthew Knepley                                /* 24*/ MatZeroRows_SeqBAIJ,
2701f4259b30SLisandro Dalcin                                        NULL,
2702f4259b30SLisandro Dalcin                                        NULL,
2703f4259b30SLisandro Dalcin                                        NULL,
2704f4259b30SLisandro Dalcin                                        NULL,
27054994cf47SJed Brown                                /* 29*/ MatSetUp_SeqBAIJ,
2706f4259b30SLisandro Dalcin                                        NULL,
2707f4259b30SLisandro Dalcin                                        NULL,
2708f4259b30SLisandro Dalcin                                        NULL,
2709f4259b30SLisandro Dalcin                                        NULL,
2710d519adbfSMatthew Knepley                                /* 34*/ MatDuplicate_SeqBAIJ,
2711f4259b30SLisandro Dalcin                                        NULL,
2712f4259b30SLisandro Dalcin                                        NULL,
2713cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
2714f4259b30SLisandro Dalcin                                        NULL,
2715d519adbfSMatthew Knepley                                /* 39*/ MatAXPY_SeqBAIJ,
27167dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
2717cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
2718cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
27193c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
2720f4259b30SLisandro Dalcin                                /* 44*/ NULL,
2721cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
27227d68702bSBarry Smith                                        MatShift_SeqBAIJ,
2723f4259b30SLisandro Dalcin                                        NULL,
272497b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
2725f4259b30SLisandro Dalcin                                /* 49*/ NULL,
27263b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
272792c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
27283acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
27293acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
273093dfae19SHong Zhang                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2731f4259b30SLisandro Dalcin                                        NULL,
2732f4259b30SLisandro Dalcin                                        NULL,
2733090001bdSToby Isaac                                        NULL,
2734d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
27357dae84e0SHong Zhang                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2736b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
2737b9b97703SBarry Smith                                        MatView_SeqBAIJ,
2738f4259b30SLisandro Dalcin                                        NULL,
2739f4259b30SLisandro Dalcin                                        NULL,
2740f4259b30SLisandro Dalcin                                /* 64*/ NULL,
2741f4259b30SLisandro Dalcin                                        NULL,
2742f4259b30SLisandro Dalcin                                        NULL,
2743f4259b30SLisandro Dalcin                                        NULL,
2744f4259b30SLisandro Dalcin                                        NULL,
2745d519adbfSMatthew Knepley                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2746f4259b30SLisandro Dalcin                                        NULL,
2747c87e5d42SMatthew Knepley                                        MatConvert_Basic,
2748f4259b30SLisandro Dalcin                                        NULL,
2749f4259b30SLisandro Dalcin                                        NULL,
2750f4259b30SLisandro Dalcin                                /* 74*/ NULL,
2751f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
2752f4259b30SLisandro Dalcin                                        NULL,
2753f4259b30SLisandro Dalcin                                        NULL,
2754f4259b30SLisandro Dalcin                                        NULL,
2755f4259b30SLisandro Dalcin                                /* 79*/ NULL,
2756f4259b30SLisandro Dalcin                                        NULL,
2757f4259b30SLisandro Dalcin                                        NULL,
2758f4259b30SLisandro Dalcin                                        NULL,
27595bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
2760f4259b30SLisandro Dalcin                                /* 84*/ NULL,
2761f4259b30SLisandro Dalcin                                        NULL,
2762f4259b30SLisandro Dalcin                                        NULL,
2763f4259b30SLisandro Dalcin                                        NULL,
2764f4259b30SLisandro Dalcin                                        NULL,
2765f4259b30SLisandro Dalcin                                /* 89*/ NULL,
2766f4259b30SLisandro Dalcin                                        NULL,
2767f4259b30SLisandro Dalcin                                        NULL,
2768f4259b30SLisandro Dalcin                                        NULL,
2769f4259b30SLisandro Dalcin                                        NULL,
2770f4259b30SLisandro Dalcin                                /* 94*/ NULL,
2771f4259b30SLisandro Dalcin                                        NULL,
2772f4259b30SLisandro Dalcin                                        NULL,
2773f4259b30SLisandro Dalcin                                        NULL,
2774f4259b30SLisandro Dalcin                                        NULL,
2775f4259b30SLisandro Dalcin                                /* 99*/ NULL,
2776f4259b30SLisandro Dalcin                                        NULL,
2777f4259b30SLisandro Dalcin                                        NULL,
27782726fb6dSPierre Jolivet                                        MatConjugate_SeqBAIJ,
2779f4259b30SLisandro Dalcin                                        NULL,
2780f4259b30SLisandro Dalcin                                /*104*/ NULL,
278199cafbc1SBarry Smith                                        MatRealPart_SeqBAIJ,
27822af78befSBarry Smith                                        MatImaginaryPart_SeqBAIJ,
2783f4259b30SLisandro Dalcin                                        NULL,
2784f4259b30SLisandro Dalcin                                        NULL,
2785f4259b30SLisandro Dalcin                                /*109*/ NULL,
2786f4259b30SLisandro Dalcin                                        NULL,
2787f4259b30SLisandro Dalcin                                        NULL,
2788f4259b30SLisandro Dalcin                                        NULL,
2789547795f9SHong Zhang                                        MatMissingDiagonal_SeqBAIJ,
2790f4259b30SLisandro Dalcin                                /*114*/ NULL,
2791f4259b30SLisandro Dalcin                                        NULL,
2792f4259b30SLisandro Dalcin                                        NULL,
2793f4259b30SLisandro Dalcin                                        NULL,
2794f4259b30SLisandro Dalcin                                        NULL,
2795f4259b30SLisandro Dalcin                                /*119*/ NULL,
2796f4259b30SLisandro Dalcin                                        NULL,
2797547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
2798d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2799f4259b30SLisandro Dalcin                                        NULL,
2800f4259b30SLisandro Dalcin                                /*124*/ NULL,
2801857cbf51SRichard Tran Mills                                        MatGetColumnReductions_SeqBAIJ,
28023964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
2803f4259b30SLisandro Dalcin                                        NULL,
2804f4259b30SLisandro Dalcin                                        NULL,
2805f4259b30SLisandro Dalcin                                /*129*/ NULL,
2806f4259b30SLisandro Dalcin                                        NULL,
2807f4259b30SLisandro Dalcin                                        NULL,
2808f4259b30SLisandro Dalcin                                        NULL,
2809f4259b30SLisandro Dalcin                                        NULL,
2810f4259b30SLisandro Dalcin                                /*134*/ NULL,
2811f4259b30SLisandro Dalcin                                        NULL,
2812f4259b30SLisandro Dalcin                                        NULL,
2813f4259b30SLisandro Dalcin                                        NULL,
2814f4259b30SLisandro Dalcin                                        NULL,
281546533700Sstefano_zampini                                /*139*/ MatSetBlockSizes_Default,
2816f4259b30SLisandro Dalcin                                        NULL,
2817f4259b30SLisandro Dalcin                                        NULL,
2818bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_SeqXAIJ,
2819f4259b30SLisandro Dalcin                                        NULL,
282086e85357SHong Zhang                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2821*d70f29a3SPierre Jolivet                                        MatDestroySubMatrices_SeqBAIJ,
2822*d70f29a3SPierre Jolivet                                        NULL,
2823*d70f29a3SPierre Jolivet                                        NULL
282499cafbc1SBarry Smith };
28252593348eSBarry Smith 
28267087cfbeSBarry Smith PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
28273e90b805SBarry Smith {
28283e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
28298ece6314SShri Abhyankar   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2830dfbe8321SBarry Smith   PetscErrorCode ierr;
28313e90b805SBarry Smith 
28323e90b805SBarry Smith   PetscFunctionBegin;
28332c71b3e2SJacob Faibussowitsch   PetscCheckFalse(aij->nonew != 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
28343e90b805SBarry Smith 
28353e90b805SBarry Smith   /* allocate space for values if not already there */
28363e90b805SBarry Smith   if (!aij->saved_values) {
2837854ce69bSBarry Smith     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
28383bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
28393e90b805SBarry Smith   }
28403e90b805SBarry Smith 
28413e90b805SBarry Smith   /* copy values over */
2842580bdb30SBarry Smith   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
28433e90b805SBarry Smith   PetscFunctionReturn(0);
28443e90b805SBarry Smith }
28453e90b805SBarry Smith 
28467087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
28473e90b805SBarry Smith {
28483e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
28496849ba73SBarry Smith   PetscErrorCode ierr;
28508ece6314SShri Abhyankar   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
28513e90b805SBarry Smith 
28523e90b805SBarry Smith   PetscFunctionBegin;
28532c71b3e2SJacob Faibussowitsch   PetscCheckFalse(aij->nonew != 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
28542c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
28553e90b805SBarry Smith 
28563e90b805SBarry Smith   /* copy values over */
2857580bdb30SBarry Smith   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
28583e90b805SBarry Smith   PetscFunctionReturn(0);
28593e90b805SBarry Smith }
28603e90b805SBarry Smith 
2861cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2862cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2863273d9f13SBarry Smith 
2864b5b72c8aSIrina Sokolova PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2865a23d5eceSKris Buschelman {
2866a23d5eceSKris Buschelman   Mat_SeqBAIJ    *b;
28676849ba73SBarry Smith   PetscErrorCode ierr;
2868535b19f3SBarry Smith   PetscInt       i,mbs,nbs,bs2;
28698afaa268SBarry Smith   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2870a23d5eceSKris Buschelman 
2871a23d5eceSKris Buschelman   PetscFunctionBegin;
28722576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2873ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
2874ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
2875ab93d7beSBarry Smith     nz             = 0;
2876ab93d7beSBarry Smith   }
28778c07d4e3SBarry Smith 
287833d57670SJed Brown   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
287926283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
288026283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2881e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2882899cda47SBarry Smith 
2883899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
2884899cda47SBarry Smith 
2885d0f46423SBarry Smith   mbs = B->rmap->n/bs;
2886d0f46423SBarry Smith   nbs = B->cmap->n/bs;
2887a23d5eceSKris Buschelman   bs2 = bs*bs;
2888a23d5eceSKris Buschelman 
28892c71b3e2SJacob Faibussowitsch   PetscCheckFalse(mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs);
2890a23d5eceSKris Buschelman 
2891a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
28922c71b3e2SJacob Faibussowitsch   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
2893a23d5eceSKris Buschelman   if (nnz) {
2894a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) {
28952c71b3e2SJacob Faibussowitsch       PetscCheckFalse(nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
28962c71b3e2SJacob Faibussowitsch       PetscCheckFalse(nnz[i] > nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs);
2897a23d5eceSKris Buschelman     }
2898a23d5eceSKris Buschelman   }
2899a23d5eceSKris Buschelman 
2900a23d5eceSKris Buschelman   b    = (Mat_SeqBAIJ*)B->data;
2901ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
29028afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
29038c07d4e3SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
29048c07d4e3SBarry Smith 
2905a23d5eceSKris Buschelman   if (!flg) {
2906a23d5eceSKris Buschelman     switch (bs) {
2907a23d5eceSKris Buschelman     case 1:
2908a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
2909a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2910a23d5eceSKris Buschelman       break;
2911a23d5eceSKris Buschelman     case 2:
2912a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
2913a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2914a23d5eceSKris Buschelman       break;
2915a23d5eceSKris Buschelman     case 3:
2916a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
2917a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2918a23d5eceSKris Buschelman       break;
2919a23d5eceSKris Buschelman     case 4:
2920a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
2921a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2922a23d5eceSKris Buschelman       break;
2923a23d5eceSKris Buschelman     case 5:
2924a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
2925a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2926a23d5eceSKris Buschelman       break;
2927a23d5eceSKris Buschelman     case 6:
2928a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
2929a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2930a23d5eceSKris Buschelman       break;
2931a23d5eceSKris Buschelman     case 7:
2932a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
2933a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2934a23d5eceSKris Buschelman       break;
293596e086a2SDaniel Kokron     case 9:
29366679dcc1SBarry Smith     {
29376679dcc1SBarry Smith       PetscInt version = 1;
29386679dcc1SBarry Smith       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
29396679dcc1SBarry Smith       switch (version) {
29405f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
29416679dcc1SBarry Smith       case 1:
294296e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
294396e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
29447d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
29456679dcc1SBarry Smith         break;
29466679dcc1SBarry Smith #endif
29476679dcc1SBarry Smith       default:
294896e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
294996e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
29507d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
295196e086a2SDaniel Kokron         break;
29526679dcc1SBarry Smith       }
29536679dcc1SBarry Smith       break;
29546679dcc1SBarry Smith     }
2955ebada01fSBarry Smith     case 11:
2956ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
2957ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2958ebada01fSBarry Smith       break;
29596679dcc1SBarry Smith     case 12:
29606679dcc1SBarry Smith     {
29616679dcc1SBarry Smith       PetscInt version = 1;
29626679dcc1SBarry Smith       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
29636679dcc1SBarry Smith       switch (version) {
29646679dcc1SBarry Smith       case 1:
29656679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
29666679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
29677d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
29688ab949d8SShri Abhyankar         break;
29696679dcc1SBarry Smith       case 2:
29706679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
29716679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
29727d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
29736679dcc1SBarry Smith         break;
29746679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
29756679dcc1SBarry Smith       case 3:
29766679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
29776679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
29787d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
29796679dcc1SBarry Smith         break;
29806679dcc1SBarry Smith #endif
2981a23d5eceSKris Buschelman       default:
2982a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
2983a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
29847d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
29856679dcc1SBarry Smith         break;
29866679dcc1SBarry Smith       }
29876679dcc1SBarry Smith       break;
29886679dcc1SBarry Smith     }
29896679dcc1SBarry Smith     case 15:
29906679dcc1SBarry Smith     {
29916679dcc1SBarry Smith       PetscInt version = 1;
29926679dcc1SBarry Smith       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
29936679dcc1SBarry Smith       switch (version) {
29946679dcc1SBarry Smith       case 1:
29956679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
29967d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
29976679dcc1SBarry Smith         break;
29986679dcc1SBarry Smith       case 2:
29996679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
30007d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
30016679dcc1SBarry Smith         break;
30026679dcc1SBarry Smith       case 3:
30036679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
30047d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
30056679dcc1SBarry Smith         break;
30066679dcc1SBarry Smith       case 4:
30076679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
30087d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
30096679dcc1SBarry Smith         break;
30106679dcc1SBarry Smith       default:
30116679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_N;
30127d3de750SJacob Faibussowitsch         ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
30136679dcc1SBarry Smith         break;
30146679dcc1SBarry Smith       }
30156679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
30166679dcc1SBarry Smith       break;
30176679dcc1SBarry Smith     }
30186679dcc1SBarry Smith     default:
30196679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
30206679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
30217d3de750SJacob Faibussowitsch       ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
3022a23d5eceSKris Buschelman       break;
3023a23d5eceSKris Buschelman     }
3024a23d5eceSKris Buschelman   }
3025e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3026a23d5eceSKris Buschelman   b->mbs = mbs;
3027a23d5eceSKris Buschelman   b->nbs = nbs;
3028ab93d7beSBarry Smith   if (!skipallocation) {
30292ee49352SLisandro Dalcin     if (!b->imax) {
3030dcca6d9dSJed Brown       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
30313bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
303226fbe8dcSKarl Rupp 
30334fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
30342ee49352SLisandro Dalcin     }
3035ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
303626fbe8dcSKarl Rupp     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3037a23d5eceSKris Buschelman     if (!nnz) {
3038a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3039c62bd62aSJed Brown       else if (nz < 0) nz = 1;
30405d2a9ed1SStefano Zampini       nz = PetscMin(nz,nbs);
3041a23d5eceSKris Buschelman       for (i=0; i<mbs; i++) b->imax[i] = nz;
304261778c46SBarry Smith       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
3043a23d5eceSKris Buschelman     } else {
3044c73702f5SBarry Smith       PetscInt64 nz64 = 0;
3045c73702f5SBarry Smith       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3046c73702f5SBarry Smith       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
3047a23d5eceSKris Buschelman     }
3048a23d5eceSKris Buschelman 
3049a23d5eceSKris Buschelman     /* allocate the matrix space */
30502ee49352SLisandro Dalcin     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3051672ba085SHong Zhang     if (B->structure_only) {
3052672ba085SHong Zhang       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
3053672ba085SHong Zhang       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
3054672ba085SHong Zhang       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
3055672ba085SHong Zhang     } else {
30566679dcc1SBarry Smith       PetscInt nzbs2 = 0;
30576679dcc1SBarry Smith       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
30586679dcc1SBarry Smith       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
30593bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3060580bdb30SBarry Smith       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
3061672ba085SHong Zhang     }
3062580bdb30SBarry Smith     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
306326fbe8dcSKarl Rupp 
3064672ba085SHong Zhang     if (B->structure_only) {
3065672ba085SHong Zhang       b->singlemalloc = PETSC_FALSE;
3066672ba085SHong Zhang       b->free_a       = PETSC_FALSE;
3067672ba085SHong Zhang     } else {
3068a23d5eceSKris Buschelman       b->singlemalloc = PETSC_TRUE;
3069672ba085SHong Zhang       b->free_a       = PETSC_TRUE;
3070672ba085SHong Zhang     }
3071672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
3072672ba085SHong Zhang 
3073a23d5eceSKris Buschelman     b->i[0] = 0;
3074a23d5eceSKris Buschelman     for (i=1; i<mbs+1; i++) {
3075a23d5eceSKris Buschelman       b->i[i] = b->i[i-1] + b->imax[i-1];
3076a23d5eceSKris Buschelman     }
3077672ba085SHong Zhang 
3078e811da20SHong Zhang   } else {
3079e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3080e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3081ab93d7beSBarry Smith   }
3082a23d5eceSKris Buschelman 
3083a23d5eceSKris Buschelman   b->bs2              = bs2;
3084a23d5eceSKris Buschelman   b->mbs              = mbs;
3085a23d5eceSKris Buschelman   b->nz               = 0;
3086b32cb4a7SJed Brown   b->maxnz            = nz;
3087b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3088cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3089cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
30902576faa2SJed Brown   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3091a23d5eceSKris Buschelman   PetscFunctionReturn(0);
3092a23d5eceSKris Buschelman }
3093a23d5eceSKris Buschelman 
3094cf12db73SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3095725b52f3SLisandro Dalcin {
3096725b52f3SLisandro Dalcin   PetscInt       i,m,nz,nz_max=0,*nnz;
3097f4259b30SLisandro Dalcin   PetscScalar    *values=NULL;
3098d47bf9aaSJed Brown   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3099725b52f3SLisandro Dalcin   PetscErrorCode ierr;
3100725b52f3SLisandro Dalcin 
3101725b52f3SLisandro Dalcin   PetscFunctionBegin;
31022c71b3e2SJacob Faibussowitsch   PetscCheckFalse(bs < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
310326283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
310426283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
310526283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
310626283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3107e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3108d0f46423SBarry Smith   m    = B->rmap->n/bs;
3109725b52f3SLisandro Dalcin 
31102c71b3e2SJacob Faibussowitsch   PetscCheckFalse(ii[0] != 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
3111854ce69bSBarry Smith   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3112725b52f3SLisandro Dalcin   for (i=0; i<m; i++) {
3113cf12db73SBarry Smith     nz = ii[i+1]- ii[i];
31142c71b3e2SJacob Faibussowitsch     PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
3115725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3116725b52f3SLisandro Dalcin     nnz[i] = nz;
3117725b52f3SLisandro Dalcin   }
3118725b52f3SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3119725b52f3SLisandro Dalcin   ierr = PetscFree(nnz);CHKERRQ(ierr);
3120725b52f3SLisandro Dalcin 
3121725b52f3SLisandro Dalcin   values = (PetscScalar*)V;
3122725b52f3SLisandro Dalcin   if (!values) {
31231795a4d1SJed Brown     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3124725b52f3SLisandro Dalcin   }
3125725b52f3SLisandro Dalcin   for (i=0; i<m; i++) {
3126cf12db73SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
3127cf12db73SBarry Smith     const PetscInt    *icols = jj + ii[i];
3128bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3129cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3130725b52f3SLisandro Dalcin       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
31313adadaf3SJed Brown     } else {
31323adadaf3SJed Brown       PetscInt j;
31333adadaf3SJed Brown       for (j=0; j<ncols; j++) {
31343adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3135d47bf9aaSJed Brown         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
31363adadaf3SJed Brown       }
31373adadaf3SJed Brown     }
3138725b52f3SLisandro Dalcin   }
3139725b52f3SLisandro Dalcin   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3140725b52f3SLisandro Dalcin   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3141725b52f3SLisandro Dalcin   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31427827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3143725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3144725b52f3SLisandro Dalcin }
3145725b52f3SLisandro Dalcin 
3146cda14afcSprj- /*@C
3147cda14afcSprj-    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3148cda14afcSprj- 
3149cda14afcSprj-    Not Collective
3150cda14afcSprj- 
3151cda14afcSprj-    Input Parameter:
3152cda14afcSprj- .  mat - a MATSEQBAIJ matrix
3153cda14afcSprj- 
3154cda14afcSprj-    Output Parameter:
3155cda14afcSprj- .   array - pointer to the data
3156cda14afcSprj- 
3157cda14afcSprj-    Level: intermediate
3158cda14afcSprj- 
3159cda14afcSprj- .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3160cda14afcSprj- @*/
3161cda14afcSprj- PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3162cda14afcSprj- {
3163cda14afcSprj-   PetscErrorCode ierr;
3164cda14afcSprj- 
3165cda14afcSprj-   PetscFunctionBegin;
3166cda14afcSprj-   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3167cda14afcSprj-   PetscFunctionReturn(0);
3168cda14afcSprj- }
3169cda14afcSprj- 
3170cda14afcSprj- /*@C
3171cda14afcSprj-    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3172cda14afcSprj- 
3173cda14afcSprj-    Not Collective
3174cda14afcSprj- 
3175cda14afcSprj-    Input Parameters:
3176cda14afcSprj- +  mat - a MATSEQBAIJ matrix
3177cda14afcSprj- -  array - pointer to the data
3178cda14afcSprj- 
3179cda14afcSprj-    Level: intermediate
3180cda14afcSprj- 
3181cda14afcSprj- .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3182cda14afcSprj- @*/
3183cda14afcSprj- PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3184cda14afcSprj- {
3185cda14afcSprj-   PetscErrorCode ierr;
3186cda14afcSprj- 
3187cda14afcSprj-   PetscFunctionBegin;
3188cda14afcSprj-   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3189cda14afcSprj-   PetscFunctionReturn(0);
3190cda14afcSprj- }
3191cda14afcSprj- 
31920bad9183SKris Buschelman /*MC
3193fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
31940bad9183SKris Buschelman    block sparse compressed row format.
31950bad9183SKris Buschelman 
31960bad9183SKris Buschelman    Options Database Keys:
31976679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
31986679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
31990bad9183SKris Buschelman 
32000bad9183SKris Buschelman    Level: beginner
32010cd7f59aSBarry Smith 
32020cd7f59aSBarry Smith    Notes:
32030cd7f59aSBarry Smith     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
32040cd7f59aSBarry Smith     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
32050bad9183SKris Buschelman 
32066679dcc1SBarry Smith    Run with -info to see what version of the matrix-vector product is being used
32076679dcc1SBarry Smith 
3208f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ()
32090bad9183SKris Buschelman M*/
32100bad9183SKris Buschelman 
3211cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3212b24902e0SBarry Smith 
32138cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
32142593348eSBarry Smith {
3215dfbe8321SBarry Smith   PetscErrorCode ierr;
3216c1ac3661SBarry Smith   PetscMPIInt    size;
3217b6490206SBarry Smith   Mat_SeqBAIJ    *b;
32183b2fbd54SBarry Smith 
32193a40ed3dSBarry Smith   PetscFunctionBegin;
3220ffc4695bSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
32212c71b3e2SJacob Faibussowitsch   PetscCheckFalse(size > 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3222b6490206SBarry Smith 
3223b00a9115SJed Brown   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3224b0a32e0cSBarry Smith   B->data = (void*)b;
3225549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
322626fbe8dcSKarl Rupp 
3227f4259b30SLisandro Dalcin   b->row          = NULL;
3228f4259b30SLisandro Dalcin   b->col          = NULL;
3229f4259b30SLisandro Dalcin   b->icol         = NULL;
32302593348eSBarry Smith   b->reallocs     = 0;
3231f4259b30SLisandro Dalcin   b->saved_values = NULL;
32322593348eSBarry Smith 
3233c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
32342593348eSBarry Smith   b->nonew              = 0;
3235f4259b30SLisandro Dalcin   b->diag               = NULL;
3236f4259b30SLisandro Dalcin   B->spptr              = NULL;
3237b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3238a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
32394e220ebcSLois Curfman McInnes 
3240cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3241cda14afcSprj-   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3242bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3243bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3244bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3245bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3246bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3247bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3248bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3249bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
32507ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
3251c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
32527ea3e4caSstefano_zampini #endif
3253c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
325417667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
32553a40ed3dSBarry Smith   PetscFunctionReturn(0);
32562593348eSBarry Smith }
32572593348eSBarry Smith 
3258ace3abfcSBarry Smith PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
32592593348eSBarry Smith {
3260b24902e0SBarry Smith   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
32616849ba73SBarry Smith   PetscErrorCode ierr;
3262a96a251dSBarry Smith   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3263de6a44a3SBarry Smith 
32643a40ed3dSBarry Smith   PetscFunctionBegin;
32652c71b3e2SJacob Faibussowitsch   PetscCheckFalse(a->i[mbs] != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
32662593348eSBarry Smith 
32674fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
32684fd072dbSBarry Smith     c->imax           = a->imax;
32694fd072dbSBarry Smith     c->ilen           = a->ilen;
32704fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
32714fd072dbSBarry Smith   } else {
3272dcca6d9dSJed Brown     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
32733bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3274b6490206SBarry Smith     for (i=0; i<mbs; i++) {
32752593348eSBarry Smith       c->imax[i] = a->imax[i];
32762593348eSBarry Smith       c->ilen[i] = a->ilen[i];
32772593348eSBarry Smith     }
32784fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
32794fd072dbSBarry Smith   }
32802593348eSBarry Smith 
32812593348eSBarry Smith   /* allocate the matrix space */
328216a2bf60SHong Zhang   if (mallocmatspace) {
32834fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
32841795a4d1SJed Brown       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
32853bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
328626fbe8dcSKarl Rupp 
32874fd072dbSBarry Smith       c->i            = a->i;
32884fd072dbSBarry Smith       c->j            = a->j;
3289379be0ddSLisandro Dalcin       c->singlemalloc = PETSC_FALSE;
3290379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
3291379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
32924fd072dbSBarry Smith       c->parent       = A;
32931e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
32941e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
329526fbe8dcSKarl Rupp 
32964fd072dbSBarry Smith       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
32974fd072dbSBarry Smith       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
32984fd072dbSBarry Smith       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
32994fd072dbSBarry Smith     } else {
3300dcca6d9dSJed Brown       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
33013bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
330226fbe8dcSKarl Rupp 
3303c4992f7dSBarry Smith       c->singlemalloc = PETSC_TRUE;
3304379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
33054fd072dbSBarry Smith       c->free_ij      = PETSC_TRUE;
330626fbe8dcSKarl Rupp 
3307580bdb30SBarry Smith       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3308b6490206SBarry Smith       if (mbs > 0) {
3309580bdb30SBarry Smith         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
33102e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
3311580bdb30SBarry Smith           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
33122e8a6d31SBarry Smith         } else {
3313580bdb30SBarry Smith           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
33142593348eSBarry Smith         }
33152593348eSBarry Smith       }
33161e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
33171e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
331816a2bf60SHong Zhang     }
33194fd072dbSBarry Smith   }
332016a2bf60SHong Zhang 
33212593348eSBarry Smith   c->roworiented = a->roworiented;
33222593348eSBarry Smith   c->nonew       = a->nonew;
332326fbe8dcSKarl Rupp 
33241e1e43feSBarry Smith   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
33251e1e43feSBarry Smith   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
332626fbe8dcSKarl Rupp 
33275c9eb25fSBarry Smith   c->bs2         = a->bs2;
33285c9eb25fSBarry Smith   c->mbs         = a->mbs;
33295c9eb25fSBarry Smith   c->nbs         = a->nbs;
33302593348eSBarry Smith 
33312593348eSBarry Smith   if (a->diag) {
33324fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
33334fd072dbSBarry Smith       c->diag      = a->diag;
33344fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
33354fd072dbSBarry Smith     } else {
3336854ce69bSBarry Smith       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
33373bb1ff40SBarry Smith       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
333826fbe8dcSKarl Rupp       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
33394fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
33404fd072dbSBarry Smith     }
3341f4259b30SLisandro Dalcin   } else c->diag = NULL;
334226fbe8dcSKarl Rupp 
33432593348eSBarry Smith   c->nz         = a->nz;
3344f2cbd3d5SJed Brown   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3345f361c04dSBarry Smith   c->solve_work = NULL;
3346f361c04dSBarry Smith   c->mult_work  = NULL;
3347f361c04dSBarry Smith   c->sor_workt  = NULL;
3348f361c04dSBarry Smith   c->sor_work   = NULL;
334988e51ccdSHong Zhang 
335088e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
335188e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3352cd6b891eSBarry Smith   if (a->compressedrow.use) {
335388e51ccdSHong Zhang     i    = a->compressedrow.nrows;
3354dcca6d9dSJed Brown     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
33553bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3356580bdb30SBarry Smith     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3357580bdb30SBarry Smith     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
335888e51ccdSHong Zhang   } else {
335988e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
33600298fd71SBarry Smith     c->compressedrow.i      = NULL;
33610298fd71SBarry Smith     c->compressedrow.rindex = NULL;
336288e51ccdSHong Zhang   }
3363e56f5c9eSBarry Smith   C->nonzerostate = A->nonzerostate;
336426fbe8dcSKarl Rupp 
3365140e18c1SBarry Smith   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
33663a40ed3dSBarry Smith   PetscFunctionReturn(0);
33672593348eSBarry Smith }
33682593348eSBarry Smith 
3369b24902e0SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3370b24902e0SBarry Smith {
3371b24902e0SBarry Smith   PetscErrorCode ierr;
3372b24902e0SBarry Smith 
3373b24902e0SBarry Smith   PetscFunctionBegin;
3374ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3375d0f46423SBarry Smith   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
33765c9eb25fSBarry Smith   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
337798ad0f72SJed Brown   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3378b24902e0SBarry Smith   PetscFunctionReturn(0);
3379b24902e0SBarry Smith }
3380b24902e0SBarry Smith 
3381618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3382b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3383f501eaabSShri Abhyankar {
3384b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3385b51a4376SLisandro Dalcin   PetscInt       *rowidxs,*colidxs;
3386b51a4376SLisandro Dalcin   PetscScalar    *matvals;
3387f501eaabSShri Abhyankar   PetscErrorCode ierr;
3388b51a4376SLisandro Dalcin 
3389b51a4376SLisandro Dalcin   PetscFunctionBegin;
3390b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3391b51a4376SLisandro Dalcin 
3392b51a4376SLisandro Dalcin   /* read matrix header */
3393b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
33942c71b3e2SJacob Faibussowitsch   PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3395b51a4376SLisandro Dalcin   M = header[1]; N = header[2]; nz = header[3];
33962c71b3e2SJacob Faibussowitsch   PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
33972c71b3e2SJacob Faibussowitsch   PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
33982c71b3e2SJacob Faibussowitsch   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3399b51a4376SLisandro Dalcin 
3400b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
3401b51a4376SLisandro Dalcin   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3402b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3403b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3404b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3405b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3406b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
3407b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3408b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3409b51a4376SLisandro Dalcin 
3410b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
3411b51a4376SLisandro Dalcin   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
34122c71b3e2SJacob Faibussowitsch   PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3413b51a4376SLisandro Dalcin   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3414b51a4376SLisandro Dalcin   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3415b51a4376SLisandro Dalcin   mbs = m/bs; nbs = n/bs;
3416b51a4376SLisandro Dalcin 
3417b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
3418b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3419b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3420b51a4376SLisandro Dalcin   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3421b51a4376SLisandro Dalcin   sum = rowidxs[m];
34222c71b3e2SJacob Faibussowitsch   PetscCheckFalse(sum != nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3423b51a4376SLisandro Dalcin 
3424b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
3425b51a4376SLisandro Dalcin   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3426b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3427b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3428b51a4376SLisandro Dalcin 
3429b51a4376SLisandro Dalcin   { /* preallocate matrix storage */
3430b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3431b51a4376SLisandro Dalcin     PetscInt  *nnz;
3432618cc2edSLisandro Dalcin     PetscBool sbaij;
3433b51a4376SLisandro Dalcin 
3434b51a4376SLisandro Dalcin     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3435b51a4376SLisandro Dalcin     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3436618cc2edSLisandro Dalcin     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3437b51a4376SLisandro Dalcin     for (i=0; i<mbs; i++) {
3438b51a4376SLisandro Dalcin       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3439618cc2edSLisandro Dalcin       for (k=0; k<bs; k++) {
3440618cc2edSLisandro Dalcin         PetscInt row = bs*i + k;
3441618cc2edSLisandro Dalcin         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3442618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3443618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3444618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3445618cc2edSLisandro Dalcin         }
3446618cc2edSLisandro Dalcin       }
3447b51a4376SLisandro Dalcin     }
3448b51a4376SLisandro Dalcin     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3449b51a4376SLisandro Dalcin     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3450618cc2edSLisandro Dalcin     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3451b51a4376SLisandro Dalcin     ierr = PetscFree(nnz);CHKERRQ(ierr);
3452b51a4376SLisandro Dalcin   }
3453b51a4376SLisandro Dalcin 
3454b51a4376SLisandro Dalcin   /* store matrix values */
3455b51a4376SLisandro Dalcin   for (i=0; i<m; i++) {
3456b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3457618cc2edSLisandro Dalcin     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3458b51a4376SLisandro Dalcin   }
3459b51a4376SLisandro Dalcin 
3460b51a4376SLisandro Dalcin   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3461b51a4376SLisandro Dalcin   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3462b51a4376SLisandro Dalcin   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3463b51a4376SLisandro Dalcin   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3464b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3465b51a4376SLisandro Dalcin }
3466b51a4376SLisandro Dalcin 
3467b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3468b51a4376SLisandro Dalcin {
3469b51a4376SLisandro Dalcin   PetscErrorCode ierr;
34707f489da9SVaclav Hapla   PetscBool      isbinary;
3471f501eaabSShri Abhyankar 
3472f501eaabSShri Abhyankar   PetscFunctionBegin;
34737f489da9SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
34742c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3475b51a4376SLisandro Dalcin   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3476f501eaabSShri Abhyankar   PetscFunctionReturn(0);
3477f501eaabSShri Abhyankar }
3478f501eaabSShri Abhyankar 
3479273d9f13SBarry Smith /*@C
3480273d9f13SBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3481273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
3482273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3483273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3484273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
34852593348eSBarry Smith 
3486d083f849SBarry Smith    Collective
3487273d9f13SBarry Smith 
3488273d9f13SBarry Smith    Input Parameters:
3489273d9f13SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
3490bb7ae925SBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3491bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3492273d9f13SBarry Smith .  m - number of rows
3493273d9f13SBarry Smith .  n - number of columns
349435d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
349535d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
34960298fd71SBarry Smith          (possibly different for each block row) or NULL
3497273d9f13SBarry Smith 
3498273d9f13SBarry Smith    Output Parameter:
3499273d9f13SBarry Smith .  A - the matrix
3500273d9f13SBarry Smith 
3501175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3502f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3503175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3504175b88e8SBarry Smith 
3505273d9f13SBarry Smith    Options Database Keys:
3506a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
3507273d9f13SBarry Smith                      block calculations (much slower)
3508a2b725a8SWilliam Gropp -    -mat_block_size - size of the blocks to use
3509273d9f13SBarry Smith 
3510273d9f13SBarry Smith    Level: intermediate
3511273d9f13SBarry Smith 
3512273d9f13SBarry Smith    Notes:
3513d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
3514d1be2dadSMatthew Knepley 
351549a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
351649a6f317SBarry Smith 
351735d8aa7fSBarry Smith    A nonzero block is any block that has 1 or more nonzeros in it
351835d8aa7fSBarry Smith 
3519273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3520273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3521273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3522273d9f13SBarry Smith 
3523273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
35240298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3525a7f22e61SSatish Balay    allocation.  See Users-Manual: ch_mat for details.
3527273d9f13SBarry Smith 
352869b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3529273d9f13SBarry Smith @*/
35307087cfbeSBarry Smith PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3531273d9f13SBarry Smith {
3532dfbe8321SBarry Smith   PetscErrorCode ierr;
3533273d9f13SBarry Smith 
3534273d9f13SBarry Smith   PetscFunctionBegin;
3535f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3536f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3537273d9f13SBarry Smith   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3538367daffbSBarry Smith   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3539273d9f13SBarry Smith   PetscFunctionReturn(0);
3540273d9f13SBarry Smith }
3541273d9f13SBarry Smith 
3542273d9f13SBarry Smith /*@C
3543273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3544273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
3545273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3546273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3547273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
3548273d9f13SBarry Smith 
3549d083f849SBarry Smith    Collective
3550273d9f13SBarry Smith 
3551273d9f13SBarry Smith    Input Parameters:
35521c4f3114SJed Brown +  B - the matrix
3553bb7ae925SBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3554bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3555273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
3556273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
35570298fd71SBarry Smith          (possibly different for each block row) or NULL
3558273d9f13SBarry Smith 
3559273d9f13SBarry Smith    Options Database Keys:
3560a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
3561273d9f13SBarry Smith                      block calculations (much slower)
3562a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3563273d9f13SBarry Smith 
3564273d9f13SBarry Smith    Level: intermediate
3565273d9f13SBarry Smith 
3566273d9f13SBarry Smith    Notes:
356749a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
356849a6f317SBarry Smith 
3569aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3570aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3571aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3572aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3573aa95bbe8SBarry Smith 
3574273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3575273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3576273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3577273d9f13SBarry Smith 
3578273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
35790298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3580a7f22e61SSatish Balay    allocation.  See Users-Manual: ch_mat for details.
3581273d9f13SBarry Smith 
358269b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3583273d9f13SBarry Smith @*/
35847087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3585273d9f13SBarry Smith {
35864ac538c5SBarry Smith   PetscErrorCode ierr;
3587273d9f13SBarry Smith 
3588273d9f13SBarry Smith   PetscFunctionBegin;
35896ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
35906ba663aaSJed Brown   PetscValidType(B,1);
35916ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
35924ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3593273d9f13SBarry Smith   PetscFunctionReturn(0);
3594273d9f13SBarry Smith }
3595a1d92eedSBarry Smith 
3596725b52f3SLisandro Dalcin /*@C
3597664954b6SBarry Smith    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3598725b52f3SLisandro Dalcin 
3599d083f849SBarry Smith    Collective
3600725b52f3SLisandro Dalcin 
3601725b52f3SLisandro Dalcin    Input Parameters:
36021c4f3114SJed Brown +  B - the matrix
3603725b52f3SLisandro Dalcin .  i - the indices into j for the start of each local row (starts with zero)
3604725b52f3SLisandro Dalcin .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3605725b52f3SLisandro Dalcin -  v - optional values in the matrix
3606725b52f3SLisandro Dalcin 
3607664954b6SBarry Smith    Level: advanced
3608725b52f3SLisandro Dalcin 
36093adadaf3SJed Brown    Notes:
36103adadaf3SJed Brown    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
36113adadaf3SJed Brown    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
36123adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
36133adadaf3SJed Brown    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
36143adadaf3SJed Brown    block column and the second index is over columns within a block.
36153adadaf3SJed Brown 
3616664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3617664954b6SBarry Smith 
3618725b52f3SLisandro Dalcin .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3619725b52f3SLisandro Dalcin @*/
36207087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3621725b52f3SLisandro Dalcin {
36224ac538c5SBarry Smith   PetscErrorCode ierr;
3623725b52f3SLisandro Dalcin 
3624725b52f3SLisandro Dalcin   PetscFunctionBegin;
36256ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
36266ba663aaSJed Brown   PetscValidType(B,1);
36276ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
36284ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3629725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3630725b52f3SLisandro Dalcin }
3631725b52f3SLisandro Dalcin 
3632c75a6043SHong Zhang /*@
3633dfb205c3SBarry Smith      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3634c75a6043SHong Zhang 
3635d083f849SBarry Smith      Collective
3636c75a6043SHong Zhang 
3637c75a6043SHong Zhang    Input Parameters:
3638c75a6043SHong Zhang +  comm - must be an MPI communicator of size 1
3639c75a6043SHong Zhang .  bs - size of block
3640c75a6043SHong Zhang .  m - number of rows
3641c75a6043SHong Zhang .  n - number of columns
3642483a2f95SBarry Smith .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3643c75a6043SHong Zhang .  j - column indices
3644c75a6043SHong Zhang -  a - matrix values
3645c75a6043SHong Zhang 
3646c75a6043SHong Zhang    Output Parameter:
3647c75a6043SHong Zhang .  mat - the matrix
3648c75a6043SHong Zhang 
3649dfb205c3SBarry Smith    Level: advanced
3650c75a6043SHong Zhang 
3651c75a6043SHong Zhang    Notes:
3652c75a6043SHong Zhang        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3653c75a6043SHong Zhang     once the matrix is destroyed
3654c75a6043SHong Zhang 
3655c75a6043SHong Zhang        You cannot set new nonzero locations into this matrix, that will generate an error.
3656c75a6043SHong Zhang 
3657c75a6043SHong Zhang        The i and j indices are 0 based
3658c75a6043SHong Zhang 
3659dfb205c3SBarry Smith        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3660dfb205c3SBarry Smith 
36613adadaf3SJed Brown       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
36623adadaf3SJed Brown       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
36633adadaf3SJed Brown       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
36643adadaf3SJed Brown       with column-major ordering within blocks.
3665dfb205c3SBarry Smith 
366669b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3667c75a6043SHong Zhang 
3668c75a6043SHong Zhang @*/
3669c3c607ccSBarry Smith PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3670c75a6043SHong Zhang {
3671c75a6043SHong Zhang   PetscErrorCode ierr;
3672c75a6043SHong Zhang   PetscInt       ii;
3673c75a6043SHong Zhang   Mat_SeqBAIJ    *baij;
3674c75a6043SHong Zhang 
3675c75a6043SHong Zhang   PetscFunctionBegin;
36762c71b3e2SJacob Faibussowitsch   PetscCheckFalse(bs != 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs);
36772c71b3e2SJacob Faibussowitsch   PetscCheckFalse(m > 0 && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3678c75a6043SHong Zhang 
3679c75a6043SHong Zhang   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3680c75a6043SHong Zhang   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3681c75a6043SHong Zhang   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3682f4259b30SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3683c75a6043SHong Zhang   baij = (Mat_SeqBAIJ*)(*mat)->data;
3684dcca6d9dSJed Brown   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
36853bb1ff40SBarry Smith   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3686c75a6043SHong Zhang 
3687c75a6043SHong Zhang   baij->i = i;
3688c75a6043SHong Zhang   baij->j = j;
3689c75a6043SHong Zhang   baij->a = a;
369026fbe8dcSKarl Rupp 
3691c75a6043SHong Zhang   baij->singlemalloc = PETSC_FALSE;
3692c75a6043SHong Zhang   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3693e6b907acSBarry Smith   baij->free_a       = PETSC_FALSE;
3694e6b907acSBarry Smith   baij->free_ij      = PETSC_FALSE;
3695c75a6043SHong Zhang 
3696c75a6043SHong Zhang   for (ii=0; ii<m; ii++) {
3697c75a6043SHong Zhang     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
36986bdcaf15SBarry Smith     PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]);
3699c75a6043SHong Zhang   }
370076bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3701c75a6043SHong Zhang     for (ii=0; ii<baij->i[m]; ii++) {
37026bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
37036bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3704c75a6043SHong Zhang     }
370576bd3646SJed Brown   }
3706c75a6043SHong Zhang 
3707c75a6043SHong Zhang   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708c75a6043SHong Zhang   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3709c75a6043SHong Zhang   PetscFunctionReturn(0);
3710c75a6043SHong Zhang }
3711bdf6f3fcSHong Zhang 
3712bdf6f3fcSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3713bdf6f3fcSHong Zhang {
3714bdf6f3fcSHong Zhang   PetscErrorCode ierr;
37158761c3d6SHong Zhang   PetscMPIInt    size;
3716bdf6f3fcSHong Zhang 
3717bdf6f3fcSHong Zhang   PetscFunctionBegin;
3718ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
37198761c3d6SHong Zhang   if (size == 1 && scall == MAT_REUSE_MATRIX) {
37208761c3d6SHong Zhang     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
37218761c3d6SHong Zhang   } else {
3722bdf6f3fcSHong Zhang     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
37238761c3d6SHong Zhang   }
3724bdf6f3fcSHong Zhang   PetscFunctionReturn(0);
3725bdf6f3fcSHong Zhang }
3726