xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d7806518bd2a386d35d6c9ad04d8f79c094eac64)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      zeropivotdetected;
21 
22   PetscFunctionBegin;
23   if (a->idiagvalid) {
24     if (values) *values = a->idiag;
25     PetscFunctionReturn(0);
26   }
27   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
28   diag_offset = a->diag;
29   if (!a->idiag) {
30     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
31     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
32   }
33   diag  = a->idiag;
34   mdiag = a->idiag+bs2*mbs;
35   if (values) *values = a->idiag;
36   /* factor and invert each block */
37   switch (bs) {
38   case 1:
39     for (i=0; i<mbs; i++) {
40       odiag    = v + 1*diag_offset[i];
41       diag[0]  = odiag[0];
42       mdiag[0] = odiag[0];
43       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
44       diag    += 1;
45       mdiag   += 1;
46     }
47     break;
48   case 2:
49     for (i=0; i<mbs; i++) {
50       odiag    = v + 4*diag_offset[i];
51       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
52       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
53       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
54       diag    += 4;
55       mdiag   += 4;
56     }
57     break;
58   case 3:
59     for (i=0; i<mbs; i++) {
60       odiag    = v + 9*diag_offset[i];
61       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
62       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
63       diag[8]  = odiag[8];
64       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
65       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
66       mdiag[8] = odiag[8];
67       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr);
68       if (zeropivotdetected) break;
69       diag    += 9;
70       mdiag   += 9;
71     }
72     break;
73   case 4:
74     for (i=0; i<mbs; i++) {
75       odiag  = v + 16*diag_offset[i];
76       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
77       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
78       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
79       diag  += 16;
80       mdiag += 16;
81     }
82     break;
83   case 5:
84     for (i=0; i<mbs; i++) {
85       odiag  = v + 25*diag_offset[i];
86       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
87       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
88       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
89       diag  += 25;
90       mdiag += 25;
91     }
92     break;
93   case 6:
94     for (i=0; i<mbs; i++) {
95       odiag  = v + 36*diag_offset[i];
96       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
97       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
99       diag  += 36;
100       mdiag += 36;
101     }
102     break;
103   case 7:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 49*diag_offset[i];
106       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
107       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
108       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
109       diag  += 49;
110       mdiag += 49;
111     }
112     break;
113   default:
114     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
115     for (i=0; i<mbs; i++) {
116       odiag  = v + bs2*diag_offset[i];
117       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
118       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
119       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
120       diag  += bs2;
121       mdiag += bs2;
122     }
123     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
124   }
125   a->idiagvalid = PETSC_TRUE;
126   if (zeropivotdetected) {
127     A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128   }
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSOR_SeqBAIJ"
134 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
135 {
136   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
137   PetscScalar       *x,*work,*w,*workt,*t;
138   const MatScalar   *v,*aa = a->a, *idiag;
139   const PetscScalar *b,*xb;
140   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
141   PetscErrorCode    ierr;
142   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
143   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
144 
145   PetscFunctionBegin;
146   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
147   its = its*lits;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
150   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
151   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
152   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
153 
154   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
155 
156   if (!m) PetscFunctionReturn(0);
157   diag  = a->diag;
158   idiag = a->idiag;
159   k    = PetscMax(A->rmap->n,A->cmap->n);
160   if (!a->mult_work) {
161     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
162   }
163   if (!a->sor_workt) {
164     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
165   }
166   if (!a->sor_work) {
167     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
168   }
169   work = a->mult_work;
170   t    = a->sor_workt;
171   w    = a->sor_work;
172 
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178       switch (bs) {
179       case 1:
180         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181         t[0] = b[0];
182         i2     = 1;
183         idiag += 1;
184         for (i=1; i<m; i++) {
185           v  = aa + ai[i];
186           vi = aj + ai[i];
187           nz = diag[i] - ai[i];
188           s[0] = b[i2];
189           for (j=0; j<nz; j++) {
190             xw[0] = x[vi[j]];
191             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192           }
193           t[i2] = s[0];
194           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195           x[i2]  = xw[0];
196           idiag += 1;
197           i2    += 1;
198         }
199         break;
200       case 2:
201         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202         t[0] = b[0]; t[1] = b[1];
203         i2     = 2;
204         idiag += 4;
205         for (i=1; i<m; i++) {
206           v  = aa + 4*ai[i];
207           vi = aj + ai[i];
208           nz = diag[i] - ai[i];
209           s[0] = b[i2]; s[1] = b[i2+1];
210           for (j=0; j<nz; j++) {
211             idx = 2*vi[j];
212             it  = 4*j;
213             xw[0] = x[idx]; xw[1] = x[1+idx];
214             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215           }
216           t[i2] = s[0]; t[i2+1] = s[1];
217           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218           x[i2]   = xw[0]; x[i2+1] = xw[1];
219           idiag  += 4;
220           i2     += 2;
221         }
222         break;
223       case 3:
224         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226         i2     = 3;
227         idiag += 9;
228         for (i=1; i<m; i++) {
229           v  = aa + 9*ai[i];
230           vi = aj + ai[i];
231           nz = diag[i] - ai[i];
232           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233           while (nz--) {
234             idx = 3*(*vi++);
235             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237             v  += 9;
238           }
239           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242           idiag  += 9;
243           i2     += 3;
244         }
245         break;
246       case 4:
247         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249         i2     = 4;
250         idiag += 16;
251         for (i=1; i<m; i++) {
252           v  = aa + 16*ai[i];
253           vi = aj + ai[i];
254           nz = diag[i] - ai[i];
255           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256           while (nz--) {
257             idx = 4*(*vi++);
258             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260             v  += 16;
261           }
262           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265           idiag  += 16;
266           i2     += 4;
267         }
268         break;
269       case 5:
270         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272         i2     = 5;
273         idiag += 25;
274         for (i=1; i<m; i++) {
275           v  = aa + 25*ai[i];
276           vi = aj + ai[i];
277           nz = diag[i] - ai[i];
278           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279           while (nz--) {
280             idx = 5*(*vi++);
281             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283             v  += 25;
284           }
285           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288           idiag  += 25;
289           i2     += 5;
290         }
291         break;
292       case 6:
293         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295         i2     = 6;
296         idiag += 36;
297         for (i=1; i<m; i++) {
298           v  = aa + 36*ai[i];
299           vi = aj + ai[i];
300           nz = diag[i] - ai[i];
301           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302           while (nz--) {
303             idx = 6*(*vi++);
304             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
305             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307             v  += 36;
308           }
309           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313           idiag  += 36;
314           i2     += 6;
315         }
316         break;
317       case 7:
318         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321         i2     = 7;
322         idiag += 49;
323         for (i=1; i<m; i++) {
324           v  = aa + 49*ai[i];
325           vi = aj + ai[i];
326           nz = diag[i] - ai[i];
327           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
328           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329           while (nz--) {
330             idx = 7*(*vi++);
331             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
332             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334             v  += 49;
335           }
336           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341           idiag  += 49;
342           i2     += 7;
343         }
344         break;
345       default:
346         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
348         i2     = bs;
349         idiag += bs2;
350         for (i=1; i<m; i++) {
351           v  = aa + bs2*ai[i];
352           vi = aj + ai[i];
353           nz = diag[i] - ai[i];
354 
355           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
356           /* copy all rows of x that are needed into contiguous space */
357           workt = work;
358           for (j=0; j<nz; j++) {
359             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
360             workt += bs;
361           }
362           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
364           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
365 
366           idiag += bs2;
367           i2    += bs;
368         }
369         break;
370       }
371       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
373       xb = t;
374     }
375     else xb = b;
376     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377       idiag = a->idiag+bs2*(a->mbs-1);
378       i2 = bs * (m-1);
379       switch (bs) {
380       case 1:
381         s[0]  = xb[i2];
382         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383         x[i2] = xw[0];
384         i2   -= 1;
385         for (i=m-2; i>=0; i--) {
386           v  = aa + (diag[i]+1);
387           vi = aj + diag[i] + 1;
388           nz = ai[i+1] - diag[i] - 1;
389           s[0] = xb[i2];
390           for (j=0; j<nz; j++) {
391             xw[0] = x[vi[j]];
392             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393           }
394           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395           x[i2]  = xw[0];
396           idiag -= 1;
397           i2    -= 1;
398         }
399         break;
400       case 2:
401         s[0]  = xb[i2]; s[1] = xb[i2+1];
402         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403         x[i2] = xw[0]; x[i2+1] = xw[1];
404         i2    -= 2;
405         idiag -= 4;
406         for (i=m-2; i>=0; i--) {
407           v  = aa + 4*(diag[i] + 1);
408           vi = aj + diag[i] + 1;
409           nz = ai[i+1] - diag[i] - 1;
410           s[0] = xb[i2]; s[1] = xb[i2+1];
411           for (j=0; j<nz; j++) {
412             idx = 2*vi[j];
413             it  = 4*j;
414             xw[0] = x[idx]; xw[1] = x[1+idx];
415             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416           }
417           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418           x[i2]   = xw[0]; x[i2+1] = xw[1];
419           idiag  -= 4;
420           i2     -= 2;
421         }
422         break;
423       case 3:
424         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427         i2    -= 3;
428         idiag -= 9;
429         for (i=m-2; i>=0; i--) {
430           v  = aa + 9*(diag[i]+1);
431           vi = aj + diag[i] + 1;
432           nz = ai[i+1] - diag[i] - 1;
433           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434           while (nz--) {
435             idx = 3*(*vi++);
436             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438             v  += 9;
439           }
440           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442           idiag  -= 9;
443           i2     -= 3;
444         }
445         break;
446       case 4:
447         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450         i2    -= 4;
451         idiag -= 16;
452         for (i=m-2; i>=0; i--) {
453           v  = aa + 16*(diag[i]+1);
454           vi = aj + diag[i] + 1;
455           nz = ai[i+1] - diag[i] - 1;
456           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457           while (nz--) {
458             idx = 4*(*vi++);
459             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461             v  += 16;
462           }
463           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465           idiag  -= 16;
466           i2     -= 4;
467         }
468         break;
469       case 5:
470         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473         i2    -= 5;
474         idiag -= 25;
475         for (i=m-2; i>=0; i--) {
476           v  = aa + 25*(diag[i]+1);
477           vi = aj + diag[i] + 1;
478           nz = ai[i+1] - diag[i] - 1;
479           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480           while (nz--) {
481             idx = 5*(*vi++);
482             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484             v  += 25;
485           }
486           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488           idiag  -= 25;
489           i2     -= 5;
490         }
491         break;
492       case 6:
493         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496         i2    -= 6;
497         idiag -= 36;
498         for (i=m-2; i>=0; i--) {
499           v  = aa + 36*(diag[i]+1);
500           vi = aj + diag[i] + 1;
501           nz = ai[i+1] - diag[i] - 1;
502           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503           while (nz--) {
504             idx = 6*(*vi++);
505             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
506             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508             v  += 36;
509           }
510           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512           idiag  -= 36;
513           i2     -= 6;
514         }
515         break;
516       case 7:
517         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
518         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522         i2    -= 7;
523         idiag -= 49;
524         for (i=m-2; i>=0; i--) {
525           v  = aa + 49*(diag[i]+1);
526           vi = aj + diag[i] + 1;
527           nz = ai[i+1] - diag[i] - 1;
528           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
529           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530           while (nz--) {
531             idx = 7*(*vi++);
532             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
533             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535             v  += 49;
536           }
537           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540           idiag  -= 49;
541           i2     -= 7;
542         }
543         break;
544       default:
545         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547         i2    -= bs;
548         idiag -= bs2;
549         for (i=m-2; i>=0; i--) {
550           v  = aa + bs2*(diag[i]+1);
551           vi = aj + diag[i] + 1;
552           nz = ai[i+1] - diag[i] - 1;
553 
554           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
555           /* copy all rows of x that are needed into contiguous space */
556           workt = work;
557           for (j=0; j<nz; j++) {
558             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
559             workt += bs;
560           }
561           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
562           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
563 
564           idiag -= bs2;
565           i2    -= bs;
566         }
567         break;
568       }
569       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
570     }
571     its--;
572   }
573   while (its--) {
574     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575       idiag = a->idiag;
576       i2 = 0;
577       switch (bs) {
578       case 1:
579         for (i=0; i<m; i++) {
580           v  = aa + ai[i];
581           vi = aj + ai[i];
582           nz = ai[i+1] - ai[i];
583           s[0] = b[i2];
584           for (j=0; j<nz; j++) {
585             xw[0] = x[vi[j]];
586             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587           }
588           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589           x[i2] += xw[0];
590           idiag += 1;
591           i2    += 1;
592         }
593         break;
594       case 2:
595         for (i=0; i<m; i++) {
596           v  = aa + 4*ai[i];
597           vi = aj + ai[i];
598           nz = ai[i+1] - ai[i];
599           s[0] = b[i2]; s[1] = b[i2+1];
600           for (j=0; j<nz; j++) {
601             idx = 2*vi[j];
602             it  = 4*j;
603             xw[0] = x[idx]; xw[1] = x[1+idx];
604             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605           }
606           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607           x[i2]  += xw[0]; x[i2+1] += xw[1];
608           idiag  += 4;
609           i2     += 2;
610         }
611         break;
612       case 3:
613         for (i=0; i<m; i++) {
614           v  = aa + 9*ai[i];
615           vi = aj + ai[i];
616           nz = ai[i+1] - ai[i];
617           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618           while (nz--) {
619             idx = 3*(*vi++);
620             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622             v  += 9;
623           }
624           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626           idiag  += 9;
627           i2     += 3;
628         }
629         break;
630       case 4:
631         for (i=0; i<m; i++) {
632           v  = aa + 16*ai[i];
633           vi = aj + ai[i];
634           nz = ai[i+1] - ai[i];
635           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636           while (nz--) {
637             idx = 4*(*vi++);
638             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640             v  += 16;
641           }
642           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644           idiag  += 16;
645           i2     += 4;
646         }
647         break;
648       case 5:
649         for (i=0; i<m; i++) {
650           v  = aa + 25*ai[i];
651           vi = aj + ai[i];
652           nz = ai[i+1] - ai[i];
653           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654           while (nz--) {
655             idx = 5*(*vi++);
656             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658             v  += 25;
659           }
660           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662           idiag  += 25;
663           i2     += 5;
664         }
665         break;
666       case 6:
667         for (i=0; i<m; i++) {
668           v  = aa + 36*ai[i];
669           vi = aj + ai[i];
670           nz = ai[i+1] - ai[i];
671           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672           while (nz--) {
673             idx = 6*(*vi++);
674             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
675             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677             v  += 36;
678           }
679           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682           idiag  += 36;
683           i2     += 6;
684         }
685         break;
686       case 7:
687         for (i=0; i<m; i++) {
688           v  = aa + 49*ai[i];
689           vi = aj + ai[i];
690           nz = ai[i+1] - ai[i];
691           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
692           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693           while (nz--) {
694             idx = 7*(*vi++);
695             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
696             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698             v  += 49;
699           }
700           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703           idiag  += 49;
704           i2     += 7;
705         }
706         break;
707       default:
708         for (i=0; i<m; i++) {
709           v  = aa + bs2*ai[i];
710           vi = aj + ai[i];
711           nz = ai[i+1] - ai[i];
712 
713           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
714           /* copy all rows of x that are needed into contiguous space */
715           workt = work;
716           for (j=0; j<nz; j++) {
717             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
718             workt += bs;
719           }
720           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
722 
723           idiag += bs2;
724           i2    += bs;
725         }
726         break;
727       }
728       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
729     }
730     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731       idiag = a->idiag+bs2*(a->mbs-1);
732       i2 = bs * (m-1);
733       switch (bs) {
734       case 1:
735         for (i=m-1; i>=0; i--) {
736           v  = aa + ai[i];
737           vi = aj + ai[i];
738           nz = ai[i+1] - ai[i];
739           s[0] = b[i2];
740           for (j=0; j<nz; j++) {
741             xw[0] = x[vi[j]];
742             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743           }
744           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745           x[i2] += xw[0];
746           idiag -= 1;
747           i2    -= 1;
748         }
749         break;
750       case 2:
751         for (i=m-1; i>=0; i--) {
752           v  = aa + 4*ai[i];
753           vi = aj + ai[i];
754           nz = ai[i+1] - ai[i];
755           s[0] = b[i2]; s[1] = b[i2+1];
756           for (j=0; j<nz; j++) {
757             idx = 2*vi[j];
758             it  = 4*j;
759             xw[0] = x[idx]; xw[1] = x[1+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761           }
762           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763           x[i2]  += xw[0]; x[i2+1] += xw[1];
764           idiag  -= 4;
765           i2     -= 2;
766         }
767         break;
768       case 3:
769         for (i=m-1; i>=0; i--) {
770           v  = aa + 9*ai[i];
771           vi = aj + ai[i];
772           nz = ai[i+1] - ai[i];
773           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774           while (nz--) {
775             idx = 3*(*vi++);
776             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778             v  += 9;
779           }
780           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782           idiag  -= 9;
783           i2     -= 3;
784         }
785         break;
786       case 4:
787         for (i=m-1; i>=0; i--) {
788           v  = aa + 16*ai[i];
789           vi = aj + ai[i];
790           nz = ai[i+1] - ai[i];
791           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792           while (nz--) {
793             idx = 4*(*vi++);
794             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796             v  += 16;
797           }
798           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800           idiag  -= 16;
801           i2     -= 4;
802         }
803         break;
804       case 5:
805         for (i=m-1; i>=0; i--) {
806           v  = aa + 25*ai[i];
807           vi = aj + ai[i];
808           nz = ai[i+1] - ai[i];
809           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810           while (nz--) {
811             idx = 5*(*vi++);
812             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814             v  += 25;
815           }
816           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818           idiag  -= 25;
819           i2     -= 5;
820         }
821         break;
822       case 6:
823         for (i=m-1; i>=0; i--) {
824           v  = aa + 36*ai[i];
825           vi = aj + ai[i];
826           nz = ai[i+1] - ai[i];
827           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828           while (nz--) {
829             idx = 6*(*vi++);
830             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
831             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833             v  += 36;
834           }
835           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838           idiag  -= 36;
839           i2     -= 6;
840         }
841         break;
842       case 7:
843         for (i=m-1; i>=0; i--) {
844           v  = aa + 49*ai[i];
845           vi = aj + ai[i];
846           nz = ai[i+1] - ai[i];
847           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
848           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849           while (nz--) {
850             idx = 7*(*vi++);
851             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
852             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854             v  += 49;
855           }
856           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859           idiag  -= 49;
860           i2     -= 7;
861         }
862         break;
863       default:
864         for (i=m-1; i>=0; i--) {
865           v  = aa + bs2*ai[i];
866           vi = aj + ai[i];
867           nz = ai[i+1] - ai[i];
868 
869           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
870           /* copy all rows of x that are needed into contiguous space */
871           workt = work;
872           for (j=0; j<nz; j++) {
873             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
874             workt += bs;
875           }
876           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
878 
879           idiag -= bs2;
880           i2    -= bs;
881         }
882         break;
883       }
884       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
885     }
886   }
887   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 
893 /*
894     Special version for direct calls from Fortran (Used in PETSc-fun3d)
895 */
896 #if defined(PETSC_HAVE_FORTRAN_CAPS)
897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
899 #define matsetvaluesblocked4_ matsetvaluesblocked4
900 #endif
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "matsetvaluesblocked4_"
904 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
905 {
906   Mat               A  = *AA;
907   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
908   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
909   PetscInt          *ai    =a->i,*ailen=a->ilen;
910   PetscInt          *aj    =a->j,stepval,lastcol = -1;
911   const PetscScalar *value = v;
912   MatScalar         *ap,*aa = a->a,*bap;
913 
914   PetscFunctionBegin;
915   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916   stepval = (n-1)*4;
917   for (k=0; k<m; k++) { /* loop over added rows */
918     row  = im[k];
919     rp   = aj + ai[row];
920     ap   = aa + 16*ai[row];
921     nrow = ailen[row];
922     low  = 0;
923     high = nrow;
924     for (l=0; l<n; l++) { /* loop over added columns */
925       col = in[l];
926       if (col <= lastcol)  low = 0;
927       else                high = nrow;
928       lastcol = col;
929       value   = v + k*(stepval+4 + l)*4;
930       while (high-low > 7) {
931         t = (low+high)/2;
932         if (rp[t] > col) high = t;
933         else             low  = t;
934       }
935       for (i=low; i<high; i++) {
936         if (rp[i] > col) break;
937         if (rp[i] == col) {
938           bap = ap +  16*i;
939           for (ii=0; ii<4; ii++,value+=stepval) {
940             for (jj=ii; jj<16; jj+=4) {
941               bap[jj] += *value++;
942             }
943           }
944           goto noinsert2;
945         }
946       }
947       N = nrow++ - 1;
948       high++; /* added new column index thus must search to one higher than before */
949       /* shift up all the later entries in this row */
950       for (ii=N; ii>=i; ii--) {
951         rp[ii+1] = rp[ii];
952         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
953       }
954       if (N >= i) {
955         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
956       }
957       rp[i] = col;
958       bap   = ap +  16*i;
959       for (ii=0; ii<4; ii++,value+=stepval) {
960         for (jj=ii; jj<16; jj+=4) {
961           bap[jj] = *value++;
962         }
963       }
964       noinsert2:;
965       low = i;
966     }
967     ailen[row] = nrow;
968   }
969   PetscFunctionReturnVoid();
970 }
971 
972 #if defined(PETSC_HAVE_FORTRAN_CAPS)
973 #define matsetvalues4_ MATSETVALUES4
974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975 #define matsetvalues4_ matsetvalues4
976 #endif
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatSetValues4_"
980 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
981 {
982   Mat         A  = *AA;
983   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
984   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
985   PetscInt    *ai=a->i,*ailen=a->ilen;
986   PetscInt    *aj=a->j,brow,bcol;
987   PetscInt    ridx,cidx,lastcol = -1;
988   MatScalar   *ap,value,*aa=a->a,*bap;
989 
990   PetscFunctionBegin;
991   for (k=0; k<m; k++) { /* loop over added rows */
992     row  = im[k]; brow = row/4;
993     rp   = aj + ai[brow];
994     ap   = aa + 16*ai[brow];
995     nrow = ailen[brow];
996     low  = 0;
997     high = nrow;
998     for (l=0; l<n; l++) { /* loop over added columns */
999       col   = in[l]; bcol = col/4;
1000       ridx  = row % 4; cidx = col % 4;
1001       value = v[l + k*n];
1002       if (col <= lastcol)  low = 0;
1003       else                high = nrow;
1004       lastcol = col;
1005       while (high-low > 7) {
1006         t = (low+high)/2;
1007         if (rp[t] > bcol) high = t;
1008         else              low  = t;
1009       }
1010       for (i=low; i<high; i++) {
1011         if (rp[i] > bcol) break;
1012         if (rp[i] == bcol) {
1013           bap   = ap +  16*i + 4*cidx + ridx;
1014           *bap += value;
1015           goto noinsert1;
1016         }
1017       }
1018       N = nrow++ - 1;
1019       high++; /* added new column thus must search to one higher than before */
1020       /* shift up all the later entries in this row */
1021       for (ii=N; ii>=i; ii--) {
1022         rp[ii+1] = rp[ii];
1023         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1024       }
1025       if (N>=i) {
1026         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1027       }
1028       rp[i]                    = bcol;
1029       ap[16*i + 4*cidx + ridx] = value;
1030 noinsert1:;
1031       low = i;
1032     }
1033     ailen[brow] = nrow;
1034   }
1035   PetscFunctionReturnVoid();
1036 }
1037 
1038 /*
1039      Checks for missing diagonals
1040 */
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1043 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1044 {
1045   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1046   PetscErrorCode ierr;
1047   PetscInt       *diag,*ii = a->i,i;
1048 
1049   PetscFunctionBegin;
1050   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1051   *missing = PETSC_FALSE;
1052   if (A->rmap->n > 0 && !ii) {
1053     *missing = PETSC_TRUE;
1054     if (d) *d = 0;
1055     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1056   } else {
1057     diag = a->diag;
1058     for (i=0; i<a->mbs; i++) {
1059       if (diag[i] >= ii[i+1]) {
1060         *missing = PETSC_TRUE;
1061         if (d) *d = i;
1062         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1063         break;
1064       }
1065     }
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1072 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1073 {
1074   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1075   PetscErrorCode ierr;
1076   PetscInt       i,j,m = a->mbs;
1077 
1078   PetscFunctionBegin;
1079   if (!a->diag) {
1080     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1081     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1082     a->free_diag = PETSC_TRUE;
1083   }
1084   for (i=0; i<m; i++) {
1085     a->diag[i] = a->i[i+1];
1086     for (j=a->i[i]; j<a->i[i+1]; j++) {
1087       if (a->j[j] == i) {
1088         a->diag[i] = j;
1089         break;
1090       }
1091     }
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1099 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1100 {
1101   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1102   PetscErrorCode ierr;
1103   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1104   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1105 
1106   PetscFunctionBegin;
1107   *nn = n;
1108   if (!ia) PetscFunctionReturn(0);
1109   if (symmetric) {
1110     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1111     nz   = tia[n];
1112   } else {
1113     tia = a->i; tja = a->j;
1114   }
1115 
1116   if (!blockcompressed && bs > 1) {
1117     (*nn) *= bs;
1118     /* malloc & create the natural set of indices */
1119     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1120     if (n) {
1121       (*ia)[0] = 0;
1122       for (j=1; j<bs; j++) {
1123         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1124       }
1125     }
1126 
1127     for (i=1; i<n; i++) {
1128       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1129       for (j=1; j<bs; j++) {
1130         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1131       }
1132     }
1133     if (n) {
1134       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1135     }
1136 
1137     if (inja) {
1138       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1139       cnt = 0;
1140       for (i=0; i<n; i++) {
1141         for (j=0; j<bs; j++) {
1142           for (k=tia[i]; k<tia[i+1]; k++) {
1143             for (l=0; l<bs; l++) {
1144               (*ja)[cnt++] = bs*tja[k] + l;
1145             }
1146           }
1147         }
1148       }
1149     }
1150 
1151     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1152       ierr = PetscFree(tia);CHKERRQ(ierr);
1153       ierr = PetscFree(tja);CHKERRQ(ierr);
1154     }
1155   } else if (oshift == 1) {
1156     if (symmetric) {
1157       nz = tia[A->rmap->n/bs];
1158       /*  add 1 to i and j indices */
1159       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1160       *ia = tia;
1161       if (ja) {
1162         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1163         *ja = tja;
1164       }
1165     } else {
1166       nz = a->i[A->rmap->n/bs];
1167       /* malloc space and  add 1 to i and j indices */
1168       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1169       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1170       if (ja) {
1171         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1172         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1173       }
1174     }
1175   } else {
1176     *ia = tia;
1177     if (ja) *ja = tja;
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1184 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1185 {
1186   PetscErrorCode ierr;
1187 
1188   PetscFunctionBegin;
1189   if (!ia) PetscFunctionReturn(0);
1190   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1191     ierr = PetscFree(*ia);CHKERRQ(ierr);
1192     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1193   }
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1199 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1200 {
1201   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205 #if defined(PETSC_USE_LOG)
1206   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1207 #endif
1208   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1209   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1210   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1211   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1212   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1213   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1214   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1215   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1216   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1217   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1218   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1219   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1220   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1221 
1222   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1223   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1224   ierr = PetscFree(A->data);CHKERRQ(ierr);
1225 
1226   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1242 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1243 {
1244   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248   switch (op) {
1249   case MAT_ROW_ORIENTED:
1250     a->roworiented = flg;
1251     break;
1252   case MAT_KEEP_NONZERO_PATTERN:
1253     a->keepnonzeropattern = flg;
1254     break;
1255   case MAT_NEW_NONZERO_LOCATIONS:
1256     a->nonew = (flg ? 0 : 1);
1257     break;
1258   case MAT_NEW_NONZERO_LOCATION_ERR:
1259     a->nonew = (flg ? -1 : 0);
1260     break;
1261   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1262     a->nonew = (flg ? -2 : 0);
1263     break;
1264   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1265     a->nounused = (flg ? -1 : 0);
1266     break;
1267   case MAT_NEW_DIAGONALS:
1268   case MAT_IGNORE_OFF_PROC_ENTRIES:
1269   case MAT_USE_HASH_TABLE:
1270     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1271     break;
1272   case MAT_SPD:
1273   case MAT_SYMMETRIC:
1274   case MAT_STRUCTURALLY_SYMMETRIC:
1275   case MAT_HERMITIAN:
1276   case MAT_SYMMETRY_ETERNAL:
1277     /* These options are handled directly by MatSetOption() */
1278     break;
1279   default:
1280     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1281   }
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1288 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1289 {
1290   PetscErrorCode ierr;
1291   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1292   MatScalar      *aa_i;
1293   PetscScalar    *v_i;
1294 
1295   PetscFunctionBegin;
1296   bs  = A->rmap->bs;
1297   bs2 = bs*bs;
1298   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1299 
1300   bn  = row/bs;   /* Block number */
1301   bp  = row % bs; /* Block Position */
1302   M   = ai[bn+1] - ai[bn];
1303   *nz = bs*M;
1304 
1305   if (v) {
1306     *v = 0;
1307     if (*nz) {
1308       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1309       for (i=0; i<M; i++) { /* for each block in the block row */
1310         v_i  = *v + i*bs;
1311         aa_i = aa + bs2*(ai[bn] + i);
1312         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1313       }
1314     }
1315   }
1316 
1317   if (idx) {
1318     *idx = 0;
1319     if (*nz) {
1320       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1321       for (i=0; i<M; i++) { /* for each block in the block row */
1322         idx_i = *idx + i*bs;
1323         itmp  = bs*aj[ai[bn] + i];
1324         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1325       }
1326     }
1327   }
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1333 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1334 {
1335   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1345 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1346 {
1347   PetscErrorCode ierr;
1348 
1349   PetscFunctionBegin;
1350   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1351   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1359 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1360 {
1361   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1362   Mat            C;
1363   PetscErrorCode ierr;
1364   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1365   PetscInt       *rows,*cols,bs2=a->bs2;
1366   MatScalar      *array;
1367 
1368   PetscFunctionBegin;
1369   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1370   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1371     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1372 
1373     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1374     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1375     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1376     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1377     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1378     ierr = PetscFree(col);CHKERRQ(ierr);
1379   } else {
1380     C = *B;
1381   }
1382 
1383   array = a->a;
1384   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1385   for (i=0; i<mbs; i++) {
1386     cols[0] = i*bs;
1387     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1388     len = ai[i+1] - ai[i];
1389     for (j=0; j<len; j++) {
1390       rows[0] = (*aj++)*bs;
1391       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1392       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1393       array += bs2;
1394     }
1395   }
1396   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1397 
1398   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1399   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1400 
1401   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1402     *B = C;
1403   } else {
1404     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1411 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1412 {
1413   PetscErrorCode ierr;
1414   Mat            Btrans;
1415 
1416   PetscFunctionBegin;
1417   *f   = PETSC_FALSE;
1418   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1419   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1420   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1426 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1427 {
1428   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1429   PetscErrorCode ierr;
1430   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1431   int            fd;
1432   PetscScalar    *aa;
1433   FILE           *file;
1434 
1435   PetscFunctionBegin;
1436   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1437   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1438   col_lens[0] = MAT_FILE_CLASSID;
1439 
1440   col_lens[1] = A->rmap->N;
1441   col_lens[2] = A->cmap->n;
1442   col_lens[3] = a->nz*bs2;
1443 
1444   /* store lengths of each row and write (including header) to file */
1445   count = 0;
1446   for (i=0; i<a->mbs; i++) {
1447     for (j=0; j<bs; j++) {
1448       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1449     }
1450   }
1451   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1452   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1453 
1454   /* store column indices (zero start index) */
1455   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1456   count = 0;
1457   for (i=0; i<a->mbs; i++) {
1458     for (j=0; j<bs; j++) {
1459       for (k=a->i[i]; k<a->i[i+1]; k++) {
1460         for (l=0; l<bs; l++) {
1461           jj[count++] = bs*a->j[k] + l;
1462         }
1463       }
1464     }
1465   }
1466   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1467   ierr = PetscFree(jj);CHKERRQ(ierr);
1468 
1469   /* store nonzero values */
1470   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1471   count = 0;
1472   for (i=0; i<a->mbs; i++) {
1473     for (j=0; j<bs; j++) {
1474       for (k=a->i[i]; k<a->i[i+1]; k++) {
1475         for (l=0; l<bs; l++) {
1476           aa[count++] = a->a[bs2*k + l*bs + j];
1477         }
1478       }
1479     }
1480   }
1481   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1482   ierr = PetscFree(aa);CHKERRQ(ierr);
1483 
1484   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1485   if (file) {
1486     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1487   }
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1493 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1494 {
1495   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1496   PetscErrorCode    ierr;
1497   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1498   PetscViewerFormat format;
1499 
1500   PetscFunctionBegin;
1501   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1502   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1503     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1504   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1505     const char *matname;
1506     Mat        aij;
1507     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1508     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1509     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1510     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1511     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1512   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1513       PetscFunctionReturn(0);
1514   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1515     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1516     for (i=0; i<a->mbs; i++) {
1517       for (j=0; j<bs; j++) {
1518         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1519         for (k=a->i[i]; k<a->i[i+1]; k++) {
1520           for (l=0; l<bs; l++) {
1521 #if defined(PETSC_USE_COMPLEX)
1522             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1523               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1524                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1525             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1526               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1527                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1528             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1529               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1530             }
1531 #else
1532             if (a->a[bs2*k + l*bs + j] != 0.0) {
1533               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1534             }
1535 #endif
1536           }
1537         }
1538         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1539       }
1540     }
1541     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1542   } else {
1543     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1544     for (i=0; i<a->mbs; i++) {
1545       for (j=0; j<bs; j++) {
1546         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1547         for (k=a->i[i]; k<a->i[i+1]; k++) {
1548           for (l=0; l<bs; l++) {
1549 #if defined(PETSC_USE_COMPLEX)
1550             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1551               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1552                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1553             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1554               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1555                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1556             } else {
1557               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1558             }
1559 #else
1560             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1561 #endif
1562           }
1563         }
1564         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1565       }
1566     }
1567     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1568   }
1569   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #include <petscdraw.h>
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1576 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1577 {
1578   Mat               A = (Mat) Aa;
1579   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1580   PetscErrorCode    ierr;
1581   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1582   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1583   MatScalar         *aa;
1584   PetscViewer       viewer;
1585   PetscViewerFormat format;
1586 
1587   PetscFunctionBegin;
1588   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1589   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1590 
1591   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1592 
1593   /* loop over matrix elements drawing boxes */
1594 
1595   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1596     color = PETSC_DRAW_BLUE;
1597     for (i=0,row=0; i<mbs; i++,row+=bs) {
1598       for (j=a->i[i]; j<a->i[i+1]; j++) {
1599         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1600         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1601         aa  = a->a + j*bs2;
1602         for (k=0; k<bs; k++) {
1603           for (l=0; l<bs; l++) {
1604             if (PetscRealPart(*aa++) >=  0.) continue;
1605             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1606           }
1607         }
1608       }
1609     }
1610     color = PETSC_DRAW_CYAN;
1611     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615         aa  = a->a + j*bs2;
1616         for (k=0; k<bs; k++) {
1617           for (l=0; l<bs; l++) {
1618             if (PetscRealPart(*aa++) != 0.) continue;
1619             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1620           }
1621         }
1622       }
1623     }
1624     color = PETSC_DRAW_RED;
1625     for (i=0,row=0; i<mbs; i++,row+=bs) {
1626       for (j=a->i[i]; j<a->i[i+1]; j++) {
1627         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629         aa  = a->a + j*bs2;
1630         for (k=0; k<bs; k++) {
1631           for (l=0; l<bs; l++) {
1632             if (PetscRealPart(*aa++) <= 0.) continue;
1633             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1634           }
1635         }
1636       }
1637     }
1638   } else {
1639     /* use contour shading to indicate magnitude of values */
1640     /* first determine max of all nonzero values */
1641     PetscDraw popup;
1642     PetscReal scale,maxv = 0.0;
1643 
1644     for (i=0; i<a->nz*a->bs2; i++) {
1645       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1646     }
1647     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1648     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1649     if (popup) {
1650       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1651     }
1652     for (i=0,row=0; i<mbs; i++,row+=bs) {
1653       for (j=a->i[i]; j<a->i[i+1]; j++) {
1654         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1655         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1656         aa  = a->a + j*bs2;
1657         for (k=0; k<bs; k++) {
1658           for (l=0; l<bs; l++) {
1659             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1660             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1661           }
1662         }
1663       }
1664     }
1665   }
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1671 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1672 {
1673   PetscErrorCode ierr;
1674   PetscReal      xl,yl,xr,yr,w,h;
1675   PetscDraw      draw;
1676   PetscBool      isnull;
1677 
1678   PetscFunctionBegin;
1679   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1680   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1681 
1682   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1683   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1684   xr  += w;    yr += h;  xl = -w;     yl = -h;
1685   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1686   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1687   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatView_SeqBAIJ"
1693 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1694 {
1695   PetscErrorCode ierr;
1696   PetscBool      iascii,isbinary,isdraw;
1697 
1698   PetscFunctionBegin;
1699   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1700   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1701   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1702   if (iascii) {
1703     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1704   } else if (isbinary) {
1705     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1706   } else if (isdraw) {
1707     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1708   } else {
1709     Mat B;
1710     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1711     ierr = MatView(B,viewer);CHKERRQ(ierr);
1712     ierr = MatDestroy(&B);CHKERRQ(ierr);
1713   }
1714   PetscFunctionReturn(0);
1715 }
1716 
1717 
1718 #undef __FUNCT__
1719 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1720 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1721 {
1722   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1723   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1724   PetscInt    *ai = a->i,*ailen = a->ilen;
1725   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1726   MatScalar   *ap,*aa = a->a;
1727 
1728   PetscFunctionBegin;
1729   for (k=0; k<m; k++) { /* loop over rows */
1730     row = im[k]; brow = row/bs;
1731     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1732     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1733     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1734     nrow = ailen[brow];
1735     for (l=0; l<n; l++) { /* loop over columns */
1736       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1737       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1738       col  = in[l];
1739       bcol = col/bs;
1740       cidx = col%bs;
1741       ridx = row%bs;
1742       high = nrow;
1743       low  = 0; /* assume unsorted */
1744       while (high-low > 5) {
1745         t = (low+high)/2;
1746         if (rp[t] > bcol) high = t;
1747         else             low  = t;
1748       }
1749       for (i=low; i<high; i++) {
1750         if (rp[i] > bcol) break;
1751         if (rp[i] == bcol) {
1752           *v++ = ap[bs2*i+bs*cidx+ridx];
1753           goto finished;
1754         }
1755       }
1756       *v++ = 0.0;
1757 finished:;
1758     }
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1765 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1766 {
1767   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1768   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1769   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1770   PetscErrorCode    ierr;
1771   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1772   PetscBool         roworiented=a->roworiented;
1773   const PetscScalar *value     = v;
1774   MatScalar         *ap,*aa = a->a,*bap;
1775 
1776   PetscFunctionBegin;
1777   if (roworiented) {
1778     stepval = (n-1)*bs;
1779   } else {
1780     stepval = (m-1)*bs;
1781   }
1782   for (k=0; k<m; k++) { /* loop over added rows */
1783     row = im[k];
1784     if (row < 0) continue;
1785 #if defined(PETSC_USE_DEBUG)
1786     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1787 #endif
1788     rp   = aj + ai[row];
1789     ap   = aa + bs2*ai[row];
1790     rmax = imax[row];
1791     nrow = ailen[row];
1792     low  = 0;
1793     high = nrow;
1794     for (l=0; l<n; l++) { /* loop over added columns */
1795       if (in[l] < 0) continue;
1796 #if defined(PETSC_USE_DEBUG)
1797       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1798 #endif
1799       col = in[l];
1800       if (roworiented) {
1801         value = v + (k*(stepval+bs) + l)*bs;
1802       } else {
1803         value = v + (l*(stepval+bs) + k)*bs;
1804       }
1805       if (col <= lastcol) low = 0;
1806       else high = nrow;
1807       lastcol = col;
1808       while (high-low > 7) {
1809         t = (low+high)/2;
1810         if (rp[t] > col) high = t;
1811         else             low  = t;
1812       }
1813       for (i=low; i<high; i++) {
1814         if (rp[i] > col) break;
1815         if (rp[i] == col) {
1816           bap = ap +  bs2*i;
1817           if (roworiented) {
1818             if (is == ADD_VALUES) {
1819               for (ii=0; ii<bs; ii++,value+=stepval) {
1820                 for (jj=ii; jj<bs2; jj+=bs) {
1821                   bap[jj] += *value++;
1822                 }
1823               }
1824             } else {
1825               for (ii=0; ii<bs; ii++,value+=stepval) {
1826                 for (jj=ii; jj<bs2; jj+=bs) {
1827                   bap[jj] = *value++;
1828                 }
1829               }
1830             }
1831           } else {
1832             if (is == ADD_VALUES) {
1833               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1834                 for (jj=0; jj<bs; jj++) {
1835                   bap[jj] += value[jj];
1836                 }
1837                 bap += bs;
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1841                 for (jj=0; jj<bs; jj++) {
1842                   bap[jj]  = value[jj];
1843                 }
1844                 bap += bs;
1845               }
1846             }
1847           }
1848           goto noinsert2;
1849         }
1850       }
1851       if (nonew == 1) goto noinsert2;
1852       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1853       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1854       N = nrow++ - 1; high++;
1855       /* shift up all the later entries in this row */
1856       for (ii=N; ii>=i; ii--) {
1857         rp[ii+1] = rp[ii];
1858         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1859       }
1860       if (N >= i) {
1861         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1862       }
1863       rp[i] = col;
1864       bap   = ap +  bs2*i;
1865       if (roworiented) {
1866         for (ii=0; ii<bs; ii++,value+=stepval) {
1867           for (jj=ii; jj<bs2; jj+=bs) {
1868             bap[jj] = *value++;
1869           }
1870         }
1871       } else {
1872         for (ii=0; ii<bs; ii++,value+=stepval) {
1873           for (jj=0; jj<bs; jj++) {
1874             *bap++ = *value++;
1875           }
1876         }
1877       }
1878 noinsert2:;
1879       low = i;
1880     }
1881     ailen[row] = nrow;
1882   }
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1888 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1889 {
1890   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1891   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1892   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1893   PetscErrorCode ierr;
1894   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1895   MatScalar      *aa  = a->a,*ap;
1896   PetscReal      ratio=0.6;
1897 
1898   PetscFunctionBegin;
1899   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1900 
1901   if (m) rmax = ailen[0];
1902   for (i=1; i<mbs; i++) {
1903     /* move each row back by the amount of empty slots (fshift) before it*/
1904     fshift += imax[i-1] - ailen[i-1];
1905     rmax    = PetscMax(rmax,ailen[i]);
1906     if (fshift) {
1907       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1908       N  = ailen[i];
1909       for (j=0; j<N; j++) {
1910         ip[j-fshift] = ip[j];
1911 
1912         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1913       }
1914     }
1915     ai[i] = ai[i-1] + ailen[i-1];
1916   }
1917   if (mbs) {
1918     fshift += imax[mbs-1] - ailen[mbs-1];
1919     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1920   }
1921 
1922   /* reset ilen and imax for each row */
1923   a->nonzerorowcnt = 0;
1924   for (i=0; i<mbs; i++) {
1925     ailen[i] = imax[i] = ai[i+1] - ai[i];
1926     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1927   }
1928   a->nz = ai[mbs];
1929 
1930   /* diagonals may have moved, so kill the diagonal pointers */
1931   a->idiagvalid = PETSC_FALSE;
1932   if (fshift && a->diag) {
1933     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1934     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1935     a->diag = 0;
1936   }
1937   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1938   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1939   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1940   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1941 
1942   A->info.mallocs    += a->reallocs;
1943   a->reallocs         = 0;
1944   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1945   a->rmax             = rmax;
1946 
1947   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 /*
1952    This function returns an array of flags which indicate the locations of contiguous
1953    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1954    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1955    Assume: sizes should be long enough to hold all the values.
1956 */
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1959 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1960 {
1961   PetscInt  i,j,k,row;
1962   PetscBool flg;
1963 
1964   PetscFunctionBegin;
1965   for (i=0,j=0; i<n; j++) {
1966     row = idx[i];
1967     if (row%bs!=0) { /* Not the begining of a block */
1968       sizes[j] = 1;
1969       i++;
1970     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1971       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1972       i++;
1973     } else { /* Begining of the block, so check if the complete block exists */
1974       flg = PETSC_TRUE;
1975       for (k=1; k<bs; k++) {
1976         if (row+k != idx[i+k]) { /* break in the block */
1977           flg = PETSC_FALSE;
1978           break;
1979         }
1980       }
1981       if (flg) { /* No break in the bs */
1982         sizes[j] = bs;
1983         i       += bs;
1984       } else {
1985         sizes[j] = 1;
1986         i++;
1987       }
1988     }
1989   }
1990   *bs_max = j;
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1996 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1997 {
1998   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1999   PetscErrorCode    ierr;
2000   PetscInt          i,j,k,count,*rows;
2001   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2002   PetscScalar       zero = 0.0;
2003   MatScalar         *aa;
2004   const PetscScalar *xx;
2005   PetscScalar       *bb;
2006 
2007   PetscFunctionBegin;
2008   /* fix right hand side if needed */
2009   if (x && b) {
2010     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2011     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2012     for (i=0; i<is_n; i++) {
2013       bb[is_idx[i]] = diag*xx[is_idx[i]];
2014     }
2015     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2016     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2017   }
2018 
2019   /* Make a copy of the IS and  sort it */
2020   /* allocate memory for rows,sizes */
2021   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2022 
2023   /* copy IS values to rows, and sort them */
2024   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2025   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2026 
2027   if (baij->keepnonzeropattern) {
2028     for (i=0; i<is_n; i++) sizes[i] = 1;
2029     bs_max          = is_n;
2030   } else {
2031     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2032     A->nonzerostate++;
2033   }
2034 
2035   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2036     row = rows[j];
2037     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2038     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2039     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2040     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2041       if (diag != (PetscScalar)0.0) {
2042         if (baij->ilen[row/bs] > 0) {
2043           baij->ilen[row/bs]       = 1;
2044           baij->j[baij->i[row/bs]] = row/bs;
2045 
2046           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2047         }
2048         /* Now insert all the diagonal values for this bs */
2049         for (k=0; k<bs; k++) {
2050           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2051         }
2052       } else { /* (diag == 0.0) */
2053         baij->ilen[row/bs] = 0;
2054       } /* end (diag == 0.0) */
2055     } else { /* (sizes[i] != bs) */
2056 #if defined(PETSC_USE_DEBUG)
2057       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2058 #endif
2059       for (k=0; k<count; k++) {
2060         aa[0] =  zero;
2061         aa   += bs;
2062       }
2063       if (diag != (PetscScalar)0.0) {
2064         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2065       }
2066     }
2067   }
2068 
2069   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2070   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2076 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2077 {
2078   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2079   PetscErrorCode    ierr;
2080   PetscInt          i,j,k,count;
2081   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2082   PetscScalar       zero = 0.0;
2083   MatScalar         *aa;
2084   const PetscScalar *xx;
2085   PetscScalar       *bb;
2086   PetscBool         *zeroed,vecs = PETSC_FALSE;
2087 
2088   PetscFunctionBegin;
2089   /* fix right hand side if needed */
2090   if (x && b) {
2091     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2092     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2093     vecs = PETSC_TRUE;
2094   }
2095 
2096   /* zero the columns */
2097   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2098   for (i=0; i<is_n; i++) {
2099     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2100     zeroed[is_idx[i]] = PETSC_TRUE;
2101   }
2102   for (i=0; i<A->rmap->N; i++) {
2103     if (!zeroed[i]) {
2104       row = i/bs;
2105       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2106         for (k=0; k<bs; k++) {
2107           col = bs*baij->j[j] + k;
2108           if (zeroed[col]) {
2109             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2110             if (vecs) bb[i] -= aa[0]*xx[col];
2111             aa[0] = 0.0;
2112           }
2113         }
2114       }
2115     } else if (vecs) bb[i] = diag*xx[i];
2116   }
2117   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2118   if (vecs) {
2119     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2120     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2121   }
2122 
2123   /* zero the rows */
2124   for (i=0; i<is_n; i++) {
2125     row   = is_idx[i];
2126     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2127     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2128     for (k=0; k<count; k++) {
2129       aa[0] =  zero;
2130       aa   += bs;
2131     }
2132     if (diag != (PetscScalar)0.0) {
2133       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2134     }
2135   }
2136   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2142 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2143 {
2144   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2145   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2146   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2147   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2148   PetscErrorCode ierr;
2149   PetscInt       ridx,cidx,bs2=a->bs2;
2150   PetscBool      roworiented=a->roworiented;
2151   MatScalar      *ap,value,*aa=a->a,*bap;
2152 
2153   PetscFunctionBegin;
2154   for (k=0; k<m; k++) { /* loop over added rows */
2155     row  = im[k];
2156     brow = row/bs;
2157     if (row < 0) continue;
2158 #if defined(PETSC_USE_DEBUG)
2159     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2160 #endif
2161     rp   = aj + ai[brow];
2162     ap   = aa + bs2*ai[brow];
2163     rmax = imax[brow];
2164     nrow = ailen[brow];
2165     low  = 0;
2166     high = nrow;
2167     for (l=0; l<n; l++) { /* loop over added columns */
2168       if (in[l] < 0) continue;
2169 #if defined(PETSC_USE_DEBUG)
2170       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2171 #endif
2172       col  = in[l]; bcol = col/bs;
2173       ridx = row % bs; cidx = col % bs;
2174       if (roworiented) {
2175         value = v[l + k*n];
2176       } else {
2177         value = v[k + l*m];
2178       }
2179       if (col <= lastcol) low = 0; else high = nrow;
2180       lastcol = col;
2181       while (high-low > 7) {
2182         t = (low+high)/2;
2183         if (rp[t] > bcol) high = t;
2184         else              low  = t;
2185       }
2186       for (i=low; i<high; i++) {
2187         if (rp[i] > bcol) break;
2188         if (rp[i] == bcol) {
2189           bap = ap +  bs2*i + bs*cidx + ridx;
2190           if (is == ADD_VALUES) *bap += value;
2191           else                  *bap  = value;
2192           goto noinsert1;
2193         }
2194       }
2195       if (nonew == 1) goto noinsert1;
2196       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2197       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2198       N = nrow++ - 1; high++;
2199       /* shift up all the later entries in this row */
2200       for (ii=N; ii>=i; ii--) {
2201         rp[ii+1] = rp[ii];
2202         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2203       }
2204       if (N>=i) {
2205         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2206       }
2207       rp[i]                      = bcol;
2208       ap[bs2*i + bs*cidx + ridx] = value;
2209       a->nz++;
2210       A->nonzerostate++;
2211 noinsert1:;
2212       low = i;
2213     }
2214     ailen[brow] = nrow;
2215   }
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2221 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2222 {
2223   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2224   Mat            outA;
2225   PetscErrorCode ierr;
2226   PetscBool      row_identity,col_identity;
2227 
2228   PetscFunctionBegin;
2229   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2230   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2231   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2232   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2233 
2234   outA            = inA;
2235   inA->factortype = MAT_FACTOR_LU;
2236 
2237   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2238 
2239   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2240   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2241   a->row = row;
2242   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2243   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2244   a->col = col;
2245 
2246   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2247   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2248   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2249   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2250 
2251   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2252   if (!a->solve_work) {
2253     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2254     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2255   }
2256   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2262 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2263 {
2264   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2265   PetscInt    i,nz,mbs;
2266 
2267   PetscFunctionBegin;
2268   nz  = baij->maxnz;
2269   mbs = baij->mbs;
2270   for (i=0; i<nz; i++) {
2271     baij->j[i] = indices[i];
2272   }
2273   baij->nz = nz;
2274   for (i=0; i<mbs; i++) {
2275     baij->ilen[i] = baij->imax[i];
2276   }
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 #undef __FUNCT__
2281 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2282 /*@
2283     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2284        in the matrix.
2285 
2286   Input Parameters:
2287 +  mat - the SeqBAIJ matrix
2288 -  indices - the column indices
2289 
2290   Level: advanced
2291 
2292   Notes:
2293     This can be called if you have precomputed the nonzero structure of the
2294   matrix and want to provide it to the matrix object to improve the performance
2295   of the MatSetValues() operation.
2296 
2297     You MUST have set the correct numbers of nonzeros per row in the call to
2298   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2299 
2300     MUST be called before any calls to MatSetValues();
2301 
2302 @*/
2303 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2304 {
2305   PetscErrorCode ierr;
2306 
2307   PetscFunctionBegin;
2308   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2309   PetscValidPointer(indices,2);
2310   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2316 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2317 {
2318   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2319   PetscErrorCode ierr;
2320   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2321   PetscReal      atmp;
2322   PetscScalar    *x,zero = 0.0;
2323   MatScalar      *aa;
2324   PetscInt       ncols,brow,krow,kcol;
2325 
2326   PetscFunctionBegin;
2327   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2328   bs  = A->rmap->bs;
2329   aa  = a->a;
2330   ai  = a->i;
2331   aj  = a->j;
2332   mbs = a->mbs;
2333 
2334   ierr = VecSet(v,zero);CHKERRQ(ierr);
2335   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2336   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2337   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2338   for (i=0; i<mbs; i++) {
2339     ncols = ai[1] - ai[0]; ai++;
2340     brow  = bs*i;
2341     for (j=0; j<ncols; j++) {
2342       for (kcol=0; kcol<bs; kcol++) {
2343         for (krow=0; krow<bs; krow++) {
2344           atmp = PetscAbsScalar(*aa);aa++;
2345           row  = brow + krow;   /* row index */
2346           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2347         }
2348       }
2349       aj++;
2350     }
2351   }
2352   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2353   PetscFunctionReturn(0);
2354 }
2355 
2356 #undef __FUNCT__
2357 #define __FUNCT__ "MatCopy_SeqBAIJ"
2358 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2359 {
2360   PetscErrorCode ierr;
2361 
2362   PetscFunctionBegin;
2363   /* If the two matrices have the same copy implementation, use fast copy. */
2364   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2365     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2366     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2367     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2368 
2369     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2370     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2371     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2372   } else {
2373     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2374   }
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 #undef __FUNCT__
2379 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2380 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2381 {
2382   PetscErrorCode ierr;
2383 
2384   PetscFunctionBegin;
2385   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2391 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2392 {
2393   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2394 
2395   PetscFunctionBegin;
2396   *array = a->a;
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2402 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2403 {
2404   PetscFunctionBegin;
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2410 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2411 {
2412   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2413   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2414   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2415   PetscErrorCode ierr;
2416 
2417   PetscFunctionBegin;
2418   /* Set the number of nonzeros in the new matrix */
2419   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2425 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2426 {
2427   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2428   PetscErrorCode ierr;
2429   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2430   PetscBLASInt   one=1;
2431 
2432   PetscFunctionBegin;
2433   if (str == SAME_NONZERO_PATTERN) {
2434     PetscScalar  alpha = a;
2435     PetscBLASInt bnz;
2436     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2437     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2438     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2439   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2440     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2441   } else {
2442     Mat      B;
2443     PetscInt *nnz;
2444     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2445     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2446     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2447     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2448     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2449     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2450     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2451     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2452     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2453     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2454     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2455     ierr = PetscFree(nnz);CHKERRQ(ierr);
2456   }
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2462 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2463 {
2464   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2465   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2466   MatScalar   *aa = a->a;
2467 
2468   PetscFunctionBegin;
2469   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2475 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2476 {
2477   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2478   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2479   MatScalar   *aa = a->a;
2480 
2481   PetscFunctionBegin;
2482   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2488 /*
2489     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2490 */
2491 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2492 {
2493   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2494   PetscErrorCode ierr;
2495   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2496   PetscInt       nz = a->i[m],row,*jj,mr,col;
2497 
2498   PetscFunctionBegin;
2499   *nn = n;
2500   if (!ia) PetscFunctionReturn(0);
2501   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2502   else {
2503     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2504     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2505     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2506     jj   = a->j;
2507     for (i=0; i<nz; i++) {
2508       collengths[jj[i]]++;
2509     }
2510     cia[0] = oshift;
2511     for (i=0; i<n; i++) {
2512       cia[i+1] = cia[i] + collengths[i];
2513     }
2514     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2515     jj   = a->j;
2516     for (row=0; row<m; row++) {
2517       mr = a->i[row+1] - a->i[row];
2518       for (i=0; i<mr; i++) {
2519         col = *jj++;
2520 
2521         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2522       }
2523     }
2524     ierr = PetscFree(collengths);CHKERRQ(ierr);
2525     *ia  = cia; *ja = cja;
2526   }
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 #undef __FUNCT__
2531 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2532 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2533 {
2534   PetscErrorCode ierr;
2535 
2536   PetscFunctionBegin;
2537   if (!ia) PetscFunctionReturn(0);
2538   ierr = PetscFree(*ia);CHKERRQ(ierr);
2539   ierr = PetscFree(*ja);CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 /*
2544  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2545  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2546  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2547  */
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2550 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2551 {
2552   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2553   PetscErrorCode ierr;
2554   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2555   PetscInt       nz = a->i[m],row,*jj,mr,col;
2556   PetscInt       *cspidx;
2557 
2558   PetscFunctionBegin;
2559   *nn = n;
2560   if (!ia) PetscFunctionReturn(0);
2561 
2562   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2563   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2564   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2565   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2566   jj   = a->j;
2567   for (i=0; i<nz; i++) {
2568     collengths[jj[i]]++;
2569   }
2570   cia[0] = oshift;
2571   for (i=0; i<n; i++) {
2572     cia[i+1] = cia[i] + collengths[i];
2573   }
2574   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2575   jj   = a->j;
2576   for (row=0; row<m; row++) {
2577     mr = a->i[row+1] - a->i[row];
2578     for (i=0; i<mr; i++) {
2579       col = *jj++;
2580       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2581       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2582     }
2583   }
2584   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2585   *ia    = cia; *ja = cja;
2586   *spidx = cspidx;
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2592 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2593 {
2594   PetscErrorCode ierr;
2595 
2596   PetscFunctionBegin;
2597   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2598   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 #undef __FUNCT__
2603 #define __FUNCT__ "MatShift_SeqBAIJ"
2604 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2605 {
2606   PetscErrorCode ierr;
2607   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2608 
2609   PetscFunctionBegin;
2610   if (!Y->preallocated || !aij->nz) {
2611     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2612   }
2613   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 /* -------------------------------------------------------------------*/
2618 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2619                                        MatGetRow_SeqBAIJ,
2620                                        MatRestoreRow_SeqBAIJ,
2621                                        MatMult_SeqBAIJ_N,
2622                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2623                                        MatMultTranspose_SeqBAIJ,
2624                                        MatMultTransposeAdd_SeqBAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                /* 10*/ 0,
2629                                        MatLUFactor_SeqBAIJ,
2630                                        0,
2631                                        0,
2632                                        MatTranspose_SeqBAIJ,
2633                                /* 15*/ MatGetInfo_SeqBAIJ,
2634                                        MatEqual_SeqBAIJ,
2635                                        MatGetDiagonal_SeqBAIJ,
2636                                        MatDiagonalScale_SeqBAIJ,
2637                                        MatNorm_SeqBAIJ,
2638                                /* 20*/ 0,
2639                                        MatAssemblyEnd_SeqBAIJ,
2640                                        MatSetOption_SeqBAIJ,
2641                                        MatZeroEntries_SeqBAIJ,
2642                                /* 24*/ MatZeroRows_SeqBAIJ,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                /* 29*/ MatSetUp_SeqBAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                /* 34*/ MatDuplicate_SeqBAIJ,
2653                                        0,
2654                                        0,
2655                                        MatILUFactor_SeqBAIJ,
2656                                        0,
2657                                /* 39*/ MatAXPY_SeqBAIJ,
2658                                        MatGetSubMatrices_SeqBAIJ,
2659                                        MatIncreaseOverlap_SeqBAIJ,
2660                                        MatGetValues_SeqBAIJ,
2661                                        MatCopy_SeqBAIJ,
2662                                /* 44*/ 0,
2663                                        MatScale_SeqBAIJ,
2664                                        MatShift_SeqBAIJ,
2665                                        0,
2666                                        MatZeroRowsColumns_SeqBAIJ,
2667                                /* 49*/ 0,
2668                                        MatGetRowIJ_SeqBAIJ,
2669                                        MatRestoreRowIJ_SeqBAIJ,
2670                                        MatGetColumnIJ_SeqBAIJ,
2671                                        MatRestoreColumnIJ_SeqBAIJ,
2672                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        MatSetValuesBlocked_SeqBAIJ,
2677                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2678                                        MatDestroy_SeqBAIJ,
2679                                        MatView_SeqBAIJ,
2680                                        0,
2681                                        0,
2682                                /* 64*/ 0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2688                                        0,
2689                                        MatConvert_Basic,
2690                                        0,
2691                                        0,
2692                                /* 74*/ 0,
2693                                        MatFDColoringApply_BAIJ,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                /* 79*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        MatLoad_SeqBAIJ,
2702                                /* 84*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /* 89*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                /* 94*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /* 99*/ 0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                /*104*/ 0,
2723                                        MatRealPart_SeqBAIJ,
2724                                        MatImaginaryPart_SeqBAIJ,
2725                                        0,
2726                                        0,
2727                                /*109*/ 0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        MatMissingDiagonal_SeqBAIJ,
2732                                /*114*/ 0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                /*119*/ 0,
2738                                        0,
2739                                        MatMultHermitianTranspose_SeqBAIJ,
2740                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2741                                        0,
2742                                /*124*/ 0,
2743                                        0,
2744                                        MatInvertBlockDiagonal_SeqBAIJ,
2745                                        0,
2746                                        0,
2747                                /*129*/ 0,
2748                                        0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                /*134*/ 0,
2753                                        0,
2754                                        0,
2755                                        0,
2756                                        0,
2757                                /*139*/ 0,
2758                                        0,
2759                                        0,
2760                                        MatFDColoringSetUp_SeqXAIJ,
2761                                        0,
2762                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2763 };
2764 
2765 #undef __FUNCT__
2766 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2767 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2768 {
2769   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2770   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2771   PetscErrorCode ierr;
2772 
2773   PetscFunctionBegin;
2774   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2775 
2776   /* allocate space for values if not already there */
2777   if (!aij->saved_values) {
2778     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2779     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2780   }
2781 
2782   /* copy values over */
2783   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 #undef __FUNCT__
2788 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2789 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2790 {
2791   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2792   PetscErrorCode ierr;
2793   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2794 
2795   PetscFunctionBegin;
2796   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2797   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2798 
2799   /* copy values over */
2800   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2805 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2806 
2807 #undef __FUNCT__
2808 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2809 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2810 {
2811   Mat_SeqBAIJ    *b;
2812   PetscErrorCode ierr;
2813   PetscInt       i,mbs,nbs,bs2;
2814   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2815 
2816   PetscFunctionBegin;
2817   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2818   if (nz == MAT_SKIP_ALLOCATION) {
2819     skipallocation = PETSC_TRUE;
2820     nz             = 0;
2821   }
2822 
2823   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2824   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2825   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2826   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2827 
2828   B->preallocated = PETSC_TRUE;
2829 
2830   mbs = B->rmap->n/bs;
2831   nbs = B->cmap->n/bs;
2832   bs2 = bs*bs;
2833 
2834   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2835 
2836   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2837   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2838   if (nnz) {
2839     for (i=0; i<mbs; i++) {
2840       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2841       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2842     }
2843   }
2844 
2845   b    = (Mat_SeqBAIJ*)B->data;
2846   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2847   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2848   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2849 
2850   if (!flg) {
2851     switch (bs) {
2852     case 1:
2853       B->ops->mult    = MatMult_SeqBAIJ_1;
2854       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2855       break;
2856     case 2:
2857       B->ops->mult    = MatMult_SeqBAIJ_2;
2858       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2859       break;
2860     case 3:
2861       B->ops->mult    = MatMult_SeqBAIJ_3;
2862       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2863       break;
2864     case 4:
2865       B->ops->mult    = MatMult_SeqBAIJ_4;
2866       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2867       break;
2868     case 5:
2869       B->ops->mult    = MatMult_SeqBAIJ_5;
2870       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2871       break;
2872     case 6:
2873       B->ops->mult    = MatMult_SeqBAIJ_6;
2874       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2875       break;
2876     case 7:
2877       B->ops->mult    = MatMult_SeqBAIJ_7;
2878       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2879       break;
2880     case 15:
2881       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2882       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2883       break;
2884     default:
2885       B->ops->mult    = MatMult_SeqBAIJ_N;
2886       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2887       break;
2888     }
2889   }
2890   B->ops->sor = MatSOR_SeqBAIJ;
2891   b->mbs = mbs;
2892   b->nbs = nbs;
2893   if (!skipallocation) {
2894     if (!b->imax) {
2895       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2896       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2897 
2898       b->free_imax_ilen = PETSC_TRUE;
2899     }
2900     /* b->ilen will count nonzeros in each block row so far. */
2901     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2902     if (!nnz) {
2903       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2904       else if (nz < 0) nz = 1;
2905       for (i=0; i<mbs; i++) b->imax[i] = nz;
2906       nz = nz*mbs;
2907     } else {
2908       nz = 0;
2909       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2910     }
2911 
2912     /* allocate the matrix space */
2913     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2914     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2915     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2916     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2917     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2918 
2919     b->singlemalloc = PETSC_TRUE;
2920     b->i[0]         = 0;
2921     for (i=1; i<mbs+1; i++) {
2922       b->i[i] = b->i[i-1] + b->imax[i-1];
2923     }
2924     b->free_a  = PETSC_TRUE;
2925     b->free_ij = PETSC_TRUE;
2926   } else {
2927     b->free_a  = PETSC_FALSE;
2928     b->free_ij = PETSC_FALSE;
2929   }
2930 
2931   b->bs2              = bs2;
2932   b->mbs              = mbs;
2933   b->nz               = 0;
2934   b->maxnz            = nz;
2935   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2936   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2937   PetscFunctionReturn(0);
2938 }
2939 
2940 #undef __FUNCT__
2941 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2942 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2943 {
2944   PetscInt       i,m,nz,nz_max=0,*nnz;
2945   PetscScalar    *values=0;
2946   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2947   PetscErrorCode ierr;
2948 
2949   PetscFunctionBegin;
2950   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2951   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2952   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2953   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2954   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2955   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2956   m    = B->rmap->n/bs;
2957 
2958   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2959   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2960   for (i=0; i<m; i++) {
2961     nz = ii[i+1]- ii[i];
2962     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2963     nz_max = PetscMax(nz_max, nz);
2964     nnz[i] = nz;
2965   }
2966   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2967   ierr = PetscFree(nnz);CHKERRQ(ierr);
2968 
2969   values = (PetscScalar*)V;
2970   if (!values) {
2971     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2972   }
2973   for (i=0; i<m; i++) {
2974     PetscInt          ncols  = ii[i+1] - ii[i];
2975     const PetscInt    *icols = jj + ii[i];
2976     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2977     if (!roworiented) {
2978       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2979     } else {
2980       PetscInt j;
2981       for (j=0; j<ncols; j++) {
2982         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2983         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2984       }
2985     }
2986   }
2987   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2988   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2989   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2990   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2991   PetscFunctionReturn(0);
2992 }
2993 
2994 /*MC
2995    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2996    block sparse compressed row format.
2997 
2998    Options Database Keys:
2999 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3000 
3001   Level: beginner
3002 
3003 .seealso: MatCreateSeqBAIJ()
3004 M*/
3005 
3006 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3007 
3008 #undef __FUNCT__
3009 #define __FUNCT__ "MatCreate_SeqBAIJ"
3010 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3011 {
3012   PetscErrorCode ierr;
3013   PetscMPIInt    size;
3014   Mat_SeqBAIJ    *b;
3015 
3016   PetscFunctionBegin;
3017   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3018   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3019 
3020   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3021   B->data = (void*)b;
3022   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3023 
3024   b->row          = 0;
3025   b->col          = 0;
3026   b->icol         = 0;
3027   b->reallocs     = 0;
3028   b->saved_values = 0;
3029 
3030   b->roworiented        = PETSC_TRUE;
3031   b->nonew              = 0;
3032   b->diag               = 0;
3033   B->spptr              = 0;
3034   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3035   b->keepnonzeropattern = PETSC_FALSE;
3036 
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 #undef __FUNCT__
3052 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3053 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3054 {
3055   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3056   PetscErrorCode ierr;
3057   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3058 
3059   PetscFunctionBegin;
3060   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3061 
3062   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3063     c->imax           = a->imax;
3064     c->ilen           = a->ilen;
3065     c->free_imax_ilen = PETSC_FALSE;
3066   } else {
3067     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3068     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3069     for (i=0; i<mbs; i++) {
3070       c->imax[i] = a->imax[i];
3071       c->ilen[i] = a->ilen[i];
3072     }
3073     c->free_imax_ilen = PETSC_TRUE;
3074   }
3075 
3076   /* allocate the matrix space */
3077   if (mallocmatspace) {
3078     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3079       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3080       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3081 
3082       c->i            = a->i;
3083       c->j            = a->j;
3084       c->singlemalloc = PETSC_FALSE;
3085       c->free_a       = PETSC_TRUE;
3086       c->free_ij      = PETSC_FALSE;
3087       c->parent       = A;
3088       C->preallocated = PETSC_TRUE;
3089       C->assembled    = PETSC_TRUE;
3090 
3091       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3092       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3093       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3094     } else {
3095       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3096       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3097 
3098       c->singlemalloc = PETSC_TRUE;
3099       c->free_a       = PETSC_TRUE;
3100       c->free_ij      = PETSC_TRUE;
3101 
3102       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3103       if (mbs > 0) {
3104         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3105         if (cpvalues == MAT_COPY_VALUES) {
3106           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3107         } else {
3108           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3109         }
3110       }
3111       C->preallocated = PETSC_TRUE;
3112       C->assembled    = PETSC_TRUE;
3113     }
3114   }
3115 
3116   c->roworiented = a->roworiented;
3117   c->nonew       = a->nonew;
3118 
3119   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3120   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3121 
3122   c->bs2         = a->bs2;
3123   c->mbs         = a->mbs;
3124   c->nbs         = a->nbs;
3125 
3126   if (a->diag) {
3127     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3128       c->diag      = a->diag;
3129       c->free_diag = PETSC_FALSE;
3130     } else {
3131       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3132       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3133       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3134       c->free_diag = PETSC_TRUE;
3135     }
3136   } else c->diag = 0;
3137 
3138   c->nz         = a->nz;
3139   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3140   c->solve_work = NULL;
3141   c->mult_work  = NULL;
3142   c->sor_workt  = NULL;
3143   c->sor_work   = NULL;
3144 
3145   c->compressedrow.use   = a->compressedrow.use;
3146   c->compressedrow.nrows = a->compressedrow.nrows;
3147   if (a->compressedrow.use) {
3148     i    = a->compressedrow.nrows;
3149     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3150     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3151     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3152     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3153   } else {
3154     c->compressedrow.use    = PETSC_FALSE;
3155     c->compressedrow.i      = NULL;
3156     c->compressedrow.rindex = NULL;
3157   }
3158   C->nonzerostate = A->nonzerostate;
3159 
3160   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3161   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
3164 
3165 #undef __FUNCT__
3166 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3167 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3168 {
3169   PetscErrorCode ierr;
3170 
3171   PetscFunctionBegin;
3172   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3173   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3174   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3175   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3176   PetscFunctionReturn(0);
3177 }
3178 
3179 #undef __FUNCT__
3180 #define __FUNCT__ "MatLoad_SeqBAIJ"
3181 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3182 {
3183   Mat_SeqBAIJ    *a;
3184   PetscErrorCode ierr;
3185   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3186   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3187   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3188   PetscInt       *masked,nmask,tmp,bs2,ishift;
3189   PetscMPIInt    size;
3190   int            fd;
3191   PetscScalar    *aa;
3192   MPI_Comm       comm;
3193 
3194   PetscFunctionBegin;
3195   /* force binary viewer to load .info file if it has not yet done so */
3196   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3197   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3198   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3199   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3200   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3201   if (bs < 0) bs = 1;
3202   bs2  = bs*bs;
3203 
3204   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3205   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3206   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3207   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3208   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3209   M = header[1]; N = header[2]; nz = header[3];
3210 
3211   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3212   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3213 
3214   /*
3215      This code adds extra rows to make sure the number of rows is
3216     divisible by the blocksize
3217   */
3218   mbs        = M/bs;
3219   extra_rows = bs - M + bs*(mbs);
3220   if (extra_rows == bs) extra_rows = 0;
3221   else mbs++;
3222   if (extra_rows) {
3223     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3224   }
3225 
3226   /* Set global sizes if not already set */
3227   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3228     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3229   } else { /* Check if the matrix global sizes are correct */
3230     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3231     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3232       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3233     }
3234     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3235   }
3236 
3237   /* read in row lengths */
3238   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3239   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3240   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3241 
3242   /* read in column indices */
3243   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3244   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3245   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3246 
3247   /* loop over row lengths determining block row lengths */
3248   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3249   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3250   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3251   rowcount = 0;
3252   nzcount  = 0;
3253   for (i=0; i<mbs; i++) {
3254     nmask = 0;
3255     for (j=0; j<bs; j++) {
3256       kmax = rowlengths[rowcount];
3257       for (k=0; k<kmax; k++) {
3258         tmp = jj[nzcount++]/bs;
3259         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3260       }
3261       rowcount++;
3262     }
3263     browlengths[i] += nmask;
3264     /* zero out the mask elements we set */
3265     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3266   }
3267 
3268   /* Do preallocation  */
3269   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3270   a    = (Mat_SeqBAIJ*)newmat->data;
3271 
3272   /* set matrix "i" values */
3273   a->i[0] = 0;
3274   for (i=1; i<= mbs; i++) {
3275     a->i[i]      = a->i[i-1] + browlengths[i-1];
3276     a->ilen[i-1] = browlengths[i-1];
3277   }
3278   a->nz = 0;
3279   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3280 
3281   /* read in nonzero values */
3282   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3283   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3284   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3285 
3286   /* set "a" and "j" values into matrix */
3287   nzcount = 0; jcount = 0;
3288   for (i=0; i<mbs; i++) {
3289     nzcountb = nzcount;
3290     nmask    = 0;
3291     for (j=0; j<bs; j++) {
3292       kmax = rowlengths[i*bs+j];
3293       for (k=0; k<kmax; k++) {
3294         tmp = jj[nzcount++]/bs;
3295         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3296       }
3297     }
3298     /* sort the masked values */
3299     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3300 
3301     /* set "j" values into matrix */
3302     maskcount = 1;
3303     for (j=0; j<nmask; j++) {
3304       a->j[jcount++]  = masked[j];
3305       mask[masked[j]] = maskcount++;
3306     }
3307     /* set "a" values into matrix */
3308     ishift = bs2*a->i[i];
3309     for (j=0; j<bs; j++) {
3310       kmax = rowlengths[i*bs+j];
3311       for (k=0; k<kmax; k++) {
3312         tmp       = jj[nzcountb]/bs;
3313         block     = mask[tmp] - 1;
3314         point     = jj[nzcountb] - bs*tmp;
3315         idx       = ishift + bs2*block + j + bs*point;
3316         a->a[idx] = (MatScalar)aa[nzcountb++];
3317       }
3318     }
3319     /* zero out the mask elements we set */
3320     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3321   }
3322   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3323 
3324   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3325   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3326   ierr = PetscFree(aa);CHKERRQ(ierr);
3327   ierr = PetscFree(jj);CHKERRQ(ierr);
3328   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3329 
3330   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3331   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3332   PetscFunctionReturn(0);
3333 }
3334 
3335 #undef __FUNCT__
3336 #define __FUNCT__ "MatCreateSeqBAIJ"
3337 /*@C
3338    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3339    compressed row) format.  For good matrix assembly performance the
3340    user should preallocate the matrix storage by setting the parameter nz
3341    (or the array nnz).  By setting these parameters accurately, performance
3342    during matrix assembly can be increased by more than a factor of 50.
3343 
3344    Collective on MPI_Comm
3345 
3346    Input Parameters:
3347 +  comm - MPI communicator, set to PETSC_COMM_SELF
3348 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3349           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3350 .  m - number of rows
3351 .  n - number of columns
3352 .  nz - number of nonzero blocks  per block row (same for all rows)
3353 -  nnz - array containing the number of nonzero blocks in the various block rows
3354          (possibly different for each block row) or NULL
3355 
3356    Output Parameter:
3357 .  A - the matrix
3358 
3359    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3360    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3361    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3362 
3363    Options Database Keys:
3364 .   -mat_no_unroll - uses code that does not unroll the loops in the
3365                      block calculations (much slower)
3366 .    -mat_block_size - size of the blocks to use
3367 
3368    Level: intermediate
3369 
3370    Notes:
3371    The number of rows and columns must be divisible by blocksize.
3372 
3373    If the nnz parameter is given then the nz parameter is ignored
3374 
3375    A nonzero block is any block that as 1 or more nonzeros in it
3376 
3377    The block AIJ format is fully compatible with standard Fortran 77
3378    storage.  That is, the stored row and column indices can begin at
3379    either one (as in Fortran) or zero.  See the users' manual for details.
3380 
3381    Specify the preallocated storage with either nz or nnz (not both).
3382    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3383    allocation.  See Users-Manual: ch_mat for details.
3384    matrices.
3385 
3386 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3387 @*/
3388 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3389 {
3390   PetscErrorCode ierr;
3391 
3392   PetscFunctionBegin;
3393   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3394   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3395   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3396   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3397   PetscFunctionReturn(0);
3398 }
3399 
3400 #undef __FUNCT__
3401 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3402 /*@C
3403    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3404    per row in the matrix. For good matrix assembly performance the
3405    user should preallocate the matrix storage by setting the parameter nz
3406    (or the array nnz).  By setting these parameters accurately, performance
3407    during matrix assembly can be increased by more than a factor of 50.
3408 
3409    Collective on MPI_Comm
3410 
3411    Input Parameters:
3412 +  B - the matrix
3413 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3414           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3415 .  nz - number of block nonzeros per block row (same for all rows)
3416 -  nnz - array containing the number of block nonzeros in the various block rows
3417          (possibly different for each block row) or NULL
3418 
3419    Options Database Keys:
3420 .   -mat_no_unroll - uses code that does not unroll the loops in the
3421                      block calculations (much slower)
3422 .   -mat_block_size - size of the blocks to use
3423 
3424    Level: intermediate
3425 
3426    Notes:
3427    If the nnz parameter is given then the nz parameter is ignored
3428 
3429    You can call MatGetInfo() to get information on how effective the preallocation was;
3430    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3431    You can also run with the option -info and look for messages with the string
3432    malloc in them to see if additional memory allocation was needed.
3433 
3434    The block AIJ format is fully compatible with standard Fortran 77
3435    storage.  That is, the stored row and column indices can begin at
3436    either one (as in Fortran) or zero.  See the users' manual for details.
3437 
3438    Specify the preallocated storage with either nz or nnz (not both).
3439    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3440    allocation.  See Users-Manual: ch_mat for details.
3441 
3442 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3443 @*/
3444 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3445 {
3446   PetscErrorCode ierr;
3447 
3448   PetscFunctionBegin;
3449   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3450   PetscValidType(B,1);
3451   PetscValidLogicalCollectiveInt(B,bs,2);
3452   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3453   PetscFunctionReturn(0);
3454 }
3455 
3456 #undef __FUNCT__
3457 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3458 /*@C
3459    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3460    (the default sequential PETSc format).
3461 
3462    Collective on MPI_Comm
3463 
3464    Input Parameters:
3465 +  B - the matrix
3466 .  i - the indices into j for the start of each local row (starts with zero)
3467 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3468 -  v - optional values in the matrix
3469 
3470    Level: developer
3471 
3472    Notes:
3473    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3474    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3475    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3476    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3477    block column and the second index is over columns within a block.
3478 
3479 .keywords: matrix, aij, compressed row, sparse
3480 
3481 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3482 @*/
3483 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3484 {
3485   PetscErrorCode ierr;
3486 
3487   PetscFunctionBegin;
3488   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3489   PetscValidType(B,1);
3490   PetscValidLogicalCollectiveInt(B,bs,2);
3491   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3498 /*@
3499      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3500 
3501      Collective on MPI_Comm
3502 
3503    Input Parameters:
3504 +  comm - must be an MPI communicator of size 1
3505 .  bs - size of block
3506 .  m - number of rows
3507 .  n - number of columns
3508 .  i - row indices
3509 .  j - column indices
3510 -  a - matrix values
3511 
3512    Output Parameter:
3513 .  mat - the matrix
3514 
3515    Level: advanced
3516 
3517    Notes:
3518        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3519     once the matrix is destroyed
3520 
3521        You cannot set new nonzero locations into this matrix, that will generate an error.
3522 
3523        The i and j indices are 0 based
3524 
3525        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3526 
3527       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3528       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3529       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3530       with column-major ordering within blocks.
3531 
3532 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3533 
3534 @*/
3535 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3536 {
3537   PetscErrorCode ierr;
3538   PetscInt       ii;
3539   Mat_SeqBAIJ    *baij;
3540 
3541   PetscFunctionBegin;
3542   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3543   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3544 
3545   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3546   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3547   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3548   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3549   baij = (Mat_SeqBAIJ*)(*mat)->data;
3550   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3551   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3552 
3553   baij->i = i;
3554   baij->j = j;
3555   baij->a = a;
3556 
3557   baij->singlemalloc = PETSC_FALSE;
3558   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3559   baij->free_a       = PETSC_FALSE;
3560   baij->free_ij      = PETSC_FALSE;
3561 
3562   for (ii=0; ii<m; ii++) {
3563     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3564 #if defined(PETSC_USE_DEBUG)
3565     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3566 #endif
3567   }
3568 #if defined(PETSC_USE_DEBUG)
3569   for (ii=0; ii<baij->i[m]; ii++) {
3570     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3571     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3572   }
3573 #endif
3574 
3575   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3576   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3577   PetscFunctionReturn(0);
3578 }
3579 
3580 #undef __FUNCT__
3581 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3582 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3583 {
3584   PetscErrorCode ierr;
3585 
3586   PetscFunctionBegin;
3587   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3588   PetscFunctionReturn(0);
3589 }
3590