xref: /petsc/src/mat/impls/baij/seq/baij.c (revision b06ff27eac5114371f4ff10d36699518dda77a85)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
16 {
17   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
18   PetscErrorCode ierr;
19   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
20   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
21   PetscReal      shift = 0.0;
22   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
23 
24   PetscFunctionBegin;
25   allowzeropivot = PetscNot(A->erroriffailure);
26 
27   if (a->idiagvalid) {
28     if (values) *values = a->idiag;
29     PetscFunctionReturn(0);
30   }
31   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
32   diag_offset = a->diag;
33   if (!a->idiag) {
34     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
35     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
36   }
37   diag  = a->idiag;
38   mdiag = a->idiag+bs2*mbs;
39   if (values) *values = a->idiag;
40   /* factor and invert each block */
41   switch (bs) {
42   case 1:
43     for (i=0; i<mbs; i++) {
44       odiag    = v + 1*diag_offset[i];
45       diag[0]  = odiag[0];
46       mdiag[0] = odiag[0];
47 
48       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
49         if (allowzeropivot) {
50           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
51           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
52           A->factorerror_zeropivot_row   = i;
53           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
54         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
55       }
56 
57       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
58       diag    += 1;
59       mdiag   += 1;
60     }
61     break;
62   case 2:
63     for (i=0; i<mbs; i++) {
64       odiag    = v + 4*diag_offset[i];
65       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
66       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
67       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
68       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
69       diag    += 4;
70       mdiag   += 4;
71     }
72     break;
73   case 3:
74     for (i=0; i<mbs; i++) {
75       odiag    = v + 9*diag_offset[i];
76       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78       diag[8]  = odiag[8];
79       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
80       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
81       mdiag[8] = odiag[8];
82       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
83       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
84       diag    += 9;
85       mdiag   += 9;
86     }
87     break;
88   case 4:
89     for (i=0; i<mbs; i++) {
90       odiag  = v + 16*diag_offset[i];
91       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
94       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
95       diag  += 16;
96       mdiag += 16;
97     }
98     break;
99   case 5:
100     for (i=0; i<mbs; i++) {
101       odiag  = v + 25*diag_offset[i];
102       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
105       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
106       diag  += 25;
107       mdiag += 25;
108     }
109     break;
110   case 6:
111     for (i=0; i<mbs; i++) {
112       odiag  = v + 36*diag_offset[i];
113       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
114       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117       diag  += 36;
118       mdiag += 36;
119     }
120     break;
121   case 7:
122     for (i=0; i<mbs; i++) {
123       odiag  = v + 49*diag_offset[i];
124       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += 49;
129       mdiag += 49;
130     }
131     break;
132   default:
133     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
134     for (i=0; i<mbs; i++) {
135       odiag  = v + bs2*diag_offset[i];
136       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
138       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
139       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
140       diag  += bs2;
141       mdiag += bs2;
142     }
143     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
144   }
145   a->idiagvalid = PETSC_TRUE;
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
150 {
151   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
152   PetscScalar       *x,*work,*w,*workt,*t;
153   const MatScalar   *v,*aa = a->a, *idiag;
154   const PetscScalar *b,*xb;
155   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
156   PetscErrorCode    ierr;
157   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
158   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
162   its = its*lits;
163   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
164   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168 
169   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
170 
171   if (!m) PetscFunctionReturn(0);
172   diag  = a->diag;
173   idiag = a->idiag;
174   k    = PetscMax(A->rmap->n,A->cmap->n);
175   if (!a->mult_work) {
176     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
177   }
178   if (!a->sor_workt) {
179     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
180   }
181   if (!a->sor_work) {
182     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
183   }
184   work = a->mult_work;
185   t    = a->sor_workt;
186   w    = a->sor_work;
187 
188   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
189   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
190 
191   if (flag & SOR_ZERO_INITIAL_GUESS) {
192     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
193       switch (bs) {
194       case 1:
195         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
196         t[0] = b[0];
197         i2     = 1;
198         idiag += 1;
199         for (i=1; i<m; i++) {
200           v  = aa + ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2];
204           for (j=0; j<nz; j++) {
205             xw[0] = x[vi[j]];
206             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
207           }
208           t[i2] = s[0];
209           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
210           x[i2]  = xw[0];
211           idiag += 1;
212           i2    += 1;
213         }
214         break;
215       case 2:
216         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
217         t[0] = b[0]; t[1] = b[1];
218         i2     = 2;
219         idiag += 4;
220         for (i=1; i<m; i++) {
221           v  = aa + 4*ai[i];
222           vi = aj + ai[i];
223           nz = diag[i] - ai[i];
224           s[0] = b[i2]; s[1] = b[i2+1];
225           for (j=0; j<nz; j++) {
226             idx = 2*vi[j];
227             it  = 4*j;
228             xw[0] = x[idx]; xw[1] = x[1+idx];
229             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
230           }
231           t[i2] = s[0]; t[i2+1] = s[1];
232           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
233           x[i2]   = xw[0]; x[i2+1] = xw[1];
234           idiag  += 4;
235           i2     += 2;
236         }
237         break;
238       case 3:
239         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
240         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
241         i2     = 3;
242         idiag += 9;
243         for (i=1; i<m; i++) {
244           v  = aa + 9*ai[i];
245           vi = aj + ai[i];
246           nz = diag[i] - ai[i];
247           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
248           while (nz--) {
249             idx = 3*(*vi++);
250             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
251             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
252             v  += 9;
253           }
254           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
255           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
256           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
257           idiag  += 9;
258           i2     += 3;
259         }
260         break;
261       case 4:
262         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
263         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
264         i2     = 4;
265         idiag += 16;
266         for (i=1; i<m; i++) {
267           v  = aa + 16*ai[i];
268           vi = aj + ai[i];
269           nz = diag[i] - ai[i];
270           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
271           while (nz--) {
272             idx = 4*(*vi++);
273             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
274             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
275             v  += 16;
276           }
277           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
278           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
279           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
280           idiag  += 16;
281           i2     += 4;
282         }
283         break;
284       case 5:
285         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
286         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
287         i2     = 5;
288         idiag += 25;
289         for (i=1; i<m; i++) {
290           v  = aa + 25*ai[i];
291           vi = aj + ai[i];
292           nz = diag[i] - ai[i];
293           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
294           while (nz--) {
295             idx = 5*(*vi++);
296             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
298             v  += 25;
299           }
300           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
301           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
302           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
303           idiag  += 25;
304           i2     += 5;
305         }
306         break;
307       case 6:
308         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
309         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
310         i2     = 6;
311         idiag += 36;
312         for (i=1; i<m; i++) {
313           v  = aa + 36*ai[i];
314           vi = aj + ai[i];
315           nz = diag[i] - ai[i];
316           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
317           while (nz--) {
318             idx = 6*(*vi++);
319             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
320             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
321             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
322             v  += 36;
323           }
324           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
325           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
326           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
328           idiag  += 36;
329           i2     += 6;
330         }
331         break;
332       case 7:
333         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
335         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
336         i2     = 7;
337         idiag += 49;
338         for (i=1; i<m; i++) {
339           v  = aa + 49*ai[i];
340           vi = aj + ai[i];
341           nz = diag[i] - ai[i];
342           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
343           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
344           while (nz--) {
345             idx = 7*(*vi++);
346             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
347             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
349             v  += 49;
350           }
351           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
352           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
353           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
354           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
355           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
356           idiag  += 49;
357           i2     += 7;
358         }
359         break;
360       default:
361         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
362         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
363         i2     = bs;
364         idiag += bs2;
365         for (i=1; i<m; i++) {
366           v  = aa + bs2*ai[i];
367           vi = aj + ai[i];
368           nz = diag[i] - ai[i];
369 
370           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
371           /* copy all rows of x that are needed into contiguous space */
372           workt = work;
373           for (j=0; j<nz; j++) {
374             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
375             workt += bs;
376           }
377           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
378           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
379           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
380 
381           idiag += bs2;
382           i2    += bs;
383         }
384         break;
385       }
386       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
388       xb = t;
389     }
390     else xb = b;
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392       idiag = a->idiag+bs2*(a->mbs-1);
393       i2 = bs * (m-1);
394       switch (bs) {
395       case 1:
396         s[0]  = xb[i2];
397         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
398         x[i2] = xw[0];
399         i2   -= 1;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + (diag[i]+1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2];
405           for (j=0; j<nz; j++) {
406             xw[0] = x[vi[j]];
407             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
408           }
409           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
410           x[i2]  = xw[0];
411           idiag -= 1;
412           i2    -= 1;
413         }
414         break;
415       case 2:
416         s[0]  = xb[i2]; s[1] = xb[i2+1];
417         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418         x[i2] = xw[0]; x[i2+1] = xw[1];
419         i2    -= 2;
420         idiag -= 4;
421         for (i=m-2; i>=0; i--) {
422           v  = aa + 4*(diag[i] + 1);
423           vi = aj + diag[i] + 1;
424           nz = ai[i+1] - diag[i] - 1;
425           s[0] = xb[i2]; s[1] = xb[i2+1];
426           for (j=0; j<nz; j++) {
427             idx = 2*vi[j];
428             it  = 4*j;
429             xw[0] = x[idx]; xw[1] = x[1+idx];
430             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
431           }
432           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
433           x[i2]   = xw[0]; x[i2+1] = xw[1];
434           idiag  -= 4;
435           i2     -= 2;
436         }
437         break;
438       case 3:
439         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
440         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442         i2    -= 3;
443         idiag -= 9;
444         for (i=m-2; i>=0; i--) {
445           v  = aa + 9*(diag[i]+1);
446           vi = aj + diag[i] + 1;
447           nz = ai[i+1] - diag[i] - 1;
448           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
449           while (nz--) {
450             idx = 3*(*vi++);
451             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
452             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
453             v  += 9;
454           }
455           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
456           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
457           idiag  -= 9;
458           i2     -= 3;
459         }
460         break;
461       case 4:
462         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
463         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465         i2    -= 4;
466         idiag -= 16;
467         for (i=m-2; i>=0; i--) {
468           v  = aa + 16*(diag[i]+1);
469           vi = aj + diag[i] + 1;
470           nz = ai[i+1] - diag[i] - 1;
471           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
472           while (nz--) {
473             idx = 4*(*vi++);
474             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
475             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
476             v  += 16;
477           }
478           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
479           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
480           idiag  -= 16;
481           i2     -= 4;
482         }
483         break;
484       case 5:
485         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
486         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488         i2    -= 5;
489         idiag -= 25;
490         for (i=m-2; i>=0; i--) {
491           v  = aa + 25*(diag[i]+1);
492           vi = aj + diag[i] + 1;
493           nz = ai[i+1] - diag[i] - 1;
494           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
495           while (nz--) {
496             idx = 5*(*vi++);
497             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
499             v  += 25;
500           }
501           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
503           idiag  -= 25;
504           i2     -= 5;
505         }
506         break;
507       case 6:
508         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
509         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
510         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
511         i2    -= 6;
512         idiag -= 36;
513         for (i=m-2; i>=0; i--) {
514           v  = aa + 36*(diag[i]+1);
515           vi = aj + diag[i] + 1;
516           nz = ai[i+1] - diag[i] - 1;
517           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
518           while (nz--) {
519             idx = 6*(*vi++);
520             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
521             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
522             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
523             v  += 36;
524           }
525           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
526           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
527           idiag  -= 36;
528           i2     -= 6;
529         }
530         break;
531       case 7:
532         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
533         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
534         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
535         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
536         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
537         i2    -= 7;
538         idiag -= 49;
539         for (i=m-2; i>=0; i--) {
540           v  = aa + 49*(diag[i]+1);
541           vi = aj + diag[i] + 1;
542           nz = ai[i+1] - diag[i] - 1;
543           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
544           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
545           while (nz--) {
546             idx = 7*(*vi++);
547             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
548             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
550             v  += 49;
551           }
552           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
553           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
554           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
555           idiag  -= 49;
556           i2     -= 7;
557         }
558         break;
559       default:
560         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
561         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
562         i2    -= bs;
563         idiag -= bs2;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + bs2*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568 
569           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
570           /* copy all rows of x that are needed into contiguous space */
571           workt = work;
572           for (j=0; j<nz; j++) {
573             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
574             workt += bs;
575           }
576           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
577           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
578 
579           idiag -= bs2;
580           i2    -= bs;
581         }
582         break;
583       }
584       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
585     }
586     its--;
587   }
588   while (its--) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
590       idiag = a->idiag;
591       i2 = 0;
592       switch (bs) {
593       case 1:
594         for (i=0; i<m; i++) {
595           v  = aa + ai[i];
596           vi = aj + ai[i];
597           nz = ai[i+1] - ai[i];
598           s[0] = b[i2];
599           for (j=0; j<nz; j++) {
600             xw[0] = x[vi[j]];
601             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
602           }
603           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
604           x[i2] += xw[0];
605           idiag += 1;
606           i2    += 1;
607         }
608         break;
609       case 2:
610         for (i=0; i<m; i++) {
611           v  = aa + 4*ai[i];
612           vi = aj + ai[i];
613           nz = ai[i+1] - ai[i];
614           s[0] = b[i2]; s[1] = b[i2+1];
615           for (j=0; j<nz; j++) {
616             idx = 2*vi[j];
617             it  = 4*j;
618             xw[0] = x[idx]; xw[1] = x[1+idx];
619             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
620           }
621           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
622           x[i2]  += xw[0]; x[i2+1] += xw[1];
623           idiag  += 4;
624           i2     += 2;
625         }
626         break;
627       case 3:
628         for (i=0; i<m; i++) {
629           v  = aa + 9*ai[i];
630           vi = aj + ai[i];
631           nz = ai[i+1] - ai[i];
632           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
633           while (nz--) {
634             idx = 3*(*vi++);
635             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
636             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
637             v  += 9;
638           }
639           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
640           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
641           idiag  += 9;
642           i2     += 3;
643         }
644         break;
645       case 4:
646         for (i=0; i<m; i++) {
647           v  = aa + 16*ai[i];
648           vi = aj + ai[i];
649           nz = ai[i+1] - ai[i];
650           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
651           while (nz--) {
652             idx = 4*(*vi++);
653             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
654             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
655             v  += 16;
656           }
657           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
658           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
659           idiag  += 16;
660           i2     += 4;
661         }
662         break;
663       case 5:
664         for (i=0; i<m; i++) {
665           v  = aa + 25*ai[i];
666           vi = aj + ai[i];
667           nz = ai[i+1] - ai[i];
668           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
669           while (nz--) {
670             idx = 5*(*vi++);
671             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
672             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
673             v  += 25;
674           }
675           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
676           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
677           idiag  += 25;
678           i2     += 5;
679         }
680         break;
681       case 6:
682         for (i=0; i<m; i++) {
683           v  = aa + 36*ai[i];
684           vi = aj + ai[i];
685           nz = ai[i+1] - ai[i];
686           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
687           while (nz--) {
688             idx = 6*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
692             v  += 36;
693           }
694           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
695           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
697           idiag  += 36;
698           i2     += 6;
699         }
700         break;
701       case 7:
702         for (i=0; i<m; i++) {
703           v  = aa + 49*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
707           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
708           while (nz--) {
709             idx = 7*(*vi++);
710             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
711             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
712             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
713             v  += 49;
714           }
715           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
716           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
717           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
718           idiag  += 49;
719           i2     += 7;
720         }
721         break;
722       default:
723         for (i=0; i<m; i++) {
724           v  = aa + bs2*ai[i];
725           vi = aj + ai[i];
726           nz = ai[i+1] - ai[i];
727 
728           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
729           /* copy all rows of x that are needed into contiguous space */
730           workt = work;
731           for (j=0; j<nz; j++) {
732             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
733             workt += bs;
734           }
735           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
736           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
737 
738           idiag += bs2;
739           i2    += bs;
740         }
741         break;
742       }
743       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
744     }
745     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
746       idiag = a->idiag+bs2*(a->mbs-1);
747       i2 = bs * (m-1);
748       switch (bs) {
749       case 1:
750         for (i=m-1; i>=0; i--) {
751           v  = aa + ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];
755           for (j=0; j<nz; j++) {
756             xw[0] = x[vi[j]];
757             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
758           }
759           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
760           x[i2] += xw[0];
761           idiag -= 1;
762           i2    -= 1;
763         }
764         break;
765       case 2:
766         for (i=m-1; i>=0; i--) {
767           v  = aa + 4*ai[i];
768           vi = aj + ai[i];
769           nz = ai[i+1] - ai[i];
770           s[0] = b[i2]; s[1] = b[i2+1];
771           for (j=0; j<nz; j++) {
772             idx = 2*vi[j];
773             it  = 4*j;
774             xw[0] = x[idx]; xw[1] = x[1+idx];
775             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
776           }
777           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
778           x[i2]  += xw[0]; x[i2+1] += xw[1];
779           idiag  -= 4;
780           i2     -= 2;
781         }
782         break;
783       case 3:
784         for (i=m-1; i>=0; i--) {
785           v  = aa + 9*ai[i];
786           vi = aj + ai[i];
787           nz = ai[i+1] - ai[i];
788           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
789           while (nz--) {
790             idx = 3*(*vi++);
791             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
792             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
793             v  += 9;
794           }
795           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
796           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
797           idiag  -= 9;
798           i2     -= 3;
799         }
800         break;
801       case 4:
802         for (i=m-1; i>=0; i--) {
803           v  = aa + 16*ai[i];
804           vi = aj + ai[i];
805           nz = ai[i+1] - ai[i];
806           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
807           while (nz--) {
808             idx = 4*(*vi++);
809             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
810             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
811             v  += 16;
812           }
813           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
814           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
815           idiag  -= 16;
816           i2     -= 4;
817         }
818         break;
819       case 5:
820         for (i=m-1; i>=0; i--) {
821           v  = aa + 25*ai[i];
822           vi = aj + ai[i];
823           nz = ai[i+1] - ai[i];
824           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
825           while (nz--) {
826             idx = 5*(*vi++);
827             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
828             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
829             v  += 25;
830           }
831           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
832           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
833           idiag  -= 25;
834           i2     -= 5;
835         }
836         break;
837       case 6:
838         for (i=m-1; i>=0; i--) {
839           v  = aa + 36*ai[i];
840           vi = aj + ai[i];
841           nz = ai[i+1] - ai[i];
842           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
843           while (nz--) {
844             idx = 6*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
848             v  += 36;
849           }
850           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
851           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
853           idiag  -= 36;
854           i2     -= 6;
855         }
856         break;
857       case 7:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + 49*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
863           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
864           while (nz--) {
865             idx = 7*(*vi++);
866             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
867             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
868             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
869             v  += 49;
870           }
871           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
872           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
873           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
874           idiag  -= 49;
875           i2     -= 7;
876         }
877         break;
878       default:
879         for (i=m-1; i>=0; i--) {
880           v  = aa + bs2*ai[i];
881           vi = aj + ai[i];
882           nz = ai[i+1] - ai[i];
883 
884           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885           /* copy all rows of x that are needed into contiguous space */
886           workt = work;
887           for (j=0; j<nz; j++) {
888             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
889             workt += bs;
890           }
891           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
892           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
893 
894           idiag -= bs2;
895           i2    -= bs;
896         }
897         break;
898       }
899       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
900     }
901   }
902   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
903   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 
908 /*
909     Special version for direct calls from Fortran (Used in PETSc-fun3d)
910 */
911 #if defined(PETSC_HAVE_FORTRAN_CAPS)
912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
914 #define matsetvaluesblocked4_ matsetvaluesblocked4
915 #endif
916 
917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918 {
919   Mat               A  = *AA;
920   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922   PetscInt          *ai    =a->i,*ailen=a->ilen;
923   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924   const PetscScalar *value = v;
925   MatScalar         *ap,*aa = a->a,*bap;
926 
927   PetscFunctionBegin;
928   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929   stepval = (n-1)*4;
930   for (k=0; k<m; k++) { /* loop over added rows */
931     row  = im[k];
932     rp   = aj + ai[row];
933     ap   = aa + 16*ai[row];
934     nrow = ailen[row];
935     low  = 0;
936     high = nrow;
937     for (l=0; l<n; l++) { /* loop over added columns */
938       col = in[l];
939       if (col <= lastcol)  low = 0;
940       else                high = nrow;
941       lastcol = col;
942       value   = v + k*(stepval+4 + l)*4;
943       while (high-low > 7) {
944         t = (low+high)/2;
945         if (rp[t] > col) high = t;
946         else             low  = t;
947       }
948       for (i=low; i<high; i++) {
949         if (rp[i] > col) break;
950         if (rp[i] == col) {
951           bap = ap +  16*i;
952           for (ii=0; ii<4; ii++,value+=stepval) {
953             for (jj=ii; jj<16; jj+=4) {
954               bap[jj] += *value++;
955             }
956           }
957           goto noinsert2;
958         }
959       }
960       N = nrow++ - 1;
961       high++; /* added new column index thus must search to one higher than before */
962       /* shift up all the later entries in this row */
963       for (ii=N; ii>=i; ii--) {
964         rp[ii+1] = rp[ii];
965         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966       }
967       if (N >= i) {
968         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969       }
970       rp[i] = col;
971       bap   = ap +  16*i;
972       for (ii=0; ii<4; ii++,value+=stepval) {
973         for (jj=ii; jj<16; jj+=4) {
974           bap[jj] = *value++;
975         }
976       }
977       noinsert2:;
978       low = i;
979     }
980     ailen[row] = nrow;
981   }
982   PetscFunctionReturnVoid();
983 }
984 
985 #if defined(PETSC_HAVE_FORTRAN_CAPS)
986 #define matsetvalues4_ MATSETVALUES4
987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988 #define matsetvalues4_ matsetvalues4
989 #endif
990 
991 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
992 {
993   Mat         A  = *AA;
994   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
995   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
996   PetscInt    *ai=a->i,*ailen=a->ilen;
997   PetscInt    *aj=a->j,brow,bcol;
998   PetscInt    ridx,cidx,lastcol = -1;
999   MatScalar   *ap,value,*aa=a->a,*bap;
1000 
1001   PetscFunctionBegin;
1002   for (k=0; k<m; k++) { /* loop over added rows */
1003     row  = im[k]; brow = row/4;
1004     rp   = aj + ai[brow];
1005     ap   = aa + 16*ai[brow];
1006     nrow = ailen[brow];
1007     low  = 0;
1008     high = nrow;
1009     for (l=0; l<n; l++) { /* loop over added columns */
1010       col   = in[l]; bcol = col/4;
1011       ridx  = row % 4; cidx = col % 4;
1012       value = v[l + k*n];
1013       if (col <= lastcol)  low = 0;
1014       else                high = nrow;
1015       lastcol = col;
1016       while (high-low > 7) {
1017         t = (low+high)/2;
1018         if (rp[t] > bcol) high = t;
1019         else              low  = t;
1020       }
1021       for (i=low; i<high; i++) {
1022         if (rp[i] > bcol) break;
1023         if (rp[i] == bcol) {
1024           bap   = ap +  16*i + 4*cidx + ridx;
1025           *bap += value;
1026           goto noinsert1;
1027         }
1028       }
1029       N = nrow++ - 1;
1030       high++; /* added new column thus must search to one higher than before */
1031       /* shift up all the later entries in this row */
1032       for (ii=N; ii>=i; ii--) {
1033         rp[ii+1] = rp[ii];
1034         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1035       }
1036       if (N>=i) {
1037         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1038       }
1039       rp[i]                    = bcol;
1040       ap[16*i + 4*cidx + ridx] = value;
1041 noinsert1:;
1042       low = i;
1043     }
1044     ailen[brow] = nrow;
1045   }
1046   PetscFunctionReturnVoid();
1047 }
1048 
1049 /*
1050      Checks for missing diagonals
1051 */
1052 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1053 {
1054   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       *diag,*ii = a->i,i;
1057 
1058   PetscFunctionBegin;
1059   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1060   *missing = PETSC_FALSE;
1061   if (A->rmap->n > 0 && !ii) {
1062     *missing = PETSC_TRUE;
1063     if (d) *d = 0;
1064     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1065   } else {
1066     diag = a->diag;
1067     for (i=0; i<a->mbs; i++) {
1068       if (diag[i] >= ii[i+1]) {
1069         *missing = PETSC_TRUE;
1070         if (d) *d = i;
1071         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1072         break;
1073       }
1074     }
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1080 {
1081   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       i,j,m = a->mbs;
1084 
1085   PetscFunctionBegin;
1086   if (!a->diag) {
1087     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1088     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1089     a->free_diag = PETSC_TRUE;
1090   }
1091   for (i=0; i<m; i++) {
1092     a->diag[i] = a->i[i+1];
1093     for (j=a->i[i]; j<a->i[i+1]; j++) {
1094       if (a->j[j] == i) {
1095         a->diag[i] = j;
1096         break;
1097       }
1098     }
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 
1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1105 {
1106   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1109   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1110 
1111   PetscFunctionBegin;
1112   *nn = n;
1113   if (!ia) PetscFunctionReturn(0);
1114   if (symmetric) {
1115     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1116     nz   = tia[n];
1117   } else {
1118     tia = a->i; tja = a->j;
1119   }
1120 
1121   if (!blockcompressed && bs > 1) {
1122     (*nn) *= bs;
1123     /* malloc & create the natural set of indices */
1124     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1125     if (n) {
1126       (*ia)[0] = oshift;
1127       for (j=1; j<bs; j++) {
1128         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1129       }
1130     }
1131 
1132     for (i=1; i<n; i++) {
1133       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1134       for (j=1; j<bs; j++) {
1135         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1136       }
1137     }
1138     if (n) {
1139       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1140     }
1141 
1142     if (inja) {
1143       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1144       cnt = 0;
1145       for (i=0; i<n; i++) {
1146         for (j=0; j<bs; j++) {
1147           for (k=tia[i]; k<tia[i+1]; k++) {
1148             for (l=0; l<bs; l++) {
1149               (*ja)[cnt++] = bs*tja[k] + l;
1150             }
1151           }
1152         }
1153       }
1154     }
1155 
1156     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1157       ierr = PetscFree(tia);CHKERRQ(ierr);
1158       ierr = PetscFree(tja);CHKERRQ(ierr);
1159     }
1160   } else if (oshift == 1) {
1161     if (symmetric) {
1162       nz = tia[A->rmap->n/bs];
1163       /*  add 1 to i and j indices */
1164       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1165       *ia = tia;
1166       if (ja) {
1167         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1168         *ja = tja;
1169       }
1170     } else {
1171       nz = a->i[A->rmap->n/bs];
1172       /* malloc space and  add 1 to i and j indices */
1173       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1174       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1175       if (ja) {
1176         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1177         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1178       }
1179     }
1180   } else {
1181     *ia = tia;
1182     if (ja) *ja = tja;
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1188 {
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   if (!ia) PetscFunctionReturn(0);
1193   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1194     ierr = PetscFree(*ia);CHKERRQ(ierr);
1195     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1201 {
1202   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206 #if defined(PETSC_USE_LOG)
1207   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1208 #endif
1209   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1210   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1211   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1212   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1213   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1214   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1215   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1216   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1217   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1218   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1219   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1220   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1221   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1222 
1223   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1224   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1225   ierr = PetscFree(A->data);CHKERRQ(ierr);
1226 
1227   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1238 #if defined(PETSC_HAVE_HYPRE)
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1240 #endif
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1245 {
1246   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   switch (op) {
1251   case MAT_ROW_ORIENTED:
1252     a->roworiented = flg;
1253     break;
1254   case MAT_KEEP_NONZERO_PATTERN:
1255     a->keepnonzeropattern = flg;
1256     break;
1257   case MAT_NEW_NONZERO_LOCATIONS:
1258     a->nonew = (flg ? 0 : 1);
1259     break;
1260   case MAT_NEW_NONZERO_LOCATION_ERR:
1261     a->nonew = (flg ? -1 : 0);
1262     break;
1263   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1264     a->nonew = (flg ? -2 : 0);
1265     break;
1266   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1267     a->nounused = (flg ? -1 : 0);
1268     break;
1269   case MAT_NEW_DIAGONALS:
1270   case MAT_IGNORE_OFF_PROC_ENTRIES:
1271   case MAT_USE_HASH_TABLE:
1272     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1273     break;
1274   case MAT_SPD:
1275   case MAT_SYMMETRIC:
1276   case MAT_STRUCTURALLY_SYMMETRIC:
1277   case MAT_HERMITIAN:
1278   case MAT_SYMMETRY_ETERNAL:
1279   case MAT_SUBMAT_SINGLEIS:
1280   case MAT_STRUCTURE_ONLY:
1281     /* These options are handled directly by MatSetOption() */
1282     break;
1283   default:
1284     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1285   }
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1290 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1291 {
1292   PetscErrorCode ierr;
1293   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1294   MatScalar      *aa_i;
1295   PetscScalar    *v_i;
1296 
1297   PetscFunctionBegin;
1298   bs  = A->rmap->bs;
1299   bs2 = bs*bs;
1300   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1301 
1302   bn  = row/bs;   /* Block number */
1303   bp  = row % bs; /* Block Position */
1304   M   = ai[bn+1] - ai[bn];
1305   *nz = bs*M;
1306 
1307   if (v) {
1308     *v = 0;
1309     if (*nz) {
1310       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1311       for (i=0; i<M; i++) { /* for each block in the block row */
1312         v_i  = *v + i*bs;
1313         aa_i = aa + bs2*(ai[bn] + i);
1314         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1315       }
1316     }
1317   }
1318 
1319   if (idx) {
1320     *idx = 0;
1321     if (*nz) {
1322       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1323       for (i=0; i<M; i++) { /* for each block in the block row */
1324         idx_i = *idx + i*bs;
1325         itmp  = bs*aj[ai[bn] + i];
1326         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1327       }
1328     }
1329   }
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1334 {
1335   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1344 {
1345   PetscErrorCode ierr;
1346 
1347   PetscFunctionBegin;
1348   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1349   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1354 {
1355   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1356   Mat            C;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1359   PetscInt       *rows,*cols,bs2=a->bs2;
1360   MatScalar      *array;
1361 
1362   PetscFunctionBegin;
1363   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1364   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1365     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1366 
1367     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1368     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1369     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1370     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1371     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1372     ierr = PetscFree(col);CHKERRQ(ierr);
1373   } else {
1374     C = *B;
1375   }
1376 
1377   array = a->a;
1378   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1379   for (i=0; i<mbs; i++) {
1380     cols[0] = i*bs;
1381     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1382     len = ai[i+1] - ai[i];
1383     for (j=0; j<len; j++) {
1384       rows[0] = (*aj++)*bs;
1385       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1386       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1387       array += bs2;
1388     }
1389   }
1390   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1391 
1392   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394 
1395   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1396     *B = C;
1397   } else {
1398     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1404 {
1405   PetscErrorCode ierr;
1406   Mat            Btrans;
1407 
1408   PetscFunctionBegin;
1409   *f   = PETSC_FALSE;
1410   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1411   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1412   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1417 {
1418   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1421   int            fd;
1422   PetscScalar    *aa;
1423   FILE           *file;
1424 
1425   PetscFunctionBegin;
1426   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1427   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1428   col_lens[0] = MAT_FILE_CLASSID;
1429 
1430   col_lens[1] = A->rmap->N;
1431   col_lens[2] = A->cmap->n;
1432   col_lens[3] = a->nz*bs2;
1433 
1434   /* store lengths of each row and write (including header) to file */
1435   count = 0;
1436   for (i=0; i<a->mbs; i++) {
1437     for (j=0; j<bs; j++) {
1438       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1439     }
1440   }
1441   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1442   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1443 
1444   /* store column indices (zero start index) */
1445   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1446   count = 0;
1447   for (i=0; i<a->mbs; i++) {
1448     for (j=0; j<bs; j++) {
1449       for (k=a->i[i]; k<a->i[i+1]; k++) {
1450         for (l=0; l<bs; l++) {
1451           jj[count++] = bs*a->j[k] + l;
1452         }
1453       }
1454     }
1455   }
1456   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1457   ierr = PetscFree(jj);CHKERRQ(ierr);
1458 
1459   /* store nonzero values */
1460   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1461   count = 0;
1462   for (i=0; i<a->mbs; i++) {
1463     for (j=0; j<bs; j++) {
1464       for (k=a->i[i]; k<a->i[i+1]; k++) {
1465         for (l=0; l<bs; l++) {
1466           aa[count++] = a->a[bs2*k + l*bs + j];
1467         }
1468       }
1469     }
1470   }
1471   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1472   ierr = PetscFree(aa);CHKERRQ(ierr);
1473 
1474   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1475   if (file) {
1476     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1477   }
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1482 {
1483   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1484   PetscErrorCode    ierr;
1485   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1486   PetscViewerFormat format;
1487 
1488   PetscFunctionBegin;
1489   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1490   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1491     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1492   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1493     const char *matname;
1494     Mat        aij;
1495     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1496     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1497     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1498     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1499     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1500   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1501       PetscFunctionReturn(0);
1502   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1503     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1504     for (i=0; i<a->mbs; i++) {
1505       for (j=0; j<bs; j++) {
1506         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1507         for (k=a->i[i]; k<a->i[i+1]; k++) {
1508           for (l=0; l<bs; l++) {
1509 #if defined(PETSC_USE_COMPLEX)
1510             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1511               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1512                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1513             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1514               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1515                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1516             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1517               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1518             }
1519 #else
1520             if (a->a[bs2*k + l*bs + j] != 0.0) {
1521               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1522             }
1523 #endif
1524           }
1525         }
1526         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1527       }
1528     }
1529     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1530   } else {
1531     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1532     for (i=0; i<a->mbs; i++) {
1533       for (j=0; j<bs; j++) {
1534         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1535         for (k=a->i[i]; k<a->i[i+1]; k++) {
1536           for (l=0; l<bs; l++) {
1537 #if defined(PETSC_USE_COMPLEX)
1538             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1543                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1544             } else {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1546             }
1547 #else
1548             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1549 #endif
1550           }
1551         }
1552         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1553       }
1554     }
1555     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1556   }
1557   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #include <petscdraw.h>
1562 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1563 {
1564   Mat               A = (Mat) Aa;
1565   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1566   PetscErrorCode    ierr;
1567   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1568   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1569   MatScalar         *aa;
1570   PetscViewer       viewer;
1571   PetscViewerFormat format;
1572 
1573   PetscFunctionBegin;
1574   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1575   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1576   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1577 
1578   /* loop over matrix elements drawing boxes */
1579 
1580   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1581     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1582     /* Blue for negative, Cyan for zero and  Red for positive */
1583     color = PETSC_DRAW_BLUE;
1584     for (i=0,row=0; i<mbs; i++,row+=bs) {
1585       for (j=a->i[i]; j<a->i[i+1]; j++) {
1586         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1587         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1588         aa  = a->a + j*bs2;
1589         for (k=0; k<bs; k++) {
1590           for (l=0; l<bs; l++) {
1591             if (PetscRealPart(*aa++) >=  0.) continue;
1592             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1593           }
1594         }
1595       }
1596     }
1597     color = PETSC_DRAW_CYAN;
1598     for (i=0,row=0; i<mbs; i++,row+=bs) {
1599       for (j=a->i[i]; j<a->i[i+1]; j++) {
1600         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1601         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1602         aa  = a->a + j*bs2;
1603         for (k=0; k<bs; k++) {
1604           for (l=0; l<bs; l++) {
1605             if (PetscRealPart(*aa++) != 0.) continue;
1606             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1607           }
1608         }
1609       }
1610     }
1611     color = PETSC_DRAW_RED;
1612     for (i=0,row=0; i<mbs; i++,row+=bs) {
1613       for (j=a->i[i]; j<a->i[i+1]; j++) {
1614         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1615         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1616         aa  = a->a + j*bs2;
1617         for (k=0; k<bs; k++) {
1618           for (l=0; l<bs; l++) {
1619             if (PetscRealPart(*aa++) <= 0.) continue;
1620             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1621           }
1622         }
1623       }
1624     }
1625     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1626   } else {
1627     /* use contour shading to indicate magnitude of values */
1628     /* first determine max of all nonzero values */
1629     PetscReal minv = 0.0, maxv = 0.0;
1630     PetscDraw popup;
1631 
1632     for (i=0; i<a->nz*a->bs2; i++) {
1633       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1634     }
1635     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1636     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1637     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1638 
1639     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1640     for (i=0,row=0; i<mbs; i++,row+=bs) {
1641       for (j=a->i[i]; j<a->i[i+1]; j++) {
1642         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1643         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1644         aa  = a->a + j*bs2;
1645         for (k=0; k<bs; k++) {
1646           for (l=0; l<bs; l++) {
1647             MatScalar v = *aa++;
1648             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1649             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1650           }
1651         }
1652       }
1653     }
1654     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1655   }
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1660 {
1661   PetscErrorCode ierr;
1662   PetscReal      xl,yl,xr,yr,w,h;
1663   PetscDraw      draw;
1664   PetscBool      isnull;
1665 
1666   PetscFunctionBegin;
1667   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1668   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1669   if (isnull) PetscFunctionReturn(0);
1670 
1671   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1672   xr  += w;          yr += h;        xl = -w;     yl = -h;
1673   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1674   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1675   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1676   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1677   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1682 {
1683   PetscErrorCode ierr;
1684   PetscBool      iascii,isbinary,isdraw;
1685 
1686   PetscFunctionBegin;
1687   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1688   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1689   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1690   if (iascii) {
1691     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1692   } else if (isbinary) {
1693     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1694   } else if (isdraw) {
1695     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1696   } else {
1697     Mat B;
1698     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1699     ierr = MatView(B,viewer);CHKERRQ(ierr);
1700     ierr = MatDestroy(&B);CHKERRQ(ierr);
1701   }
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 
1706 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1707 {
1708   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1709   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1710   PetscInt    *ai = a->i,*ailen = a->ilen;
1711   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1712   MatScalar   *ap,*aa = a->a;
1713 
1714   PetscFunctionBegin;
1715   for (k=0; k<m; k++) { /* loop over rows */
1716     row = im[k]; brow = row/bs;
1717     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1718     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1719     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1720     nrow = ailen[brow];
1721     for (l=0; l<n; l++) { /* loop over columns */
1722       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1723       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1724       col  = in[l];
1725       bcol = col/bs;
1726       cidx = col%bs;
1727       ridx = row%bs;
1728       high = nrow;
1729       low  = 0; /* assume unsorted */
1730       while (high-low > 5) {
1731         t = (low+high)/2;
1732         if (rp[t] > bcol) high = t;
1733         else             low  = t;
1734       }
1735       for (i=low; i<high; i++) {
1736         if (rp[i] > bcol) break;
1737         if (rp[i] == bcol) {
1738           *v++ = ap[bs2*i+bs*cidx+ridx];
1739           goto finished;
1740         }
1741       }
1742       *v++ = 0.0;
1743 finished:;
1744     }
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1750 {
1751   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1752   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1753   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1754   PetscErrorCode    ierr;
1755   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1756   PetscBool         roworiented=a->roworiented;
1757   const PetscScalar *value     = v;
1758   MatScalar         *ap,*aa = a->a,*bap;
1759 
1760   PetscFunctionBegin;
1761   if (roworiented) {
1762     stepval = (n-1)*bs;
1763   } else {
1764     stepval = (m-1)*bs;
1765   }
1766   for (k=0; k<m; k++) { /* loop over added rows */
1767     row = im[k];
1768     if (row < 0) continue;
1769 #if defined(PETSC_USE_DEBUG)
1770     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1771 #endif
1772     rp   = aj + ai[row];
1773     ap   = aa + bs2*ai[row];
1774     rmax = imax[row];
1775     nrow = ailen[row];
1776     low  = 0;
1777     high = nrow;
1778     for (l=0; l<n; l++) { /* loop over added columns */
1779       if (in[l] < 0) continue;
1780 #if defined(PETSC_USE_DEBUG)
1781       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1782 #endif
1783       col = in[l];
1784       if (roworiented) {
1785         value = v + (k*(stepval+bs) + l)*bs;
1786       } else {
1787         value = v + (l*(stepval+bs) + k)*bs;
1788       }
1789       if (col <= lastcol) low = 0;
1790       else high = nrow;
1791       lastcol = col;
1792       while (high-low > 7) {
1793         t = (low+high)/2;
1794         if (rp[t] > col) high = t;
1795         else             low  = t;
1796       }
1797       for (i=low; i<high; i++) {
1798         if (rp[i] > col) break;
1799         if (rp[i] == col) {
1800           bap = ap +  bs2*i;
1801           if (roworiented) {
1802             if (is == ADD_VALUES) {
1803               for (ii=0; ii<bs; ii++,value+=stepval) {
1804                 for (jj=ii; jj<bs2; jj+=bs) {
1805                   bap[jj] += *value++;
1806                 }
1807               }
1808             } else {
1809               for (ii=0; ii<bs; ii++,value+=stepval) {
1810                 for (jj=ii; jj<bs2; jj+=bs) {
1811                   bap[jj] = *value++;
1812                 }
1813               }
1814             }
1815           } else {
1816             if (is == ADD_VALUES) {
1817               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1818                 for (jj=0; jj<bs; jj++) {
1819                   bap[jj] += value[jj];
1820                 }
1821                 bap += bs;
1822               }
1823             } else {
1824               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1825                 for (jj=0; jj<bs; jj++) {
1826                   bap[jj]  = value[jj];
1827                 }
1828                 bap += bs;
1829               }
1830             }
1831           }
1832           goto noinsert2;
1833         }
1834       }
1835       if (nonew == 1) goto noinsert2;
1836       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1837       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1838       N = nrow++ - 1; high++;
1839       /* shift up all the later entries in this row */
1840       for (ii=N; ii>=i; ii--) {
1841         rp[ii+1] = rp[ii];
1842         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1843       }
1844       if (N >= i) {
1845         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1846       }
1847       rp[i] = col;
1848       bap   = ap +  bs2*i;
1849       if (roworiented) {
1850         for (ii=0; ii<bs; ii++,value+=stepval) {
1851           for (jj=ii; jj<bs2; jj+=bs) {
1852             bap[jj] = *value++;
1853           }
1854         }
1855       } else {
1856         for (ii=0; ii<bs; ii++,value+=stepval) {
1857           for (jj=0; jj<bs; jj++) {
1858             *bap++ = *value++;
1859           }
1860         }
1861       }
1862 noinsert2:;
1863       low = i;
1864     }
1865     ailen[row] = nrow;
1866   }
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1871 {
1872   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1873   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1874   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1875   PetscErrorCode ierr;
1876   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1877   MatScalar      *aa  = a->a,*ap;
1878   PetscReal      ratio=0.6;
1879 
1880   PetscFunctionBegin;
1881   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1882 
1883   if (m) rmax = ailen[0];
1884   for (i=1; i<mbs; i++) {
1885     /* move each row back by the amount of empty slots (fshift) before it*/
1886     fshift += imax[i-1] - ailen[i-1];
1887     rmax    = PetscMax(rmax,ailen[i]);
1888     if (fshift) {
1889       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1890       N  = ailen[i];
1891       for (j=0; j<N; j++) {
1892         ip[j-fshift] = ip[j];
1893         if (!A->structure_only) {
1894           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1895         }
1896       }
1897     }
1898     ai[i] = ai[i-1] + ailen[i-1];
1899   }
1900   if (mbs) {
1901     fshift += imax[mbs-1] - ailen[mbs-1];
1902     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1903   }
1904 
1905   /* reset ilen and imax for each row */
1906   a->nonzerorowcnt = 0;
1907   if (A->structure_only) {
1908     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1909   } else { /* !A->structure_only */
1910     for (i=0; i<mbs; i++) {
1911       ailen[i] = imax[i] = ai[i+1] - ai[i];
1912       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1913     }
1914   }
1915   a->nz = ai[mbs];
1916 
1917   /* diagonals may have moved, so kill the diagonal pointers */
1918   a->idiagvalid = PETSC_FALSE;
1919   if (fshift && a->diag) {
1920     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1921     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1922     a->diag = 0;
1923   }
1924   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1925   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1926   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1927   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1928 
1929   A->info.mallocs    += a->reallocs;
1930   a->reallocs         = 0;
1931   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1932   a->rmax             = rmax;
1933 
1934   if (!A->structure_only) {
1935     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1936   }
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 /*
1941    This function returns an array of flags which indicate the locations of contiguous
1942    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1943    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1944    Assume: sizes should be long enough to hold all the values.
1945 */
1946 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1947 {
1948   PetscInt  i,j,k,row;
1949   PetscBool flg;
1950 
1951   PetscFunctionBegin;
1952   for (i=0,j=0; i<n; j++) {
1953     row = idx[i];
1954     if (row%bs!=0) { /* Not the begining of a block */
1955       sizes[j] = 1;
1956       i++;
1957     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1958       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1959       i++;
1960     } else { /* Begining of the block, so check if the complete block exists */
1961       flg = PETSC_TRUE;
1962       for (k=1; k<bs; k++) {
1963         if (row+k != idx[i+k]) { /* break in the block */
1964           flg = PETSC_FALSE;
1965           break;
1966         }
1967       }
1968       if (flg) { /* No break in the bs */
1969         sizes[j] = bs;
1970         i       += bs;
1971       } else {
1972         sizes[j] = 1;
1973         i++;
1974       }
1975     }
1976   }
1977   *bs_max = j;
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1982 {
1983   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1984   PetscErrorCode    ierr;
1985   PetscInt          i,j,k,count,*rows;
1986   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1987   PetscScalar       zero = 0.0;
1988   MatScalar         *aa;
1989   const PetscScalar *xx;
1990   PetscScalar       *bb;
1991 
1992   PetscFunctionBegin;
1993   /* fix right hand side if needed */
1994   if (x && b) {
1995     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1996     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1997     for (i=0; i<is_n; i++) {
1998       bb[is_idx[i]] = diag*xx[is_idx[i]];
1999     }
2000     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2001     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2002   }
2003 
2004   /* Make a copy of the IS and  sort it */
2005   /* allocate memory for rows,sizes */
2006   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2007 
2008   /* copy IS values to rows, and sort them */
2009   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2010   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2011 
2012   if (baij->keepnonzeropattern) {
2013     for (i=0; i<is_n; i++) sizes[i] = 1;
2014     bs_max          = is_n;
2015   } else {
2016     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2017     A->nonzerostate++;
2018   }
2019 
2020   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2021     row = rows[j];
2022     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2023     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2024     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2025     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2026       if (diag != (PetscScalar)0.0) {
2027         if (baij->ilen[row/bs] > 0) {
2028           baij->ilen[row/bs]       = 1;
2029           baij->j[baij->i[row/bs]] = row/bs;
2030 
2031           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2032         }
2033         /* Now insert all the diagonal values for this bs */
2034         for (k=0; k<bs; k++) {
2035           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2036         }
2037       } else { /* (diag == 0.0) */
2038         baij->ilen[row/bs] = 0;
2039       } /* end (diag == 0.0) */
2040     } else { /* (sizes[i] != bs) */
2041 #if defined(PETSC_USE_DEBUG)
2042       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2043 #endif
2044       for (k=0; k<count; k++) {
2045         aa[0] =  zero;
2046         aa   += bs;
2047       }
2048       if (diag != (PetscScalar)0.0) {
2049         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2050       }
2051     }
2052   }
2053 
2054   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2055   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2060 {
2061   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2062   PetscErrorCode    ierr;
2063   PetscInt          i,j,k,count;
2064   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2065   PetscScalar       zero = 0.0;
2066   MatScalar         *aa;
2067   const PetscScalar *xx;
2068   PetscScalar       *bb;
2069   PetscBool         *zeroed,vecs = PETSC_FALSE;
2070 
2071   PetscFunctionBegin;
2072   /* fix right hand side if needed */
2073   if (x && b) {
2074     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2075     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2076     vecs = PETSC_TRUE;
2077   }
2078 
2079   /* zero the columns */
2080   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2081   for (i=0; i<is_n; i++) {
2082     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2083     zeroed[is_idx[i]] = PETSC_TRUE;
2084   }
2085   for (i=0; i<A->rmap->N; i++) {
2086     if (!zeroed[i]) {
2087       row = i/bs;
2088       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2089         for (k=0; k<bs; k++) {
2090           col = bs*baij->j[j] + k;
2091           if (zeroed[col]) {
2092             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2093             if (vecs) bb[i] -= aa[0]*xx[col];
2094             aa[0] = 0.0;
2095           }
2096         }
2097       }
2098     } else if (vecs) bb[i] = diag*xx[i];
2099   }
2100   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2101   if (vecs) {
2102     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2103     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2104   }
2105 
2106   /* zero the rows */
2107   for (i=0; i<is_n; i++) {
2108     row   = is_idx[i];
2109     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2110     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2111     for (k=0; k<count; k++) {
2112       aa[0] =  zero;
2113       aa   += bs;
2114     }
2115     if (diag != (PetscScalar)0.0) {
2116       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2117     }
2118   }
2119   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2124 {
2125   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2126   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2127   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2128   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2129   PetscErrorCode ierr;
2130   PetscInt       ridx,cidx,bs2=a->bs2;
2131   PetscBool      roworiented=a->roworiented;
2132   MatScalar      *ap,value,*aa=a->a,*bap;
2133 
2134   PetscFunctionBegin;
2135   for (k=0; k<m; k++) { /* loop over added rows */
2136     row  = im[k];
2137     brow = row/bs;
2138     if (row < 0) continue;
2139 #if defined(PETSC_USE_DEBUG)
2140     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2141 #endif
2142     rp   = aj + ai[brow];
2143     if (!A->structure_only) ap = aa + bs2*ai[brow];
2144     rmax = imax[brow];
2145     nrow = ailen[brow];
2146     low  = 0;
2147     high = nrow;
2148     for (l=0; l<n; l++) { /* loop over added columns */
2149       if (in[l] < 0) continue;
2150 #if defined(PETSC_USE_DEBUG)
2151       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2152 #endif
2153       col  = in[l]; bcol = col/bs;
2154       ridx = row % bs; cidx = col % bs;
2155       if (!A->structure_only) {
2156         if (roworiented) {
2157           value = v[l + k*n];
2158         } else {
2159           value = v[k + l*m];
2160         }
2161       }
2162       if (col <= lastcol) low = 0; else high = nrow;
2163       lastcol = col;
2164       while (high-low > 7) {
2165         t = (low+high)/2;
2166         if (rp[t] > bcol) high = t;
2167         else              low  = t;
2168       }
2169       for (i=low; i<high; i++) {
2170         if (rp[i] > bcol) break;
2171         if (rp[i] == bcol) {
2172           bap = ap +  bs2*i + bs*cidx + ridx;
2173           if (!A->structure_only) {
2174             if (is == ADD_VALUES) *bap += value;
2175             else                  *bap  = value;
2176           }
2177           goto noinsert1;
2178         }
2179       }
2180       if (nonew == 1) goto noinsert1;
2181       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2182       if (A->structure_only) {
2183         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2184       } else {
2185         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2186       }
2187       N = nrow++ - 1; high++;
2188       /* shift up all the later entries in this row */
2189       for (ii=N; ii>=i; ii--) {
2190         rp[ii+1] = rp[ii];
2191         if (!A->structure_only) {
2192           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2193         }
2194       }
2195       if (N>=i && !A->structure_only) {
2196         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2197       }
2198       rp[i]                      = bcol;
2199       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2200       a->nz++;
2201       A->nonzerostate++;
2202 noinsert1:;
2203       low = i;
2204     }
2205     ailen[brow] = nrow;
2206   }
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2211 {
2212   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2213   Mat            outA;
2214   PetscErrorCode ierr;
2215   PetscBool      row_identity,col_identity;
2216 
2217   PetscFunctionBegin;
2218   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2219   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2220   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2221   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2222 
2223   outA            = inA;
2224   inA->factortype = MAT_FACTOR_LU;
2225   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2226   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2227 
2228   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2229 
2230   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2231   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2232   a->row = row;
2233   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2234   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2235   a->col = col;
2236 
2237   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2238   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2239   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2240   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2241 
2242   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2243   if (!a->solve_work) {
2244     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2245     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2246   }
2247   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2252 {
2253   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2254   PetscInt    i,nz,mbs;
2255 
2256   PetscFunctionBegin;
2257   nz  = baij->maxnz;
2258   mbs = baij->mbs;
2259   for (i=0; i<nz; i++) {
2260     baij->j[i] = indices[i];
2261   }
2262   baij->nz = nz;
2263   for (i=0; i<mbs; i++) {
2264     baij->ilen[i] = baij->imax[i];
2265   }
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 /*@
2270     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2271        in the matrix.
2272 
2273   Input Parameters:
2274 +  mat - the SeqBAIJ matrix
2275 -  indices - the column indices
2276 
2277   Level: advanced
2278 
2279   Notes:
2280     This can be called if you have precomputed the nonzero structure of the
2281   matrix and want to provide it to the matrix object to improve the performance
2282   of the MatSetValues() operation.
2283 
2284     You MUST have set the correct numbers of nonzeros per row in the call to
2285   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2286 
2287     MUST be called before any calls to MatSetValues();
2288 
2289 @*/
2290 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2291 {
2292   PetscErrorCode ierr;
2293 
2294   PetscFunctionBegin;
2295   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2296   PetscValidPointer(indices,2);
2297   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2302 {
2303   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2304   PetscErrorCode ierr;
2305   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2306   PetscReal      atmp;
2307   PetscScalar    *x,zero = 0.0;
2308   MatScalar      *aa;
2309   PetscInt       ncols,brow,krow,kcol;
2310 
2311   PetscFunctionBegin;
2312   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2313   bs  = A->rmap->bs;
2314   aa  = a->a;
2315   ai  = a->i;
2316   aj  = a->j;
2317   mbs = a->mbs;
2318 
2319   ierr = VecSet(v,zero);CHKERRQ(ierr);
2320   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2321   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2322   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2323   for (i=0; i<mbs; i++) {
2324     ncols = ai[1] - ai[0]; ai++;
2325     brow  = bs*i;
2326     for (j=0; j<ncols; j++) {
2327       for (kcol=0; kcol<bs; kcol++) {
2328         for (krow=0; krow<bs; krow++) {
2329           atmp = PetscAbsScalar(*aa);aa++;
2330           row  = brow + krow;   /* row index */
2331           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2332         }
2333       }
2334       aj++;
2335     }
2336   }
2337   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2342 {
2343   PetscErrorCode ierr;
2344 
2345   PetscFunctionBegin;
2346   /* If the two matrices have the same copy implementation, use fast copy. */
2347   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2348     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2349     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2350     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2351 
2352     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2353     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2354     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2355   } else {
2356     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2357   }
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2362 {
2363   PetscErrorCode ierr;
2364 
2365   PetscFunctionBegin;
2366   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2371 {
2372   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2373 
2374   PetscFunctionBegin;
2375   *array = a->a;
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2380 {
2381   PetscFunctionBegin;
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2386 {
2387   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2388   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2389   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2390   PetscErrorCode ierr;
2391 
2392   PetscFunctionBegin;
2393   /* Set the number of nonzeros in the new matrix */
2394   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2399 {
2400   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2401   PetscErrorCode ierr;
2402   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2403   PetscBLASInt   one=1;
2404 
2405   PetscFunctionBegin;
2406   if (str == SAME_NONZERO_PATTERN) {
2407     PetscScalar  alpha = a;
2408     PetscBLASInt bnz;
2409     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2410     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2411     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2412   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2413     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2414   } else {
2415     Mat      B;
2416     PetscInt *nnz;
2417     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2418     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2419     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2420     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2421     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2422     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2423     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2424     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2425     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2426     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2427     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2428     ierr = PetscFree(nnz);CHKERRQ(ierr);
2429   }
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2434 {
2435   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2436   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2437   MatScalar   *aa = a->a;
2438 
2439   PetscFunctionBegin;
2440   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2445 {
2446   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2447   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2448   MatScalar   *aa = a->a;
2449 
2450   PetscFunctionBegin;
2451   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 /*
2456     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2457 */
2458 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2459 {
2460   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2461   PetscErrorCode ierr;
2462   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2463   PetscInt       nz = a->i[m],row,*jj,mr,col;
2464 
2465   PetscFunctionBegin;
2466   *nn = n;
2467   if (!ia) PetscFunctionReturn(0);
2468   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2469   else {
2470     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2471     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2472     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2473     jj   = a->j;
2474     for (i=0; i<nz; i++) {
2475       collengths[jj[i]]++;
2476     }
2477     cia[0] = oshift;
2478     for (i=0; i<n; i++) {
2479       cia[i+1] = cia[i] + collengths[i];
2480     }
2481     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2482     jj   = a->j;
2483     for (row=0; row<m; row++) {
2484       mr = a->i[row+1] - a->i[row];
2485       for (i=0; i<mr; i++) {
2486         col = *jj++;
2487 
2488         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2489       }
2490     }
2491     ierr = PetscFree(collengths);CHKERRQ(ierr);
2492     *ia  = cia; *ja = cja;
2493   }
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2498 {
2499   PetscErrorCode ierr;
2500 
2501   PetscFunctionBegin;
2502   if (!ia) PetscFunctionReturn(0);
2503   ierr = PetscFree(*ia);CHKERRQ(ierr);
2504   ierr = PetscFree(*ja);CHKERRQ(ierr);
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 /*
2509  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2510  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2511  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2512  */
2513 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2514 {
2515   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2516   PetscErrorCode ierr;
2517   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2518   PetscInt       nz = a->i[m],row,*jj,mr,col;
2519   PetscInt       *cspidx;
2520 
2521   PetscFunctionBegin;
2522   *nn = n;
2523   if (!ia) PetscFunctionReturn(0);
2524 
2525   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2526   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2527   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2528   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2529   jj   = a->j;
2530   for (i=0; i<nz; i++) {
2531     collengths[jj[i]]++;
2532   }
2533   cia[0] = oshift;
2534   for (i=0; i<n; i++) {
2535     cia[i+1] = cia[i] + collengths[i];
2536   }
2537   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2538   jj   = a->j;
2539   for (row=0; row<m; row++) {
2540     mr = a->i[row+1] - a->i[row];
2541     for (i=0; i<mr; i++) {
2542       col = *jj++;
2543       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2544       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2545     }
2546   }
2547   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2548   *ia    = cia; *ja = cja;
2549   *spidx = cspidx;
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2554 {
2555   PetscErrorCode ierr;
2556 
2557   PetscFunctionBegin;
2558   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2559   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2564 {
2565   PetscErrorCode ierr;
2566   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2567 
2568   PetscFunctionBegin;
2569   if (!Y->preallocated || !aij->nz) {
2570     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2571   }
2572   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 /* -------------------------------------------------------------------*/
2577 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2578                                        MatGetRow_SeqBAIJ,
2579                                        MatRestoreRow_SeqBAIJ,
2580                                        MatMult_SeqBAIJ_N,
2581                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2582                                        MatMultTranspose_SeqBAIJ,
2583                                        MatMultTransposeAdd_SeqBAIJ,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                /* 10*/ 0,
2588                                        MatLUFactor_SeqBAIJ,
2589                                        0,
2590                                        0,
2591                                        MatTranspose_SeqBAIJ,
2592                                /* 15*/ MatGetInfo_SeqBAIJ,
2593                                        MatEqual_SeqBAIJ,
2594                                        MatGetDiagonal_SeqBAIJ,
2595                                        MatDiagonalScale_SeqBAIJ,
2596                                        MatNorm_SeqBAIJ,
2597                                /* 20*/ 0,
2598                                        MatAssemblyEnd_SeqBAIJ,
2599                                        MatSetOption_SeqBAIJ,
2600                                        MatZeroEntries_SeqBAIJ,
2601                                /* 24*/ MatZeroRows_SeqBAIJ,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                /* 29*/ MatSetUp_SeqBAIJ,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                /* 34*/ MatDuplicate_SeqBAIJ,
2612                                        0,
2613                                        0,
2614                                        MatILUFactor_SeqBAIJ,
2615                                        0,
2616                                /* 39*/ MatAXPY_SeqBAIJ,
2617                                        MatCreateSubMatrices_SeqBAIJ,
2618                                        MatIncreaseOverlap_SeqBAIJ,
2619                                        MatGetValues_SeqBAIJ,
2620                                        MatCopy_SeqBAIJ,
2621                                /* 44*/ 0,
2622                                        MatScale_SeqBAIJ,
2623                                        MatShift_SeqBAIJ,
2624                                        0,
2625                                        MatZeroRowsColumns_SeqBAIJ,
2626                                /* 49*/ 0,
2627                                        MatGetRowIJ_SeqBAIJ,
2628                                        MatRestoreRowIJ_SeqBAIJ,
2629                                        MatGetColumnIJ_SeqBAIJ,
2630                                        MatRestoreColumnIJ_SeqBAIJ,
2631                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                        MatSetValuesBlocked_SeqBAIJ,
2636                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2637                                        MatDestroy_SeqBAIJ,
2638                                        MatView_SeqBAIJ,
2639                                        0,
2640                                        0,
2641                                /* 64*/ 0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2647                                        0,
2648                                        MatConvert_Basic,
2649                                        0,
2650                                        0,
2651                                /* 74*/ 0,
2652                                        MatFDColoringApply_BAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                /* 79*/ 0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                        MatLoad_SeqBAIJ,
2661                                /* 84*/ 0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                /* 89*/ 0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                /* 94*/ 0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                /* 99*/ 0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                /*104*/ 0,
2682                                        MatRealPart_SeqBAIJ,
2683                                        MatImaginaryPart_SeqBAIJ,
2684                                        0,
2685                                        0,
2686                                /*109*/ 0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        MatMissingDiagonal_SeqBAIJ,
2691                                /*114*/ 0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                /*119*/ 0,
2697                                        0,
2698                                        MatMultHermitianTranspose_SeqBAIJ,
2699                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2700                                        0,
2701                                /*124*/ 0,
2702                                        0,
2703                                        MatInvertBlockDiagonal_SeqBAIJ,
2704                                        0,
2705                                        0,
2706                                /*129*/ 0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                /*134*/ 0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                /*139*/ MatSetBlockSizes_Default,
2717                                        0,
2718                                        0,
2719                                        MatFDColoringSetUp_SeqXAIJ,
2720                                        0,
2721                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2722 };
2723 
2724 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2725 {
2726   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2727   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2728   PetscErrorCode ierr;
2729 
2730   PetscFunctionBegin;
2731   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2732 
2733   /* allocate space for values if not already there */
2734   if (!aij->saved_values) {
2735     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2736     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2737   }
2738 
2739   /* copy values over */
2740   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2745 {
2746   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2747   PetscErrorCode ierr;
2748   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2749 
2750   PetscFunctionBegin;
2751   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2752   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2753 
2754   /* copy values over */
2755   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2756   PetscFunctionReturn(0);
2757 }
2758 
2759 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2760 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2761 
2762 static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2763 {
2764   Mat_SeqBAIJ    *b;
2765   PetscErrorCode ierr;
2766   PetscInt       i,mbs,nbs,bs2;
2767   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2768 
2769   PetscFunctionBegin;
2770   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2771   if (nz == MAT_SKIP_ALLOCATION) {
2772     skipallocation = PETSC_TRUE;
2773     nz             = 0;
2774   }
2775 
2776   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2777   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2778   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2779   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2780 
2781   B->preallocated = PETSC_TRUE;
2782 
2783   mbs = B->rmap->n/bs;
2784   nbs = B->cmap->n/bs;
2785   bs2 = bs*bs;
2786 
2787   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2788 
2789   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2790   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2791   if (nnz) {
2792     for (i=0; i<mbs; i++) {
2793       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2794       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2795     }
2796   }
2797 
2798   b    = (Mat_SeqBAIJ*)B->data;
2799   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2800   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2801   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2802 
2803   if (!flg) {
2804     switch (bs) {
2805     case 1:
2806       B->ops->mult    = MatMult_SeqBAIJ_1;
2807       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2808       break;
2809     case 2:
2810       B->ops->mult    = MatMult_SeqBAIJ_2;
2811       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2812       break;
2813     case 3:
2814       B->ops->mult    = MatMult_SeqBAIJ_3;
2815       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2816       break;
2817     case 4:
2818       B->ops->mult    = MatMult_SeqBAIJ_4;
2819       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2820       break;
2821     case 5:
2822       B->ops->mult    = MatMult_SeqBAIJ_5;
2823       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2824       break;
2825     case 6:
2826       B->ops->mult    = MatMult_SeqBAIJ_6;
2827       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2828       break;
2829     case 7:
2830       B->ops->mult    = MatMult_SeqBAIJ_7;
2831       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2832       break;
2833     case 11:
2834       B->ops->mult    = MatMult_SeqBAIJ_11;
2835       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2836       break;
2837     case 15:
2838       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2839       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2840       break;
2841     default:
2842       B->ops->mult    = MatMult_SeqBAIJ_N;
2843       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2844       break;
2845     }
2846   }
2847   B->ops->sor = MatSOR_SeqBAIJ;
2848   b->mbs = mbs;
2849   b->nbs = nbs;
2850   if (!skipallocation) {
2851     if (!b->imax) {
2852       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2853       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2854 
2855       b->free_imax_ilen = PETSC_TRUE;
2856     }
2857     /* b->ilen will count nonzeros in each block row so far. */
2858     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2859     if (!nnz) {
2860       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2861       else if (nz < 0) nz = 1;
2862       for (i=0; i<mbs; i++) b->imax[i] = nz;
2863       nz = nz*mbs;
2864     } else {
2865       nz = 0;
2866       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2867     }
2868 
2869     /* allocate the matrix space */
2870     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2871     if (B->structure_only) {
2872       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2873       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2874       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2875     } else {
2876       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2877       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2878       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2879     }
2880     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2881 
2882     if (B->structure_only) {
2883       b->singlemalloc = PETSC_FALSE;
2884       b->free_a       = PETSC_FALSE;
2885     } else {
2886       b->singlemalloc = PETSC_TRUE;
2887       b->free_a       = PETSC_TRUE;
2888     }
2889     b->free_ij = PETSC_TRUE;
2890 
2891     b->i[0] = 0;
2892     for (i=1; i<mbs+1; i++) {
2893       b->i[i] = b->i[i-1] + b->imax[i-1];
2894     }
2895 
2896   } else {
2897     b->free_a  = PETSC_FALSE;
2898     b->free_ij = PETSC_FALSE;
2899   }
2900 
2901   b->bs2              = bs2;
2902   b->mbs              = mbs;
2903   b->nz               = 0;
2904   b->maxnz            = nz;
2905   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2906   B->was_assembled    = PETSC_FALSE;
2907   B->assembled        = PETSC_FALSE;
2908   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2913 {
2914   PetscInt       i,m,nz,nz_max=0,*nnz;
2915   PetscScalar    *values=0;
2916   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2917   PetscErrorCode ierr;
2918 
2919   PetscFunctionBegin;
2920   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2921   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2922   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2923   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2924   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2925   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2926   m    = B->rmap->n/bs;
2927 
2928   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2929   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2930   for (i=0; i<m; i++) {
2931     nz = ii[i+1]- ii[i];
2932     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2933     nz_max = PetscMax(nz_max, nz);
2934     nnz[i] = nz;
2935   }
2936   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2937   ierr = PetscFree(nnz);CHKERRQ(ierr);
2938 
2939   values = (PetscScalar*)V;
2940   if (!values) {
2941     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2942   }
2943   for (i=0; i<m; i++) {
2944     PetscInt          ncols  = ii[i+1] - ii[i];
2945     const PetscInt    *icols = jj + ii[i];
2946     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2947     if (!roworiented) {
2948       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2949     } else {
2950       PetscInt j;
2951       for (j=0; j<ncols; j++) {
2952         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2953         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2954       }
2955     }
2956   }
2957   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2958   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 /*MC
2965    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2966    block sparse compressed row format.
2967 
2968    Options Database Keys:
2969 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2970 
2971   Level: beginner
2972 
2973 .seealso: MatCreateSeqBAIJ()
2974 M*/
2975 
2976 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2977 
2978 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2979 {
2980   PetscErrorCode ierr;
2981   PetscMPIInt    size;
2982   Mat_SeqBAIJ    *b;
2983 
2984   PetscFunctionBegin;
2985   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2986   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2987 
2988   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2989   B->data = (void*)b;
2990   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2991 
2992   b->row          = 0;
2993   b->col          = 0;
2994   b->icol         = 0;
2995   b->reallocs     = 0;
2996   b->saved_values = 0;
2997 
2998   b->roworiented        = PETSC_TRUE;
2999   b->nonew              = 0;
3000   b->diag               = 0;
3001   B->spptr              = 0;
3002   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3003   b->keepnonzeropattern = PETSC_FALSE;
3004 
3005   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3007   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3009   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3013   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3014 #if defined(PETSC_HAVE_HYPRE)
3015   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3016 #endif
3017   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3022 {
3023   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3024   PetscErrorCode ierr;
3025   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3026 
3027   PetscFunctionBegin;
3028   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3029 
3030   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3031     c->imax           = a->imax;
3032     c->ilen           = a->ilen;
3033     c->free_imax_ilen = PETSC_FALSE;
3034   } else {
3035     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3036     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3037     for (i=0; i<mbs; i++) {
3038       c->imax[i] = a->imax[i];
3039       c->ilen[i] = a->ilen[i];
3040     }
3041     c->free_imax_ilen = PETSC_TRUE;
3042   }
3043 
3044   /* allocate the matrix space */
3045   if (mallocmatspace) {
3046     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3047       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3048       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3049 
3050       c->i            = a->i;
3051       c->j            = a->j;
3052       c->singlemalloc = PETSC_FALSE;
3053       c->free_a       = PETSC_TRUE;
3054       c->free_ij      = PETSC_FALSE;
3055       c->parent       = A;
3056       C->preallocated = PETSC_TRUE;
3057       C->assembled    = PETSC_TRUE;
3058 
3059       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3060       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3061       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3062     } else {
3063       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3064       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3065 
3066       c->singlemalloc = PETSC_TRUE;
3067       c->free_a       = PETSC_TRUE;
3068       c->free_ij      = PETSC_TRUE;
3069 
3070       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3071       if (mbs > 0) {
3072         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3073         if (cpvalues == MAT_COPY_VALUES) {
3074           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3075         } else {
3076           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3077         }
3078       }
3079       C->preallocated = PETSC_TRUE;
3080       C->assembled    = PETSC_TRUE;
3081     }
3082   }
3083 
3084   c->roworiented = a->roworiented;
3085   c->nonew       = a->nonew;
3086 
3087   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3088   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3089 
3090   c->bs2         = a->bs2;
3091   c->mbs         = a->mbs;
3092   c->nbs         = a->nbs;
3093 
3094   if (a->diag) {
3095     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3096       c->diag      = a->diag;
3097       c->free_diag = PETSC_FALSE;
3098     } else {
3099       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3100       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3101       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3102       c->free_diag = PETSC_TRUE;
3103     }
3104   } else c->diag = 0;
3105 
3106   c->nz         = a->nz;
3107   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3108   c->solve_work = NULL;
3109   c->mult_work  = NULL;
3110   c->sor_workt  = NULL;
3111   c->sor_work   = NULL;
3112 
3113   c->compressedrow.use   = a->compressedrow.use;
3114   c->compressedrow.nrows = a->compressedrow.nrows;
3115   if (a->compressedrow.use) {
3116     i    = a->compressedrow.nrows;
3117     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3118     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3119     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3120     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3121   } else {
3122     c->compressedrow.use    = PETSC_FALSE;
3123     c->compressedrow.i      = NULL;
3124     c->compressedrow.rindex = NULL;
3125   }
3126   C->nonzerostate = A->nonzerostate;
3127 
3128   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3129   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3134 {
3135   PetscErrorCode ierr;
3136 
3137   PetscFunctionBegin;
3138   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3139   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3140   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3141   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3142   PetscFunctionReturn(0);
3143 }
3144 
3145 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3146 {
3147   Mat_SeqBAIJ    *a;
3148   PetscErrorCode ierr;
3149   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3150   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3151   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3152   PetscInt       *masked,nmask,tmp,bs2,ishift;
3153   PetscMPIInt    size;
3154   int            fd;
3155   PetscScalar    *aa;
3156   MPI_Comm       comm;
3157 
3158   PetscFunctionBegin;
3159   /* force binary viewer to load .info file if it has not yet done so */
3160   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3161   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3162   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3163   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3164   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3165   if (bs < 0) bs = 1;
3166   bs2  = bs*bs;
3167 
3168   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3169   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3170   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3171   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3172   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3173   M = header[1]; N = header[2]; nz = header[3];
3174 
3175   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3176   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3177 
3178   /*
3179      This code adds extra rows to make sure the number of rows is
3180     divisible by the blocksize
3181   */
3182   mbs        = M/bs;
3183   extra_rows = bs - M + bs*(mbs);
3184   if (extra_rows == bs) extra_rows = 0;
3185   else mbs++;
3186   if (extra_rows) {
3187     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3188   }
3189 
3190   /* Set global sizes if not already set */
3191   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3192     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3193   } else { /* Check if the matrix global sizes are correct */
3194     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3195     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3196       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3197     }
3198     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3199   }
3200 
3201   /* read in row lengths */
3202   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3203   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3204   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3205 
3206   /* read in column indices */
3207   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3208   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3209   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3210 
3211   /* loop over row lengths determining block row lengths */
3212   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3213   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3214   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3215   rowcount = 0;
3216   nzcount  = 0;
3217   for (i=0; i<mbs; i++) {
3218     nmask = 0;
3219     for (j=0; j<bs; j++) {
3220       kmax = rowlengths[rowcount];
3221       for (k=0; k<kmax; k++) {
3222         tmp = jj[nzcount++]/bs;
3223         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3224       }
3225       rowcount++;
3226     }
3227     browlengths[i] += nmask;
3228     /* zero out the mask elements we set */
3229     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3230   }
3231 
3232   /* Do preallocation  */
3233   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3234   a    = (Mat_SeqBAIJ*)newmat->data;
3235 
3236   /* set matrix "i" values */
3237   a->i[0] = 0;
3238   for (i=1; i<= mbs; i++) {
3239     a->i[i]      = a->i[i-1] + browlengths[i-1];
3240     a->ilen[i-1] = browlengths[i-1];
3241   }
3242   a->nz = 0;
3243   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3244 
3245   /* read in nonzero values */
3246   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3247   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3248   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3249 
3250   /* set "a" and "j" values into matrix */
3251   nzcount = 0; jcount = 0;
3252   for (i=0; i<mbs; i++) {
3253     nzcountb = nzcount;
3254     nmask    = 0;
3255     for (j=0; j<bs; j++) {
3256       kmax = rowlengths[i*bs+j];
3257       for (k=0; k<kmax; k++) {
3258         tmp = jj[nzcount++]/bs;
3259         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3260       }
3261     }
3262     /* sort the masked values */
3263     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3264 
3265     /* set "j" values into matrix */
3266     maskcount = 1;
3267     for (j=0; j<nmask; j++) {
3268       a->j[jcount++]  = masked[j];
3269       mask[masked[j]] = maskcount++;
3270     }
3271     /* set "a" values into matrix */
3272     ishift = bs2*a->i[i];
3273     for (j=0; j<bs; j++) {
3274       kmax = rowlengths[i*bs+j];
3275       for (k=0; k<kmax; k++) {
3276         tmp       = jj[nzcountb]/bs;
3277         block     = mask[tmp] - 1;
3278         point     = jj[nzcountb] - bs*tmp;
3279         idx       = ishift + bs2*block + j + bs*point;
3280         a->a[idx] = (MatScalar)aa[nzcountb++];
3281       }
3282     }
3283     /* zero out the mask elements we set */
3284     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3285   }
3286   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3287 
3288   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3289   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3290   ierr = PetscFree(aa);CHKERRQ(ierr);
3291   ierr = PetscFree(jj);CHKERRQ(ierr);
3292   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3293 
3294   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3295   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3296   PetscFunctionReturn(0);
3297 }
3298 
3299 /*@C
3300    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3301    compressed row) format.  For good matrix assembly performance the
3302    user should preallocate the matrix storage by setting the parameter nz
3303    (or the array nnz).  By setting these parameters accurately, performance
3304    during matrix assembly can be increased by more than a factor of 50.
3305 
3306    Collective on MPI_Comm
3307 
3308    Input Parameters:
3309 +  comm - MPI communicator, set to PETSC_COMM_SELF
3310 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3311           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3312 .  m - number of rows
3313 .  n - number of columns
3314 .  nz - number of nonzero blocks  per block row (same for all rows)
3315 -  nnz - array containing the number of nonzero blocks in the various block rows
3316          (possibly different for each block row) or NULL
3317 
3318    Output Parameter:
3319 .  A - the matrix
3320 
3321    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3322    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3323    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3324 
3325    Options Database Keys:
3326 .   -mat_no_unroll - uses code that does not unroll the loops in the
3327                      block calculations (much slower)
3328 .    -mat_block_size - size of the blocks to use
3329 
3330    Level: intermediate
3331 
3332    Notes:
3333    The number of rows and columns must be divisible by blocksize.
3334 
3335    If the nnz parameter is given then the nz parameter is ignored
3336 
3337    A nonzero block is any block that as 1 or more nonzeros in it
3338 
3339    The block AIJ format is fully compatible with standard Fortran 77
3340    storage.  That is, the stored row and column indices can begin at
3341    either one (as in Fortran) or zero.  See the users' manual for details.
3342 
3343    Specify the preallocated storage with either nz or nnz (not both).
3344    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3345    allocation.  See Users-Manual: ch_mat for details.
3346    matrices.
3347 
3348 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3349 @*/
3350 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3351 {
3352   PetscErrorCode ierr;
3353 
3354   PetscFunctionBegin;
3355   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3356   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3357   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3358   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 /*@C
3363    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3364    per row in the matrix. For good matrix assembly performance the
3365    user should preallocate the matrix storage by setting the parameter nz
3366    (or the array nnz).  By setting these parameters accurately, performance
3367    during matrix assembly can be increased by more than a factor of 50.
3368 
3369    Collective on MPI_Comm
3370 
3371    Input Parameters:
3372 +  B - the matrix
3373 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3374           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3375 .  nz - number of block nonzeros per block row (same for all rows)
3376 -  nnz - array containing the number of block nonzeros in the various block rows
3377          (possibly different for each block row) or NULL
3378 
3379    Options Database Keys:
3380 .   -mat_no_unroll - uses code that does not unroll the loops in the
3381                      block calculations (much slower)
3382 .   -mat_block_size - size of the blocks to use
3383 
3384    Level: intermediate
3385 
3386    Notes:
3387    If the nnz parameter is given then the nz parameter is ignored
3388 
3389    You can call MatGetInfo() to get information on how effective the preallocation was;
3390    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3391    You can also run with the option -info and look for messages with the string
3392    malloc in them to see if additional memory allocation was needed.
3393 
3394    The block AIJ format is fully compatible with standard Fortran 77
3395    storage.  That is, the stored row and column indices can begin at
3396    either one (as in Fortran) or zero.  See the users' manual for details.
3397 
3398    Specify the preallocated storage with either nz or nnz (not both).
3399    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3400    allocation.  See Users-Manual: ch_mat for details.
3401 
3402 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3403 @*/
3404 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3405 {
3406   PetscErrorCode ierr;
3407 
3408   PetscFunctionBegin;
3409   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3410   PetscValidType(B,1);
3411   PetscValidLogicalCollectiveInt(B,bs,2);
3412   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3413   PetscFunctionReturn(0);
3414 }
3415 
3416 /*@C
3417    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3418    (the default sequential PETSc format).
3419 
3420    Collective on MPI_Comm
3421 
3422    Input Parameters:
3423 +  B - the matrix
3424 .  i - the indices into j for the start of each local row (starts with zero)
3425 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3426 -  v - optional values in the matrix
3427 
3428    Level: developer
3429 
3430    Notes:
3431    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3432    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3433    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3434    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3435    block column and the second index is over columns within a block.
3436 
3437 .keywords: matrix, aij, compressed row, sparse
3438 
3439 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3440 @*/
3441 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3442 {
3443   PetscErrorCode ierr;
3444 
3445   PetscFunctionBegin;
3446   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3447   PetscValidType(B,1);
3448   PetscValidLogicalCollectiveInt(B,bs,2);
3449   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3450   PetscFunctionReturn(0);
3451 }
3452 
3453 
3454 /*@
3455      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3456 
3457      Collective on MPI_Comm
3458 
3459    Input Parameters:
3460 +  comm - must be an MPI communicator of size 1
3461 .  bs - size of block
3462 .  m - number of rows
3463 .  n - number of columns
3464 .  i - row indices
3465 .  j - column indices
3466 -  a - matrix values
3467 
3468    Output Parameter:
3469 .  mat - the matrix
3470 
3471    Level: advanced
3472 
3473    Notes:
3474        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3475     once the matrix is destroyed
3476 
3477        You cannot set new nonzero locations into this matrix, that will generate an error.
3478 
3479        The i and j indices are 0 based
3480 
3481        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3482 
3483       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3484       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3485       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3486       with column-major ordering within blocks.
3487 
3488 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3489 
3490 @*/
3491 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3492 {
3493   PetscErrorCode ierr;
3494   PetscInt       ii;
3495   Mat_SeqBAIJ    *baij;
3496 
3497   PetscFunctionBegin;
3498   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3499   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3500 
3501   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3502   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3503   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3504   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3505   baij = (Mat_SeqBAIJ*)(*mat)->data;
3506   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3507   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3508 
3509   baij->i = i;
3510   baij->j = j;
3511   baij->a = a;
3512 
3513   baij->singlemalloc = PETSC_FALSE;
3514   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3515   baij->free_a       = PETSC_FALSE;
3516   baij->free_ij      = PETSC_FALSE;
3517 
3518   for (ii=0; ii<m; ii++) {
3519     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3520 #if defined(PETSC_USE_DEBUG)
3521     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3522 #endif
3523   }
3524 #if defined(PETSC_USE_DEBUG)
3525   for (ii=0; ii<baij->i[m]; ii++) {
3526     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3527     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3528   }
3529 #endif
3530 
3531   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3532   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533   PetscFunctionReturn(0);
3534 }
3535 
3536 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3537 {
3538   PetscErrorCode ierr;
3539   PetscMPIInt    size;
3540 
3541   PetscFunctionBegin;
3542   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3543   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3544     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3545   } else {
3546     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3547   }
3548   PetscFunctionReturn(0);
3549 }
3550