xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7fa8f2d3c229ab24d2648dd75b628cc252fb1e95)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
21 
22   PetscFunctionBegin;
23   allowzeropivot = PetscNot(A->erroriffailure);
24 
25   if (a->idiagvalid) {
26     if (values) *values = a->idiag;
27     PetscFunctionReturn(0);
28   }
29   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
30   diag_offset = a->diag;
31   if (!a->idiag) {
32     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
33     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
34   }
35   diag  = a->idiag;
36   mdiag = a->idiag+bs2*mbs;
37   if (values) *values = a->idiag;
38   /* factor and invert each block */
39   switch (bs) {
40   case 1:
41     for (i=0; i<mbs; i++) {
42       odiag    = v + 1*diag_offset[i];
43       diag[0]  = odiag[0];
44       mdiag[0] = odiag[0];
45 
46       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
47         if (allowzeropivot) {
48           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
49           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
50           A->factorerror_zeropivot_row   = i;
51           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
52         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
53       }
54 
55       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
56       diag    += 1;
57       mdiag   += 1;
58     }
59     break;
60   case 2:
61     for (i=0; i<mbs; i++) {
62       odiag    = v + 4*diag_offset[i];
63       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
64       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
65       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
66       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
67       diag    += 4;
68       mdiag   += 4;
69     }
70     break;
71   case 3:
72     for (i=0; i<mbs; i++) {
73       odiag    = v + 9*diag_offset[i];
74       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
75       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
76       diag[8]  = odiag[8];
77       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
78       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
79       mdiag[8] = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83       mdiag   += 9;
84     }
85     break;
86   case 4:
87     for (i=0; i<mbs; i++) {
88       odiag  = v + 16*diag_offset[i];
89       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
90       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
91       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
92       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
93       diag  += 16;
94       mdiag += 16;
95     }
96     break;
97   case 5:
98     for (i=0; i<mbs; i++) {
99       odiag  = v + 25*diag_offset[i];
100       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
101       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
103       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
104       diag  += 25;
105       mdiag += 25;
106     }
107     break;
108   case 6:
109     for (i=0; i<mbs; i++) {
110       odiag  = v + 36*diag_offset[i];
111       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
112       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
113       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
114       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
115       diag  += 36;
116       mdiag += 36;
117     }
118     break;
119   case 7:
120     for (i=0; i<mbs; i++) {
121       odiag  = v + 49*diag_offset[i];
122       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
123       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
124       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
125       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126       diag  += 49;
127       mdiag += 49;
128     }
129     break;
130   default:
131     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
132     for (i=0; i<mbs; i++) {
133       odiag  = v + bs2*diag_offset[i];
134       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
135       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
136       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
137       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
138       diag  += bs2;
139       mdiag += bs2;
140     }
141     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
142   }
143   a->idiagvalid = PETSC_TRUE;
144   PetscFunctionReturn(0);
145 }
146 
147 #undef __FUNCT__
148 #define __FUNCT__ "MatSOR_SeqBAIJ"
149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
150 {
151   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
152   PetscScalar       *x,*work,*w,*workt,*t;
153   const MatScalar   *v,*aa = a->a, *idiag;
154   const PetscScalar *b,*xb;
155   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
156   PetscErrorCode    ierr;
157   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
158   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
162   its = its*lits;
163   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
164   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168 
169   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
170 
171   if (!m) PetscFunctionReturn(0);
172   diag  = a->diag;
173   idiag = a->idiag;
174   k    = PetscMax(A->rmap->n,A->cmap->n);
175   if (!a->mult_work) {
176     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
177   }
178   if (!a->sor_workt) {
179     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
180   }
181   if (!a->sor_work) {
182     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
183   }
184   work = a->mult_work;
185   t    = a->sor_workt;
186   w    = a->sor_work;
187 
188   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
189   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
190 
191   if (flag & SOR_ZERO_INITIAL_GUESS) {
192     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
193       switch (bs) {
194       case 1:
195         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
196         t[0] = b[0];
197         i2     = 1;
198         idiag += 1;
199         for (i=1; i<m; i++) {
200           v  = aa + ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2];
204           for (j=0; j<nz; j++) {
205             xw[0] = x[vi[j]];
206             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
207           }
208           t[i2] = s[0];
209           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
210           x[i2]  = xw[0];
211           idiag += 1;
212           i2    += 1;
213         }
214         break;
215       case 2:
216         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
217         t[0] = b[0]; t[1] = b[1];
218         i2     = 2;
219         idiag += 4;
220         for (i=1; i<m; i++) {
221           v  = aa + 4*ai[i];
222           vi = aj + ai[i];
223           nz = diag[i] - ai[i];
224           s[0] = b[i2]; s[1] = b[i2+1];
225           for (j=0; j<nz; j++) {
226             idx = 2*vi[j];
227             it  = 4*j;
228             xw[0] = x[idx]; xw[1] = x[1+idx];
229             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
230           }
231           t[i2] = s[0]; t[i2+1] = s[1];
232           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
233           x[i2]   = xw[0]; x[i2+1] = xw[1];
234           idiag  += 4;
235           i2     += 2;
236         }
237         break;
238       case 3:
239         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
240         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
241         i2     = 3;
242         idiag += 9;
243         for (i=1; i<m; i++) {
244           v  = aa + 9*ai[i];
245           vi = aj + ai[i];
246           nz = diag[i] - ai[i];
247           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
248           while (nz--) {
249             idx = 3*(*vi++);
250             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
251             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
252             v  += 9;
253           }
254           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
255           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
256           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
257           idiag  += 9;
258           i2     += 3;
259         }
260         break;
261       case 4:
262         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
263         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
264         i2     = 4;
265         idiag += 16;
266         for (i=1; i<m; i++) {
267           v  = aa + 16*ai[i];
268           vi = aj + ai[i];
269           nz = diag[i] - ai[i];
270           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
271           while (nz--) {
272             idx = 4*(*vi++);
273             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
274             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
275             v  += 16;
276           }
277           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
278           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
279           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
280           idiag  += 16;
281           i2     += 4;
282         }
283         break;
284       case 5:
285         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
286         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
287         i2     = 5;
288         idiag += 25;
289         for (i=1; i<m; i++) {
290           v  = aa + 25*ai[i];
291           vi = aj + ai[i];
292           nz = diag[i] - ai[i];
293           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
294           while (nz--) {
295             idx = 5*(*vi++);
296             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
298             v  += 25;
299           }
300           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
301           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
302           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
303           idiag  += 25;
304           i2     += 5;
305         }
306         break;
307       case 6:
308         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
309         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
310         i2     = 6;
311         idiag += 36;
312         for (i=1; i<m; i++) {
313           v  = aa + 36*ai[i];
314           vi = aj + ai[i];
315           nz = diag[i] - ai[i];
316           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
317           while (nz--) {
318             idx = 6*(*vi++);
319             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
320             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
321             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
322             v  += 36;
323           }
324           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
325           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
326           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
328           idiag  += 36;
329           i2     += 6;
330         }
331         break;
332       case 7:
333         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
335         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
336         i2     = 7;
337         idiag += 49;
338         for (i=1; i<m; i++) {
339           v  = aa + 49*ai[i];
340           vi = aj + ai[i];
341           nz = diag[i] - ai[i];
342           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
343           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
344           while (nz--) {
345             idx = 7*(*vi++);
346             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
347             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
349             v  += 49;
350           }
351           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
352           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
353           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
354           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
355           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
356           idiag  += 49;
357           i2     += 7;
358         }
359         break;
360       default:
361         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
362         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
363         i2     = bs;
364         idiag += bs2;
365         for (i=1; i<m; i++) {
366           v  = aa + bs2*ai[i];
367           vi = aj + ai[i];
368           nz = diag[i] - ai[i];
369 
370           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
371           /* copy all rows of x that are needed into contiguous space */
372           workt = work;
373           for (j=0; j<nz; j++) {
374             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
375             workt += bs;
376           }
377           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
378           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
379           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
380 
381           idiag += bs2;
382           i2    += bs;
383         }
384         break;
385       }
386       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
388       xb = t;
389     }
390     else xb = b;
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392       idiag = a->idiag+bs2*(a->mbs-1);
393       i2 = bs * (m-1);
394       switch (bs) {
395       case 1:
396         s[0]  = xb[i2];
397         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
398         x[i2] = xw[0];
399         i2   -= 1;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + (diag[i]+1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2];
405           for (j=0; j<nz; j++) {
406             xw[0] = x[vi[j]];
407             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
408           }
409           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
410           x[i2]  = xw[0];
411           idiag -= 1;
412           i2    -= 1;
413         }
414         break;
415       case 2:
416         s[0]  = xb[i2]; s[1] = xb[i2+1];
417         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418         x[i2] = xw[0]; x[i2+1] = xw[1];
419         i2    -= 2;
420         idiag -= 4;
421         for (i=m-2; i>=0; i--) {
422           v  = aa + 4*(diag[i] + 1);
423           vi = aj + diag[i] + 1;
424           nz = ai[i+1] - diag[i] - 1;
425           s[0] = xb[i2]; s[1] = xb[i2+1];
426           for (j=0; j<nz; j++) {
427             idx = 2*vi[j];
428             it  = 4*j;
429             xw[0] = x[idx]; xw[1] = x[1+idx];
430             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
431           }
432           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
433           x[i2]   = xw[0]; x[i2+1] = xw[1];
434           idiag  -= 4;
435           i2     -= 2;
436         }
437         break;
438       case 3:
439         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
440         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442         i2    -= 3;
443         idiag -= 9;
444         for (i=m-2; i>=0; i--) {
445           v  = aa + 9*(diag[i]+1);
446           vi = aj + diag[i] + 1;
447           nz = ai[i+1] - diag[i] - 1;
448           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
449           while (nz--) {
450             idx = 3*(*vi++);
451             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
452             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
453             v  += 9;
454           }
455           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
456           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
457           idiag  -= 9;
458           i2     -= 3;
459         }
460         break;
461       case 4:
462         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
463         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465         i2    -= 4;
466         idiag -= 16;
467         for (i=m-2; i>=0; i--) {
468           v  = aa + 16*(diag[i]+1);
469           vi = aj + diag[i] + 1;
470           nz = ai[i+1] - diag[i] - 1;
471           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
472           while (nz--) {
473             idx = 4*(*vi++);
474             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
475             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
476             v  += 16;
477           }
478           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
479           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
480           idiag  -= 16;
481           i2     -= 4;
482         }
483         break;
484       case 5:
485         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
486         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488         i2    -= 5;
489         idiag -= 25;
490         for (i=m-2; i>=0; i--) {
491           v  = aa + 25*(diag[i]+1);
492           vi = aj + diag[i] + 1;
493           nz = ai[i+1] - diag[i] - 1;
494           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
495           while (nz--) {
496             idx = 5*(*vi++);
497             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
499             v  += 25;
500           }
501           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
503           idiag  -= 25;
504           i2     -= 5;
505         }
506         break;
507       case 6:
508         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
509         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
510         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
511         i2    -= 6;
512         idiag -= 36;
513         for (i=m-2; i>=0; i--) {
514           v  = aa + 36*(diag[i]+1);
515           vi = aj + diag[i] + 1;
516           nz = ai[i+1] - diag[i] - 1;
517           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
518           while (nz--) {
519             idx = 6*(*vi++);
520             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
521             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
522             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
523             v  += 36;
524           }
525           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
526           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
527           idiag  -= 36;
528           i2     -= 6;
529         }
530         break;
531       case 7:
532         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
533         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
534         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
535         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
536         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
537         i2    -= 7;
538         idiag -= 49;
539         for (i=m-2; i>=0; i--) {
540           v  = aa + 49*(diag[i]+1);
541           vi = aj + diag[i] + 1;
542           nz = ai[i+1] - diag[i] - 1;
543           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
544           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
545           while (nz--) {
546             idx = 7*(*vi++);
547             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
548             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
550             v  += 49;
551           }
552           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
553           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
554           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
555           idiag  -= 49;
556           i2     -= 7;
557         }
558         break;
559       default:
560         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
561         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
562         i2    -= bs;
563         idiag -= bs2;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + bs2*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568 
569           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
570           /* copy all rows of x that are needed into contiguous space */
571           workt = work;
572           for (j=0; j<nz; j++) {
573             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
574             workt += bs;
575           }
576           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
577           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
578 
579           idiag -= bs2;
580           i2    -= bs;
581         }
582         break;
583       }
584       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
585     }
586     its--;
587   }
588   while (its--) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
590       idiag = a->idiag;
591       i2 = 0;
592       switch (bs) {
593       case 1:
594         for (i=0; i<m; i++) {
595           v  = aa + ai[i];
596           vi = aj + ai[i];
597           nz = ai[i+1] - ai[i];
598           s[0] = b[i2];
599           for (j=0; j<nz; j++) {
600             xw[0] = x[vi[j]];
601             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
602           }
603           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
604           x[i2] += xw[0];
605           idiag += 1;
606           i2    += 1;
607         }
608         break;
609       case 2:
610         for (i=0; i<m; i++) {
611           v  = aa + 4*ai[i];
612           vi = aj + ai[i];
613           nz = ai[i+1] - ai[i];
614           s[0] = b[i2]; s[1] = b[i2+1];
615           for (j=0; j<nz; j++) {
616             idx = 2*vi[j];
617             it  = 4*j;
618             xw[0] = x[idx]; xw[1] = x[1+idx];
619             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
620           }
621           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
622           x[i2]  += xw[0]; x[i2+1] += xw[1];
623           idiag  += 4;
624           i2     += 2;
625         }
626         break;
627       case 3:
628         for (i=0; i<m; i++) {
629           v  = aa + 9*ai[i];
630           vi = aj + ai[i];
631           nz = ai[i+1] - ai[i];
632           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
633           while (nz--) {
634             idx = 3*(*vi++);
635             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
636             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
637             v  += 9;
638           }
639           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
640           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
641           idiag  += 9;
642           i2     += 3;
643         }
644         break;
645       case 4:
646         for (i=0; i<m; i++) {
647           v  = aa + 16*ai[i];
648           vi = aj + ai[i];
649           nz = ai[i+1] - ai[i];
650           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
651           while (nz--) {
652             idx = 4*(*vi++);
653             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
654             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
655             v  += 16;
656           }
657           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
658           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
659           idiag  += 16;
660           i2     += 4;
661         }
662         break;
663       case 5:
664         for (i=0; i<m; i++) {
665           v  = aa + 25*ai[i];
666           vi = aj + ai[i];
667           nz = ai[i+1] - ai[i];
668           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
669           while (nz--) {
670             idx = 5*(*vi++);
671             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
672             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
673             v  += 25;
674           }
675           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
676           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
677           idiag  += 25;
678           i2     += 5;
679         }
680         break;
681       case 6:
682         for (i=0; i<m; i++) {
683           v  = aa + 36*ai[i];
684           vi = aj + ai[i];
685           nz = ai[i+1] - ai[i];
686           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
687           while (nz--) {
688             idx = 6*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
692             v  += 36;
693           }
694           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
695           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
697           idiag  += 36;
698           i2     += 6;
699         }
700         break;
701       case 7:
702         for (i=0; i<m; i++) {
703           v  = aa + 49*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
707           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
708           while (nz--) {
709             idx = 7*(*vi++);
710             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
711             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
712             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
713             v  += 49;
714           }
715           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
716           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
717           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
718           idiag  += 49;
719           i2     += 7;
720         }
721         break;
722       default:
723         for (i=0; i<m; i++) {
724           v  = aa + bs2*ai[i];
725           vi = aj + ai[i];
726           nz = ai[i+1] - ai[i];
727 
728           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
729           /* copy all rows of x that are needed into contiguous space */
730           workt = work;
731           for (j=0; j<nz; j++) {
732             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
733             workt += bs;
734           }
735           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
736           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
737 
738           idiag += bs2;
739           i2    += bs;
740         }
741         break;
742       }
743       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
744     }
745     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
746       idiag = a->idiag+bs2*(a->mbs-1);
747       i2 = bs * (m-1);
748       switch (bs) {
749       case 1:
750         for (i=m-1; i>=0; i--) {
751           v  = aa + ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];
755           for (j=0; j<nz; j++) {
756             xw[0] = x[vi[j]];
757             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
758           }
759           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
760           x[i2] += xw[0];
761           idiag -= 1;
762           i2    -= 1;
763         }
764         break;
765       case 2:
766         for (i=m-1; i>=0; i--) {
767           v  = aa + 4*ai[i];
768           vi = aj + ai[i];
769           nz = ai[i+1] - ai[i];
770           s[0] = b[i2]; s[1] = b[i2+1];
771           for (j=0; j<nz; j++) {
772             idx = 2*vi[j];
773             it  = 4*j;
774             xw[0] = x[idx]; xw[1] = x[1+idx];
775             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
776           }
777           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
778           x[i2]  += xw[0]; x[i2+1] += xw[1];
779           idiag  -= 4;
780           i2     -= 2;
781         }
782         break;
783       case 3:
784         for (i=m-1; i>=0; i--) {
785           v  = aa + 9*ai[i];
786           vi = aj + ai[i];
787           nz = ai[i+1] - ai[i];
788           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
789           while (nz--) {
790             idx = 3*(*vi++);
791             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
792             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
793             v  += 9;
794           }
795           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
796           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
797           idiag  -= 9;
798           i2     -= 3;
799         }
800         break;
801       case 4:
802         for (i=m-1; i>=0; i--) {
803           v  = aa + 16*ai[i];
804           vi = aj + ai[i];
805           nz = ai[i+1] - ai[i];
806           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
807           while (nz--) {
808             idx = 4*(*vi++);
809             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
810             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
811             v  += 16;
812           }
813           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
814           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
815           idiag  -= 16;
816           i2     -= 4;
817         }
818         break;
819       case 5:
820         for (i=m-1; i>=0; i--) {
821           v  = aa + 25*ai[i];
822           vi = aj + ai[i];
823           nz = ai[i+1] - ai[i];
824           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
825           while (nz--) {
826             idx = 5*(*vi++);
827             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
828             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
829             v  += 25;
830           }
831           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
832           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
833           idiag  -= 25;
834           i2     -= 5;
835         }
836         break;
837       case 6:
838         for (i=m-1; i>=0; i--) {
839           v  = aa + 36*ai[i];
840           vi = aj + ai[i];
841           nz = ai[i+1] - ai[i];
842           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
843           while (nz--) {
844             idx = 6*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
848             v  += 36;
849           }
850           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
851           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
853           idiag  -= 36;
854           i2     -= 6;
855         }
856         break;
857       case 7:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + 49*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
863           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
864           while (nz--) {
865             idx = 7*(*vi++);
866             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
867             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
868             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
869             v  += 49;
870           }
871           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
872           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
873           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
874           idiag  -= 49;
875           i2     -= 7;
876         }
877         break;
878       default:
879         for (i=m-1; i>=0; i--) {
880           v  = aa + bs2*ai[i];
881           vi = aj + ai[i];
882           nz = ai[i+1] - ai[i];
883 
884           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885           /* copy all rows of x that are needed into contiguous space */
886           workt = work;
887           for (j=0; j<nz; j++) {
888             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
889             workt += bs;
890           }
891           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
892           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
893 
894           idiag -= bs2;
895           i2    -= bs;
896         }
897         break;
898       }
899       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
900     }
901   }
902   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
903   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 
908 /*
909     Special version for direct calls from Fortran (Used in PETSc-fun3d)
910 */
911 #if defined(PETSC_HAVE_FORTRAN_CAPS)
912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
914 #define matsetvaluesblocked4_ matsetvaluesblocked4
915 #endif
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "matsetvaluesblocked4_"
919 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
920 {
921   Mat               A  = *AA;
922   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
923   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
924   PetscInt          *ai    =a->i,*ailen=a->ilen;
925   PetscInt          *aj    =a->j,stepval,lastcol = -1;
926   const PetscScalar *value = v;
927   MatScalar         *ap,*aa = a->a,*bap;
928 
929   PetscFunctionBegin;
930   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
931   stepval = (n-1)*4;
932   for (k=0; k<m; k++) { /* loop over added rows */
933     row  = im[k];
934     rp   = aj + ai[row];
935     ap   = aa + 16*ai[row];
936     nrow = ailen[row];
937     low  = 0;
938     high = nrow;
939     for (l=0; l<n; l++) { /* loop over added columns */
940       col = in[l];
941       if (col <= lastcol)  low = 0;
942       else                high = nrow;
943       lastcol = col;
944       value   = v + k*(stepval+4 + l)*4;
945       while (high-low > 7) {
946         t = (low+high)/2;
947         if (rp[t] > col) high = t;
948         else             low  = t;
949       }
950       for (i=low; i<high; i++) {
951         if (rp[i] > col) break;
952         if (rp[i] == col) {
953           bap = ap +  16*i;
954           for (ii=0; ii<4; ii++,value+=stepval) {
955             for (jj=ii; jj<16; jj+=4) {
956               bap[jj] += *value++;
957             }
958           }
959           goto noinsert2;
960         }
961       }
962       N = nrow++ - 1;
963       high++; /* added new column index thus must search to one higher than before */
964       /* shift up all the later entries in this row */
965       for (ii=N; ii>=i; ii--) {
966         rp[ii+1] = rp[ii];
967         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
968       }
969       if (N >= i) {
970         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
971       }
972       rp[i] = col;
973       bap   = ap +  16*i;
974       for (ii=0; ii<4; ii++,value+=stepval) {
975         for (jj=ii; jj<16; jj+=4) {
976           bap[jj] = *value++;
977         }
978       }
979       noinsert2:;
980       low = i;
981     }
982     ailen[row] = nrow;
983   }
984   PetscFunctionReturnVoid();
985 }
986 
987 #if defined(PETSC_HAVE_FORTRAN_CAPS)
988 #define matsetvalues4_ MATSETVALUES4
989 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
990 #define matsetvalues4_ matsetvalues4
991 #endif
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatSetValues4_"
995 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
996 {
997   Mat         A  = *AA;
998   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
999   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1000   PetscInt    *ai=a->i,*ailen=a->ilen;
1001   PetscInt    *aj=a->j,brow,bcol;
1002   PetscInt    ridx,cidx,lastcol = -1;
1003   MatScalar   *ap,value,*aa=a->a,*bap;
1004 
1005   PetscFunctionBegin;
1006   for (k=0; k<m; k++) { /* loop over added rows */
1007     row  = im[k]; brow = row/4;
1008     rp   = aj + ai[brow];
1009     ap   = aa + 16*ai[brow];
1010     nrow = ailen[brow];
1011     low  = 0;
1012     high = nrow;
1013     for (l=0; l<n; l++) { /* loop over added columns */
1014       col   = in[l]; bcol = col/4;
1015       ridx  = row % 4; cidx = col % 4;
1016       value = v[l + k*n];
1017       if (col <= lastcol)  low = 0;
1018       else                high = nrow;
1019       lastcol = col;
1020       while (high-low > 7) {
1021         t = (low+high)/2;
1022         if (rp[t] > bcol) high = t;
1023         else              low  = t;
1024       }
1025       for (i=low; i<high; i++) {
1026         if (rp[i] > bcol) break;
1027         if (rp[i] == bcol) {
1028           bap   = ap +  16*i + 4*cidx + ridx;
1029           *bap += value;
1030           goto noinsert1;
1031         }
1032       }
1033       N = nrow++ - 1;
1034       high++; /* added new column thus must search to one higher than before */
1035       /* shift up all the later entries in this row */
1036       for (ii=N; ii>=i; ii--) {
1037         rp[ii+1] = rp[ii];
1038         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1039       }
1040       if (N>=i) {
1041         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1042       }
1043       rp[i]                    = bcol;
1044       ap[16*i + 4*cidx + ridx] = value;
1045 noinsert1:;
1046       low = i;
1047     }
1048     ailen[brow] = nrow;
1049   }
1050   PetscFunctionReturnVoid();
1051 }
1052 
1053 /*
1054      Checks for missing diagonals
1055 */
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1058 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1059 {
1060   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1061   PetscErrorCode ierr;
1062   PetscInt       *diag,*ii = a->i,i;
1063 
1064   PetscFunctionBegin;
1065   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1066   *missing = PETSC_FALSE;
1067   if (A->rmap->n > 0 && !ii) {
1068     *missing = PETSC_TRUE;
1069     if (d) *d = 0;
1070     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1071   } else {
1072     diag = a->diag;
1073     for (i=0; i<a->mbs; i++) {
1074       if (diag[i] >= ii[i+1]) {
1075         *missing = PETSC_TRUE;
1076         if (d) *d = i;
1077         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1078         break;
1079       }
1080     }
1081   }
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1087 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1088 {
1089   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1090   PetscErrorCode ierr;
1091   PetscInt       i,j,m = a->mbs;
1092 
1093   PetscFunctionBegin;
1094   if (!a->diag) {
1095     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1096     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1097     a->free_diag = PETSC_TRUE;
1098   }
1099   for (i=0; i<m; i++) {
1100     a->diag[i] = a->i[i+1];
1101     for (j=a->i[i]; j<a->i[i+1]; j++) {
1102       if (a->j[j] == i) {
1103         a->diag[i] = j;
1104         break;
1105       }
1106     }
1107   }
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1114 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1115 {
1116   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1117   PetscErrorCode ierr;
1118   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1119   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1120 
1121   PetscFunctionBegin;
1122   *nn = n;
1123   if (!ia) PetscFunctionReturn(0);
1124   if (symmetric) {
1125     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1126     nz   = tia[n];
1127   } else {
1128     tia = a->i; tja = a->j;
1129   }
1130 
1131   if (!blockcompressed && bs > 1) {
1132     (*nn) *= bs;
1133     /* malloc & create the natural set of indices */
1134     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1135     if (n) {
1136       (*ia)[0] = oshift;
1137       for (j=1; j<bs; j++) {
1138         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1139       }
1140     }
1141 
1142     for (i=1; i<n; i++) {
1143       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1144       for (j=1; j<bs; j++) {
1145         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1146       }
1147     }
1148     if (n) {
1149       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1150     }
1151 
1152     if (inja) {
1153       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1154       cnt = 0;
1155       for (i=0; i<n; i++) {
1156         for (j=0; j<bs; j++) {
1157           for (k=tia[i]; k<tia[i+1]; k++) {
1158             for (l=0; l<bs; l++) {
1159               (*ja)[cnt++] = bs*tja[k] + l;
1160             }
1161           }
1162         }
1163       }
1164     }
1165 
1166     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1167       ierr = PetscFree(tia);CHKERRQ(ierr);
1168       ierr = PetscFree(tja);CHKERRQ(ierr);
1169     }
1170   } else if (oshift == 1) {
1171     if (symmetric) {
1172       nz = tia[A->rmap->n/bs];
1173       /*  add 1 to i and j indices */
1174       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1175       *ia = tia;
1176       if (ja) {
1177         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1178         *ja = tja;
1179       }
1180     } else {
1181       nz = a->i[A->rmap->n/bs];
1182       /* malloc space and  add 1 to i and j indices */
1183       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1184       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1185       if (ja) {
1186         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1187         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1188       }
1189     }
1190   } else {
1191     *ia = tia;
1192     if (ja) *ja = tja;
1193   }
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1199 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1200 {
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   if (!ia) PetscFunctionReturn(0);
1205   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1206     ierr = PetscFree(*ia);CHKERRQ(ierr);
1207     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1214 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1215 {
1216   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218 
1219   PetscFunctionBegin;
1220 #if defined(PETSC_USE_LOG)
1221   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1222 #endif
1223   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1224   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1225   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1226   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1227   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1228   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1229   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1230   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1231   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1232   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1233   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1234   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1235   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1236 
1237   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1238   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1239   ierr = PetscFree(A->data);CHKERRQ(ierr);
1240 
1241   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1250   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1257 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1258 {
1259   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   switch (op) {
1264   case MAT_ROW_ORIENTED:
1265     a->roworiented = flg;
1266     break;
1267   case MAT_KEEP_NONZERO_PATTERN:
1268     a->keepnonzeropattern = flg;
1269     break;
1270   case MAT_NEW_NONZERO_LOCATIONS:
1271     a->nonew = (flg ? 0 : 1);
1272     break;
1273   case MAT_NEW_NONZERO_LOCATION_ERR:
1274     a->nonew = (flg ? -1 : 0);
1275     break;
1276   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1277     a->nonew = (flg ? -2 : 0);
1278     break;
1279   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1280     a->nounused = (flg ? -1 : 0);
1281     break;
1282   case MAT_NEW_DIAGONALS:
1283   case MAT_IGNORE_OFF_PROC_ENTRIES:
1284   case MAT_USE_HASH_TABLE:
1285     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1286     break;
1287   case MAT_SPD:
1288   case MAT_SYMMETRIC:
1289   case MAT_STRUCTURALLY_SYMMETRIC:
1290   case MAT_HERMITIAN:
1291   case MAT_SYMMETRY_ETERNAL:
1292     /* These options are handled directly by MatSetOption() */
1293     break;
1294   default:
1295     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1296   }
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1303 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1304 {
1305   PetscErrorCode ierr;
1306   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1307   MatScalar      *aa_i;
1308   PetscScalar    *v_i;
1309 
1310   PetscFunctionBegin;
1311   bs  = A->rmap->bs;
1312   bs2 = bs*bs;
1313   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1314 
1315   bn  = row/bs;   /* Block number */
1316   bp  = row % bs; /* Block Position */
1317   M   = ai[bn+1] - ai[bn];
1318   *nz = bs*M;
1319 
1320   if (v) {
1321     *v = 0;
1322     if (*nz) {
1323       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1324       for (i=0; i<M; i++) { /* for each block in the block row */
1325         v_i  = *v + i*bs;
1326         aa_i = aa + bs2*(ai[bn] + i);
1327         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1328       }
1329     }
1330   }
1331 
1332   if (idx) {
1333     *idx = 0;
1334     if (*nz) {
1335       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1336       for (i=0; i<M; i++) { /* for each block in the block row */
1337         idx_i = *idx + i*bs;
1338         itmp  = bs*aj[ai[bn] + i];
1339         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1340       }
1341     }
1342   }
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1348 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1349 {
1350   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352 
1353   PetscFunctionBegin;
1354   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1360 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1361 {
1362   PetscErrorCode ierr;
1363 
1364   PetscFunctionBegin;
1365   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1366   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1374 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1375 {
1376   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1377   Mat            C;
1378   PetscErrorCode ierr;
1379   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1380   PetscInt       *rows,*cols,bs2=a->bs2;
1381   MatScalar      *array;
1382 
1383   PetscFunctionBegin;
1384   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1385   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1386     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1387 
1388     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1389     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1390     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1391     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1392     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1393     ierr = PetscFree(col);CHKERRQ(ierr);
1394   } else {
1395     C = *B;
1396   }
1397 
1398   array = a->a;
1399   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1400   for (i=0; i<mbs; i++) {
1401     cols[0] = i*bs;
1402     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1403     len = ai[i+1] - ai[i];
1404     for (j=0; j<len; j++) {
1405       rows[0] = (*aj++)*bs;
1406       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1407       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1408       array += bs2;
1409     }
1410   }
1411   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1412 
1413   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1414   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1415 
1416   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1417     *B = C;
1418   } else {
1419     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1426 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1427 {
1428   PetscErrorCode ierr;
1429   Mat            Btrans;
1430 
1431   PetscFunctionBegin;
1432   *f   = PETSC_FALSE;
1433   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1434   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1435   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1441 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1442 {
1443   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1444   PetscErrorCode ierr;
1445   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1446   int            fd;
1447   PetscScalar    *aa;
1448   FILE           *file;
1449 
1450   PetscFunctionBegin;
1451   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1452   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1453   col_lens[0] = MAT_FILE_CLASSID;
1454 
1455   col_lens[1] = A->rmap->N;
1456   col_lens[2] = A->cmap->n;
1457   col_lens[3] = a->nz*bs2;
1458 
1459   /* store lengths of each row and write (including header) to file */
1460   count = 0;
1461   for (i=0; i<a->mbs; i++) {
1462     for (j=0; j<bs; j++) {
1463       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1464     }
1465   }
1466   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1467   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1468 
1469   /* store column indices (zero start index) */
1470   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1471   count = 0;
1472   for (i=0; i<a->mbs; i++) {
1473     for (j=0; j<bs; j++) {
1474       for (k=a->i[i]; k<a->i[i+1]; k++) {
1475         for (l=0; l<bs; l++) {
1476           jj[count++] = bs*a->j[k] + l;
1477         }
1478       }
1479     }
1480   }
1481   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1482   ierr = PetscFree(jj);CHKERRQ(ierr);
1483 
1484   /* store nonzero values */
1485   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1486   count = 0;
1487   for (i=0; i<a->mbs; i++) {
1488     for (j=0; j<bs; j++) {
1489       for (k=a->i[i]; k<a->i[i+1]; k++) {
1490         for (l=0; l<bs; l++) {
1491           aa[count++] = a->a[bs2*k + l*bs + j];
1492         }
1493       }
1494     }
1495   }
1496   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1497   ierr = PetscFree(aa);CHKERRQ(ierr);
1498 
1499   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1500   if (file) {
1501     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1502   }
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1508 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1509 {
1510   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1511   PetscErrorCode    ierr;
1512   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1513   PetscViewerFormat format;
1514 
1515   PetscFunctionBegin;
1516   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1517   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1518     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1519   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1520     const char *matname;
1521     Mat        aij;
1522     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1523     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1524     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1525     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1526     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1527   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1528       PetscFunctionReturn(0);
1529   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1530     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1531     for (i=0; i<a->mbs; i++) {
1532       for (j=0; j<bs; j++) {
1533         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1534         for (k=a->i[i]; k<a->i[i+1]; k++) {
1535           for (l=0; l<bs; l++) {
1536 #if defined(PETSC_USE_COMPLEX)
1537             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1539                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1542                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1544               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1545             }
1546 #else
1547             if (a->a[bs2*k + l*bs + j] != 0.0) {
1548               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1549             }
1550 #endif
1551           }
1552         }
1553         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1554       }
1555     }
1556     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1557   } else {
1558     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1559     for (i=0; i<a->mbs; i++) {
1560       for (j=0; j<bs; j++) {
1561         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1562         for (k=a->i[i]; k<a->i[i+1]; k++) {
1563           for (l=0; l<bs; l++) {
1564 #if defined(PETSC_USE_COMPLEX)
1565             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1566               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1567                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1568             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1570                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             } else {
1572               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1573             }
1574 #else
1575             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1576 #endif
1577           }
1578         }
1579         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1580       }
1581     }
1582     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1583   }
1584   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #include <petscdraw.h>
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1591 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1592 {
1593   Mat               A = (Mat) Aa;
1594   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1595   PetscErrorCode    ierr;
1596   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1597   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1598   MatScalar         *aa;
1599   PetscViewer       viewer;
1600   PetscViewerFormat format;
1601 
1602   PetscFunctionBegin;
1603   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1604   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1605   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1606 
1607   /* loop over matrix elements drawing boxes */
1608 
1609   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1610     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1611     /* Blue for negative, Cyan for zero and  Red for positive */
1612     color = PETSC_DRAW_BLUE;
1613     for (i=0,row=0; i<mbs; i++,row+=bs) {
1614       for (j=a->i[i]; j<a->i[i+1]; j++) {
1615         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1616         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1617         aa  = a->a + j*bs2;
1618         for (k=0; k<bs; k++) {
1619           for (l=0; l<bs; l++) {
1620             if (PetscRealPart(*aa++) >=  0.) continue;
1621             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1622           }
1623         }
1624       }
1625     }
1626     color = PETSC_DRAW_CYAN;
1627     for (i=0,row=0; i<mbs; i++,row+=bs) {
1628       for (j=a->i[i]; j<a->i[i+1]; j++) {
1629         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1630         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1631         aa  = a->a + j*bs2;
1632         for (k=0; k<bs; k++) {
1633           for (l=0; l<bs; l++) {
1634             if (PetscRealPart(*aa++) != 0.) continue;
1635             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1636           }
1637         }
1638       }
1639     }
1640     color = PETSC_DRAW_RED;
1641     for (i=0,row=0; i<mbs; i++,row+=bs) {
1642       for (j=a->i[i]; j<a->i[i+1]; j++) {
1643         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1644         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1645         aa  = a->a + j*bs2;
1646         for (k=0; k<bs; k++) {
1647           for (l=0; l<bs; l++) {
1648             if (PetscRealPart(*aa++) <= 0.) continue;
1649             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1650           }
1651         }
1652       }
1653     }
1654     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1655   } else {
1656     /* use contour shading to indicate magnitude of values */
1657     /* first determine max of all nonzero values */
1658     PetscReal minv = 0.0, maxv = 0.0;
1659     PetscDraw popup;
1660 
1661     for (i=0; i<a->nz*a->bs2; i++) {
1662       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1663     }
1664     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1665     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1666     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1667 
1668     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1669     for (i=0,row=0; i<mbs; i++,row+=bs) {
1670       for (j=a->i[i]; j<a->i[i+1]; j++) {
1671         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1672         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1673         aa  = a->a + j*bs2;
1674         for (k=0; k<bs; k++) {
1675           for (l=0; l<bs; l++) {
1676             MatScalar v = *aa++;
1677             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1678             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1679           }
1680         }
1681       }
1682     }
1683     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1690 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1691 {
1692   PetscErrorCode ierr;
1693   PetscReal      xl,yl,xr,yr,w,h;
1694   PetscDraw      draw;
1695   PetscBool      isnull;
1696 
1697   PetscFunctionBegin;
1698   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1699   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1700   if (isnull) PetscFunctionReturn(0);
1701 
1702   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1703   xr  += w;          yr += h;        xl = -w;     yl = -h;
1704   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1705   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1706   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1707   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1708   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatView_SeqBAIJ"
1714 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1715 {
1716   PetscErrorCode ierr;
1717   PetscBool      iascii,isbinary,isdraw;
1718 
1719   PetscFunctionBegin;
1720   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1721   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1722   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1723   if (iascii) {
1724     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1725   } else if (isbinary) {
1726     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1727   } else if (isdraw) {
1728     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1729   } else {
1730     Mat B;
1731     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1732     ierr = MatView(B,viewer);CHKERRQ(ierr);
1733     ierr = MatDestroy(&B);CHKERRQ(ierr);
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1741 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1742 {
1743   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1744   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1745   PetscInt    *ai = a->i,*ailen = a->ilen;
1746   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1747   MatScalar   *ap,*aa = a->a;
1748 
1749   PetscFunctionBegin;
1750   for (k=0; k<m; k++) { /* loop over rows */
1751     row = im[k]; brow = row/bs;
1752     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1753     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1754     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1755     nrow = ailen[brow];
1756     for (l=0; l<n; l++) { /* loop over columns */
1757       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1758       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1759       col  = in[l];
1760       bcol = col/bs;
1761       cidx = col%bs;
1762       ridx = row%bs;
1763       high = nrow;
1764       low  = 0; /* assume unsorted */
1765       while (high-low > 5) {
1766         t = (low+high)/2;
1767         if (rp[t] > bcol) high = t;
1768         else             low  = t;
1769       }
1770       for (i=low; i<high; i++) {
1771         if (rp[i] > bcol) break;
1772         if (rp[i] == bcol) {
1773           *v++ = ap[bs2*i+bs*cidx+ridx];
1774           goto finished;
1775         }
1776       }
1777       *v++ = 0.0;
1778 finished:;
1779     }
1780   }
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1786 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1787 {
1788   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1789   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1790   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1791   PetscErrorCode    ierr;
1792   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1793   PetscBool         roworiented=a->roworiented;
1794   const PetscScalar *value     = v;
1795   MatScalar         *ap,*aa = a->a,*bap;
1796 
1797   PetscFunctionBegin;
1798   if (roworiented) {
1799     stepval = (n-1)*bs;
1800   } else {
1801     stepval = (m-1)*bs;
1802   }
1803   for (k=0; k<m; k++) { /* loop over added rows */
1804     row = im[k];
1805     if (row < 0) continue;
1806 #if defined(PETSC_USE_DEBUG)
1807     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1808 #endif
1809     rp   = aj + ai[row];
1810     ap   = aa + bs2*ai[row];
1811     rmax = imax[row];
1812     nrow = ailen[row];
1813     low  = 0;
1814     high = nrow;
1815     for (l=0; l<n; l++) { /* loop over added columns */
1816       if (in[l] < 0) continue;
1817 #if defined(PETSC_USE_DEBUG)
1818       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1819 #endif
1820       col = in[l];
1821       if (roworiented) {
1822         value = v + (k*(stepval+bs) + l)*bs;
1823       } else {
1824         value = v + (l*(stepval+bs) + k)*bs;
1825       }
1826       if (col <= lastcol) low = 0;
1827       else high = nrow;
1828       lastcol = col;
1829       while (high-low > 7) {
1830         t = (low+high)/2;
1831         if (rp[t] > col) high = t;
1832         else             low  = t;
1833       }
1834       for (i=low; i<high; i++) {
1835         if (rp[i] > col) break;
1836         if (rp[i] == col) {
1837           bap = ap +  bs2*i;
1838           if (roworiented) {
1839             if (is == ADD_VALUES) {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] += *value++;
1843                 }
1844               }
1845             } else {
1846               for (ii=0; ii<bs; ii++,value+=stepval) {
1847                 for (jj=ii; jj<bs2; jj+=bs) {
1848                   bap[jj] = *value++;
1849                 }
1850               }
1851             }
1852           } else {
1853             if (is == ADD_VALUES) {
1854               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1855                 for (jj=0; jj<bs; jj++) {
1856                   bap[jj] += value[jj];
1857                 }
1858                 bap += bs;
1859               }
1860             } else {
1861               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1862                 for (jj=0; jj<bs; jj++) {
1863                   bap[jj]  = value[jj];
1864                 }
1865                 bap += bs;
1866               }
1867             }
1868           }
1869           goto noinsert2;
1870         }
1871       }
1872       if (nonew == 1) goto noinsert2;
1873       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1874       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1875       N = nrow++ - 1; high++;
1876       /* shift up all the later entries in this row */
1877       for (ii=N; ii>=i; ii--) {
1878         rp[ii+1] = rp[ii];
1879         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1880       }
1881       if (N >= i) {
1882         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1883       }
1884       rp[i] = col;
1885       bap   = ap +  bs2*i;
1886       if (roworiented) {
1887         for (ii=0; ii<bs; ii++,value+=stepval) {
1888           for (jj=ii; jj<bs2; jj+=bs) {
1889             bap[jj] = *value++;
1890           }
1891         }
1892       } else {
1893         for (ii=0; ii<bs; ii++,value+=stepval) {
1894           for (jj=0; jj<bs; jj++) {
1895             *bap++ = *value++;
1896           }
1897         }
1898       }
1899 noinsert2:;
1900       low = i;
1901     }
1902     ailen[row] = nrow;
1903   }
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1909 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1910 {
1911   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1912   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1913   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1914   PetscErrorCode ierr;
1915   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1916   MatScalar      *aa  = a->a,*ap;
1917   PetscReal      ratio=0.6;
1918 
1919   PetscFunctionBegin;
1920   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1921 
1922   if (m) rmax = ailen[0];
1923   for (i=1; i<mbs; i++) {
1924     /* move each row back by the amount of empty slots (fshift) before it*/
1925     fshift += imax[i-1] - ailen[i-1];
1926     rmax    = PetscMax(rmax,ailen[i]);
1927     if (fshift) {
1928       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1929       N  = ailen[i];
1930       for (j=0; j<N; j++) {
1931         ip[j-fshift] = ip[j];
1932 
1933         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1934       }
1935     }
1936     ai[i] = ai[i-1] + ailen[i-1];
1937   }
1938   if (mbs) {
1939     fshift += imax[mbs-1] - ailen[mbs-1];
1940     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1941   }
1942 
1943   /* reset ilen and imax for each row */
1944   a->nonzerorowcnt = 0;
1945   for (i=0; i<mbs; i++) {
1946     ailen[i] = imax[i] = ai[i+1] - ai[i];
1947     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1948   }
1949   a->nz = ai[mbs];
1950 
1951   /* diagonals may have moved, so kill the diagonal pointers */
1952   a->idiagvalid = PETSC_FALSE;
1953   if (fshift && a->diag) {
1954     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1955     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1956     a->diag = 0;
1957   }
1958   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1959   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1960   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1961   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1962 
1963   A->info.mallocs    += a->reallocs;
1964   a->reallocs         = 0;
1965   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1966   a->rmax             = rmax;
1967 
1968   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 /*
1973    This function returns an array of flags which indicate the locations of contiguous
1974    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1975    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1976    Assume: sizes should be long enough to hold all the values.
1977 */
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1980 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1981 {
1982   PetscInt  i,j,k,row;
1983   PetscBool flg;
1984 
1985   PetscFunctionBegin;
1986   for (i=0,j=0; i<n; j++) {
1987     row = idx[i];
1988     if (row%bs!=0) { /* Not the begining of a block */
1989       sizes[j] = 1;
1990       i++;
1991     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1992       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1993       i++;
1994     } else { /* Begining of the block, so check if the complete block exists */
1995       flg = PETSC_TRUE;
1996       for (k=1; k<bs; k++) {
1997         if (row+k != idx[i+k]) { /* break in the block */
1998           flg = PETSC_FALSE;
1999           break;
2000         }
2001       }
2002       if (flg) { /* No break in the bs */
2003         sizes[j] = bs;
2004         i       += bs;
2005       } else {
2006         sizes[j] = 1;
2007         i++;
2008       }
2009     }
2010   }
2011   *bs_max = j;
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 #undef __FUNCT__
2016 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2017 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2018 {
2019   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2020   PetscErrorCode    ierr;
2021   PetscInt          i,j,k,count,*rows;
2022   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2023   PetscScalar       zero = 0.0;
2024   MatScalar         *aa;
2025   const PetscScalar *xx;
2026   PetscScalar       *bb;
2027 
2028   PetscFunctionBegin;
2029   /* fix right hand side if needed */
2030   if (x && b) {
2031     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2032     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2033     for (i=0; i<is_n; i++) {
2034       bb[is_idx[i]] = diag*xx[is_idx[i]];
2035     }
2036     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2037     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2038   }
2039 
2040   /* Make a copy of the IS and  sort it */
2041   /* allocate memory for rows,sizes */
2042   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2043 
2044   /* copy IS values to rows, and sort them */
2045   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2046   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2047 
2048   if (baij->keepnonzeropattern) {
2049     for (i=0; i<is_n; i++) sizes[i] = 1;
2050     bs_max          = is_n;
2051   } else {
2052     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2053     A->nonzerostate++;
2054   }
2055 
2056   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2057     row = rows[j];
2058     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2059     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2060     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2061     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2062       if (diag != (PetscScalar)0.0) {
2063         if (baij->ilen[row/bs] > 0) {
2064           baij->ilen[row/bs]       = 1;
2065           baij->j[baij->i[row/bs]] = row/bs;
2066 
2067           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2068         }
2069         /* Now insert all the diagonal values for this bs */
2070         for (k=0; k<bs; k++) {
2071           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2072         }
2073       } else { /* (diag == 0.0) */
2074         baij->ilen[row/bs] = 0;
2075       } /* end (diag == 0.0) */
2076     } else { /* (sizes[i] != bs) */
2077 #if defined(PETSC_USE_DEBUG)
2078       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2079 #endif
2080       for (k=0; k<count; k++) {
2081         aa[0] =  zero;
2082         aa   += bs;
2083       }
2084       if (diag != (PetscScalar)0.0) {
2085         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2086       }
2087     }
2088   }
2089 
2090   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2091   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2097 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2098 {
2099   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2100   PetscErrorCode    ierr;
2101   PetscInt          i,j,k,count;
2102   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2103   PetscScalar       zero = 0.0;
2104   MatScalar         *aa;
2105   const PetscScalar *xx;
2106   PetscScalar       *bb;
2107   PetscBool         *zeroed,vecs = PETSC_FALSE;
2108 
2109   PetscFunctionBegin;
2110   /* fix right hand side if needed */
2111   if (x && b) {
2112     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2113     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2114     vecs = PETSC_TRUE;
2115   }
2116 
2117   /* zero the columns */
2118   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2119   for (i=0; i<is_n; i++) {
2120     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2121     zeroed[is_idx[i]] = PETSC_TRUE;
2122   }
2123   for (i=0; i<A->rmap->N; i++) {
2124     if (!zeroed[i]) {
2125       row = i/bs;
2126       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2127         for (k=0; k<bs; k++) {
2128           col = bs*baij->j[j] + k;
2129           if (zeroed[col]) {
2130             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2131             if (vecs) bb[i] -= aa[0]*xx[col];
2132             aa[0] = 0.0;
2133           }
2134         }
2135       }
2136     } else if (vecs) bb[i] = diag*xx[i];
2137   }
2138   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2139   if (vecs) {
2140     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2141     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2142   }
2143 
2144   /* zero the rows */
2145   for (i=0; i<is_n; i++) {
2146     row   = is_idx[i];
2147     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2148     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2149     for (k=0; k<count; k++) {
2150       aa[0] =  zero;
2151       aa   += bs;
2152     }
2153     if (diag != (PetscScalar)0.0) {
2154       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2155     }
2156   }
2157   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2163 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2164 {
2165   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2166   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2167   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2168   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2169   PetscErrorCode ierr;
2170   PetscInt       ridx,cidx,bs2=a->bs2;
2171   PetscBool      roworiented=a->roworiented;
2172   MatScalar      *ap,value,*aa=a->a,*bap;
2173 
2174   PetscFunctionBegin;
2175   for (k=0; k<m; k++) { /* loop over added rows */
2176     row  = im[k];
2177     brow = row/bs;
2178     if (row < 0) continue;
2179 #if defined(PETSC_USE_DEBUG)
2180     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2181 #endif
2182     rp   = aj + ai[brow];
2183     ap   = aa + bs2*ai[brow];
2184     rmax = imax[brow];
2185     nrow = ailen[brow];
2186     low  = 0;
2187     high = nrow;
2188     for (l=0; l<n; l++) { /* loop over added columns */
2189       if (in[l] < 0) continue;
2190 #if defined(PETSC_USE_DEBUG)
2191       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2192 #endif
2193       col  = in[l]; bcol = col/bs;
2194       ridx = row % bs; cidx = col % bs;
2195       if (roworiented) {
2196         value = v[l + k*n];
2197       } else {
2198         value = v[k + l*m];
2199       }
2200       if (col <= lastcol) low = 0; else high = nrow;
2201       lastcol = col;
2202       while (high-low > 7) {
2203         t = (low+high)/2;
2204         if (rp[t] > bcol) high = t;
2205         else              low  = t;
2206       }
2207       for (i=low; i<high; i++) {
2208         if (rp[i] > bcol) break;
2209         if (rp[i] == bcol) {
2210           bap = ap +  bs2*i + bs*cidx + ridx;
2211           if (is == ADD_VALUES) *bap += value;
2212           else                  *bap  = value;
2213           goto noinsert1;
2214         }
2215       }
2216       if (nonew == 1) goto noinsert1;
2217       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2218       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2219       N = nrow++ - 1; high++;
2220       /* shift up all the later entries in this row */
2221       for (ii=N; ii>=i; ii--) {
2222         rp[ii+1] = rp[ii];
2223         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2224       }
2225       if (N>=i) {
2226         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2227       }
2228       rp[i]                      = bcol;
2229       ap[bs2*i + bs*cidx + ridx] = value;
2230       a->nz++;
2231       A->nonzerostate++;
2232 noinsert1:;
2233       low = i;
2234     }
2235     ailen[brow] = nrow;
2236   }
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2242 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2243 {
2244   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2245   Mat            outA;
2246   PetscErrorCode ierr;
2247   PetscBool      row_identity,col_identity;
2248 
2249   PetscFunctionBegin;
2250   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2251   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2252   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2253   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2254 
2255   outA            = inA;
2256   inA->factortype = MAT_FACTOR_LU;
2257   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2258   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2259 
2260   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2261 
2262   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2263   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2264   a->row = row;
2265   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2266   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2267   a->col = col;
2268 
2269   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2270   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2271   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2272   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2273 
2274   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2275   if (!a->solve_work) {
2276     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2277     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2278   }
2279   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 #undef __FUNCT__
2284 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2285 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2286 {
2287   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2288   PetscInt    i,nz,mbs;
2289 
2290   PetscFunctionBegin;
2291   nz  = baij->maxnz;
2292   mbs = baij->mbs;
2293   for (i=0; i<nz; i++) {
2294     baij->j[i] = indices[i];
2295   }
2296   baij->nz = nz;
2297   for (i=0; i<mbs; i++) {
2298     baij->ilen[i] = baij->imax[i];
2299   }
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2305 /*@
2306     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2307        in the matrix.
2308 
2309   Input Parameters:
2310 +  mat - the SeqBAIJ matrix
2311 -  indices - the column indices
2312 
2313   Level: advanced
2314 
2315   Notes:
2316     This can be called if you have precomputed the nonzero structure of the
2317   matrix and want to provide it to the matrix object to improve the performance
2318   of the MatSetValues() operation.
2319 
2320     You MUST have set the correct numbers of nonzeros per row in the call to
2321   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2322 
2323     MUST be called before any calls to MatSetValues();
2324 
2325 @*/
2326 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2327 {
2328   PetscErrorCode ierr;
2329 
2330   PetscFunctionBegin;
2331   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2332   PetscValidPointer(indices,2);
2333   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2339 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2340 {
2341   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2342   PetscErrorCode ierr;
2343   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2344   PetscReal      atmp;
2345   PetscScalar    *x,zero = 0.0;
2346   MatScalar      *aa;
2347   PetscInt       ncols,brow,krow,kcol;
2348 
2349   PetscFunctionBegin;
2350   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2351   bs  = A->rmap->bs;
2352   aa  = a->a;
2353   ai  = a->i;
2354   aj  = a->j;
2355   mbs = a->mbs;
2356 
2357   ierr = VecSet(v,zero);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2359   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2360   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2361   for (i=0; i<mbs; i++) {
2362     ncols = ai[1] - ai[0]; ai++;
2363     brow  = bs*i;
2364     for (j=0; j<ncols; j++) {
2365       for (kcol=0; kcol<bs; kcol++) {
2366         for (krow=0; krow<bs; krow++) {
2367           atmp = PetscAbsScalar(*aa);aa++;
2368           row  = brow + krow;   /* row index */
2369           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2370         }
2371       }
2372       aj++;
2373     }
2374   }
2375   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatCopy_SeqBAIJ"
2381 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2382 {
2383   PetscErrorCode ierr;
2384 
2385   PetscFunctionBegin;
2386   /* If the two matrices have the same copy implementation, use fast copy. */
2387   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2388     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2389     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2390     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2391 
2392     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2393     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2394     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2395   } else {
2396     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2403 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2404 {
2405   PetscErrorCode ierr;
2406 
2407   PetscFunctionBegin;
2408   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2414 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2415 {
2416   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2417 
2418   PetscFunctionBegin;
2419   *array = a->a;
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2425 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2426 {
2427   PetscFunctionBegin;
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2433 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2434 {
2435   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2436   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2437   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2438   PetscErrorCode ierr;
2439 
2440   PetscFunctionBegin;
2441   /* Set the number of nonzeros in the new matrix */
2442   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2448 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2449 {
2450   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2451   PetscErrorCode ierr;
2452   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2453   PetscBLASInt   one=1;
2454 
2455   PetscFunctionBegin;
2456   if (str == SAME_NONZERO_PATTERN) {
2457     PetscScalar  alpha = a;
2458     PetscBLASInt bnz;
2459     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2460     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2461     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2462   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2463     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2464   } else {
2465     Mat      B;
2466     PetscInt *nnz;
2467     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2468     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2469     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2470     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2471     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2472     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2473     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2474     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2475     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2476     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2477     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2478     ierr = PetscFree(nnz);CHKERRQ(ierr);
2479   }
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2485 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2486 {
2487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2488   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2489   MatScalar   *aa = a->a;
2490 
2491   PetscFunctionBegin;
2492   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2498 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2499 {
2500   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2501   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2502   MatScalar   *aa = a->a;
2503 
2504   PetscFunctionBegin;
2505   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 #undef __FUNCT__
2510 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2511 /*
2512     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2513 */
2514 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2515 {
2516   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2517   PetscErrorCode ierr;
2518   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2519   PetscInt       nz = a->i[m],row,*jj,mr,col;
2520 
2521   PetscFunctionBegin;
2522   *nn = n;
2523   if (!ia) PetscFunctionReturn(0);
2524   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2525   else {
2526     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2527     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2528     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2529     jj   = a->j;
2530     for (i=0; i<nz; i++) {
2531       collengths[jj[i]]++;
2532     }
2533     cia[0] = oshift;
2534     for (i=0; i<n; i++) {
2535       cia[i+1] = cia[i] + collengths[i];
2536     }
2537     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2538     jj   = a->j;
2539     for (row=0; row<m; row++) {
2540       mr = a->i[row+1] - a->i[row];
2541       for (i=0; i<mr; i++) {
2542         col = *jj++;
2543 
2544         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2545       }
2546     }
2547     ierr = PetscFree(collengths);CHKERRQ(ierr);
2548     *ia  = cia; *ja = cja;
2549   }
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2555 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2556 {
2557   PetscErrorCode ierr;
2558 
2559   PetscFunctionBegin;
2560   if (!ia) PetscFunctionReturn(0);
2561   ierr = PetscFree(*ia);CHKERRQ(ierr);
2562   ierr = PetscFree(*ja);CHKERRQ(ierr);
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 /*
2567  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2568  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2569  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2570  */
2571 #undef __FUNCT__
2572 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2573 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2574 {
2575   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2576   PetscErrorCode ierr;
2577   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2578   PetscInt       nz = a->i[m],row,*jj,mr,col;
2579   PetscInt       *cspidx;
2580 
2581   PetscFunctionBegin;
2582   *nn = n;
2583   if (!ia) PetscFunctionReturn(0);
2584 
2585   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2586   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2587   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2588   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2589   jj   = a->j;
2590   for (i=0; i<nz; i++) {
2591     collengths[jj[i]]++;
2592   }
2593   cia[0] = oshift;
2594   for (i=0; i<n; i++) {
2595     cia[i+1] = cia[i] + collengths[i];
2596   }
2597   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2598   jj   = a->j;
2599   for (row=0; row<m; row++) {
2600     mr = a->i[row+1] - a->i[row];
2601     for (i=0; i<mr; i++) {
2602       col = *jj++;
2603       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2604       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2605     }
2606   }
2607   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2608   *ia    = cia; *ja = cja;
2609   *spidx = cspidx;
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2615 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2616 {
2617   PetscErrorCode ierr;
2618 
2619   PetscFunctionBegin;
2620   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2621   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2622   PetscFunctionReturn(0);
2623 }
2624 
2625 #undef __FUNCT__
2626 #define __FUNCT__ "MatShift_SeqBAIJ"
2627 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2628 {
2629   PetscErrorCode ierr;
2630   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2631 
2632   PetscFunctionBegin;
2633   if (!Y->preallocated || !aij->nz) {
2634     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2635   }
2636   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 /* -------------------------------------------------------------------*/
2641 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2642                                        MatGetRow_SeqBAIJ,
2643                                        MatRestoreRow_SeqBAIJ,
2644                                        MatMult_SeqBAIJ_N,
2645                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2646                                        MatMultTranspose_SeqBAIJ,
2647                                        MatMultTransposeAdd_SeqBAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                /* 10*/ 0,
2652                                        MatLUFactor_SeqBAIJ,
2653                                        0,
2654                                        0,
2655                                        MatTranspose_SeqBAIJ,
2656                                /* 15*/ MatGetInfo_SeqBAIJ,
2657                                        MatEqual_SeqBAIJ,
2658                                        MatGetDiagonal_SeqBAIJ,
2659                                        MatDiagonalScale_SeqBAIJ,
2660                                        MatNorm_SeqBAIJ,
2661                                /* 20*/ 0,
2662                                        MatAssemblyEnd_SeqBAIJ,
2663                                        MatSetOption_SeqBAIJ,
2664                                        MatZeroEntries_SeqBAIJ,
2665                                /* 24*/ MatZeroRows_SeqBAIJ,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                /* 29*/ MatSetUp_SeqBAIJ,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                /* 34*/ MatDuplicate_SeqBAIJ,
2676                                        0,
2677                                        0,
2678                                        MatILUFactor_SeqBAIJ,
2679                                        0,
2680                                /* 39*/ MatAXPY_SeqBAIJ,
2681                                        MatGetSubMatrices_SeqBAIJ,
2682                                        MatIncreaseOverlap_SeqBAIJ,
2683                                        MatGetValues_SeqBAIJ,
2684                                        MatCopy_SeqBAIJ,
2685                                /* 44*/ 0,
2686                                        MatScale_SeqBAIJ,
2687                                        MatShift_SeqBAIJ,
2688                                        0,
2689                                        MatZeroRowsColumns_SeqBAIJ,
2690                                /* 49*/ 0,
2691                                        MatGetRowIJ_SeqBAIJ,
2692                                        MatRestoreRowIJ_SeqBAIJ,
2693                                        MatGetColumnIJ_SeqBAIJ,
2694                                        MatRestoreColumnIJ_SeqBAIJ,
2695                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        MatSetValuesBlocked_SeqBAIJ,
2700                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2701                                        MatDestroy_SeqBAIJ,
2702                                        MatView_SeqBAIJ,
2703                                        0,
2704                                        0,
2705                                /* 64*/ 0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2711                                        0,
2712                                        MatConvert_Basic,
2713                                        0,
2714                                        0,
2715                                /* 74*/ 0,
2716                                        MatFDColoringApply_BAIJ,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                /* 79*/ 0,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        MatLoad_SeqBAIJ,
2725                                /* 84*/ 0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                /* 89*/ 0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                /* 94*/ 0,
2736                                        0,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                /* 99*/ 0,
2741                                        0,
2742                                        0,
2743                                        0,
2744                                        0,
2745                                /*104*/ 0,
2746                                        MatRealPart_SeqBAIJ,
2747                                        MatImaginaryPart_SeqBAIJ,
2748                                        0,
2749                                        0,
2750                                /*109*/ 0,
2751                                        0,
2752                                        0,
2753                                        0,
2754                                        MatMissingDiagonal_SeqBAIJ,
2755                                /*114*/ 0,
2756                                        0,
2757                                        0,
2758                                        0,
2759                                        0,
2760                                /*119*/ 0,
2761                                        0,
2762                                        MatMultHermitianTranspose_SeqBAIJ,
2763                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2764                                        0,
2765                                /*124*/ 0,
2766                                        0,
2767                                        MatInvertBlockDiagonal_SeqBAIJ,
2768                                        0,
2769                                        0,
2770                                /*129*/ 0,
2771                                        0,
2772                                        0,
2773                                        0,
2774                                        0,
2775                                /*134*/ 0,
2776                                        0,
2777                                        0,
2778                                        0,
2779                                        0,
2780                                /*139*/ 0,
2781                                        0,
2782                                        0,
2783                                        MatFDColoringSetUp_SeqXAIJ,
2784                                        0,
2785                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2786 };
2787 
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2790 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2791 {
2792   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2793   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2794   PetscErrorCode ierr;
2795 
2796   PetscFunctionBegin;
2797   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2798 
2799   /* allocate space for values if not already there */
2800   if (!aij->saved_values) {
2801     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2802     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2803   }
2804 
2805   /* copy values over */
2806   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 #undef __FUNCT__
2811 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2812 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2813 {
2814   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2815   PetscErrorCode ierr;
2816   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2817 
2818   PetscFunctionBegin;
2819   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2820   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2821 
2822   /* copy values over */
2823   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2824   PetscFunctionReturn(0);
2825 }
2826 
2827 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2828 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2832 static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2833 {
2834   Mat_SeqBAIJ    *b;
2835   PetscErrorCode ierr;
2836   PetscInt       i,mbs,nbs,bs2;
2837   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2838 
2839   PetscFunctionBegin;
2840   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2841   if (nz == MAT_SKIP_ALLOCATION) {
2842     skipallocation = PETSC_TRUE;
2843     nz             = 0;
2844   }
2845 
2846   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2847   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2848   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2849   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2850 
2851   B->preallocated = PETSC_TRUE;
2852 
2853   mbs = B->rmap->n/bs;
2854   nbs = B->cmap->n/bs;
2855   bs2 = bs*bs;
2856 
2857   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2858 
2859   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2860   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2861   if (nnz) {
2862     for (i=0; i<mbs; i++) {
2863       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2864       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2865     }
2866   }
2867 
2868   b    = (Mat_SeqBAIJ*)B->data;
2869   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2870   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2871   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2872 
2873   if (!flg) {
2874     switch (bs) {
2875     case 1:
2876       B->ops->mult    = MatMult_SeqBAIJ_1;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2878       break;
2879     case 2:
2880       B->ops->mult    = MatMult_SeqBAIJ_2;
2881       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2882       break;
2883     case 3:
2884       B->ops->mult    = MatMult_SeqBAIJ_3;
2885       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2886       break;
2887     case 4:
2888       B->ops->mult    = MatMult_SeqBAIJ_4;
2889       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2890       break;
2891     case 5:
2892       B->ops->mult    = MatMult_SeqBAIJ_5;
2893       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2894       break;
2895     case 6:
2896       B->ops->mult    = MatMult_SeqBAIJ_6;
2897       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2898       break;
2899     case 7:
2900       B->ops->mult    = MatMult_SeqBAIJ_7;
2901       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2902       break;
2903     case 15:
2904       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2905       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2906       break;
2907     default:
2908       B->ops->mult    = MatMult_SeqBAIJ_N;
2909       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2910       break;
2911     }
2912   }
2913   B->ops->sor = MatSOR_SeqBAIJ;
2914   b->mbs = mbs;
2915   b->nbs = nbs;
2916   if (!skipallocation) {
2917     if (!b->imax) {
2918       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2919       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2920 
2921       b->free_imax_ilen = PETSC_TRUE;
2922     }
2923     /* b->ilen will count nonzeros in each block row so far. */
2924     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2925     if (!nnz) {
2926       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2927       else if (nz < 0) nz = 1;
2928       for (i=0; i<mbs; i++) b->imax[i] = nz;
2929       nz = nz*mbs;
2930     } else {
2931       nz = 0;
2932       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2933     }
2934 
2935     /* allocate the matrix space */
2936     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2937     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2938     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2939     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2940     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2941 
2942     b->singlemalloc = PETSC_TRUE;
2943     b->i[0]         = 0;
2944     for (i=1; i<mbs+1; i++) {
2945       b->i[i] = b->i[i-1] + b->imax[i-1];
2946     }
2947     b->free_a  = PETSC_TRUE;
2948     b->free_ij = PETSC_TRUE;
2949   } else {
2950     b->free_a  = PETSC_FALSE;
2951     b->free_ij = PETSC_FALSE;
2952   }
2953 
2954   b->bs2              = bs2;
2955   b->mbs              = mbs;
2956   b->nz               = 0;
2957   b->maxnz            = nz;
2958   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2959   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 #undef __FUNCT__
2964 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2965 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2966 {
2967   PetscInt       i,m,nz,nz_max=0,*nnz;
2968   PetscScalar    *values=0;
2969   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2970   PetscErrorCode ierr;
2971 
2972   PetscFunctionBegin;
2973   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2974   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2975   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2976   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2977   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2978   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2979   m    = B->rmap->n/bs;
2980 
2981   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2982   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2983   for (i=0; i<m; i++) {
2984     nz = ii[i+1]- ii[i];
2985     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2986     nz_max = PetscMax(nz_max, nz);
2987     nnz[i] = nz;
2988   }
2989   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2990   ierr = PetscFree(nnz);CHKERRQ(ierr);
2991 
2992   values = (PetscScalar*)V;
2993   if (!values) {
2994     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2995   }
2996   for (i=0; i<m; i++) {
2997     PetscInt          ncols  = ii[i+1] - ii[i];
2998     const PetscInt    *icols = jj + ii[i];
2999     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3000     if (!roworiented) {
3001       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3002     } else {
3003       PetscInt j;
3004       for (j=0; j<ncols; j++) {
3005         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3006         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3007       }
3008     }
3009   }
3010   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3011   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3012   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3013   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3014   PetscFunctionReturn(0);
3015 }
3016 
3017 /*MC
3018    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3019    block sparse compressed row format.
3020 
3021    Options Database Keys:
3022 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3023 
3024   Level: beginner
3025 
3026 .seealso: MatCreateSeqBAIJ()
3027 M*/
3028 
3029 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3030 
3031 #undef __FUNCT__
3032 #define __FUNCT__ "MatCreate_SeqBAIJ"
3033 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3034 {
3035   PetscErrorCode ierr;
3036   PetscMPIInt    size;
3037   Mat_SeqBAIJ    *b;
3038 
3039   PetscFunctionBegin;
3040   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3041   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3042 
3043   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3044   B->data = (void*)b;
3045   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3046 
3047   b->row          = 0;
3048   b->col          = 0;
3049   b->icol         = 0;
3050   b->reallocs     = 0;
3051   b->saved_values = 0;
3052 
3053   b->roworiented        = PETSC_TRUE;
3054   b->nonew              = 0;
3055   b->diag               = 0;
3056   B->spptr              = 0;
3057   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3058   b->keepnonzeropattern = PETSC_FALSE;
3059 
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3068   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3069   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3075 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3076 {
3077   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3078   PetscErrorCode ierr;
3079   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3080 
3081   PetscFunctionBegin;
3082   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3083 
3084   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3085     c->imax           = a->imax;
3086     c->ilen           = a->ilen;
3087     c->free_imax_ilen = PETSC_FALSE;
3088   } else {
3089     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3090     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3091     for (i=0; i<mbs; i++) {
3092       c->imax[i] = a->imax[i];
3093       c->ilen[i] = a->ilen[i];
3094     }
3095     c->free_imax_ilen = PETSC_TRUE;
3096   }
3097 
3098   /* allocate the matrix space */
3099   if (mallocmatspace) {
3100     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3101       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3102       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3103 
3104       c->i            = a->i;
3105       c->j            = a->j;
3106       c->singlemalloc = PETSC_FALSE;
3107       c->free_a       = PETSC_TRUE;
3108       c->free_ij      = PETSC_FALSE;
3109       c->parent       = A;
3110       C->preallocated = PETSC_TRUE;
3111       C->assembled    = PETSC_TRUE;
3112 
3113       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3114       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3115       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3116     } else {
3117       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3118       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3119 
3120       c->singlemalloc = PETSC_TRUE;
3121       c->free_a       = PETSC_TRUE;
3122       c->free_ij      = PETSC_TRUE;
3123 
3124       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3125       if (mbs > 0) {
3126         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3127         if (cpvalues == MAT_COPY_VALUES) {
3128           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3129         } else {
3130           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3131         }
3132       }
3133       C->preallocated = PETSC_TRUE;
3134       C->assembled    = PETSC_TRUE;
3135     }
3136   }
3137 
3138   c->roworiented = a->roworiented;
3139   c->nonew       = a->nonew;
3140 
3141   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3142   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3143 
3144   c->bs2         = a->bs2;
3145   c->mbs         = a->mbs;
3146   c->nbs         = a->nbs;
3147 
3148   if (a->diag) {
3149     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3150       c->diag      = a->diag;
3151       c->free_diag = PETSC_FALSE;
3152     } else {
3153       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3154       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3155       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3156       c->free_diag = PETSC_TRUE;
3157     }
3158   } else c->diag = 0;
3159 
3160   c->nz         = a->nz;
3161   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3162   c->solve_work = NULL;
3163   c->mult_work  = NULL;
3164   c->sor_workt  = NULL;
3165   c->sor_work   = NULL;
3166 
3167   c->compressedrow.use   = a->compressedrow.use;
3168   c->compressedrow.nrows = a->compressedrow.nrows;
3169   if (a->compressedrow.use) {
3170     i    = a->compressedrow.nrows;
3171     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3172     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3173     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3174     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3175   } else {
3176     c->compressedrow.use    = PETSC_FALSE;
3177     c->compressedrow.i      = NULL;
3178     c->compressedrow.rindex = NULL;
3179   }
3180   C->nonzerostate = A->nonzerostate;
3181 
3182   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3183   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3184   PetscFunctionReturn(0);
3185 }
3186 
3187 #undef __FUNCT__
3188 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3189 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3190 {
3191   PetscErrorCode ierr;
3192 
3193   PetscFunctionBegin;
3194   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3195   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3196   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3197   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 #undef __FUNCT__
3202 #define __FUNCT__ "MatLoad_SeqBAIJ"
3203 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3204 {
3205   Mat_SeqBAIJ    *a;
3206   PetscErrorCode ierr;
3207   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3208   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3209   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3210   PetscInt       *masked,nmask,tmp,bs2,ishift;
3211   PetscMPIInt    size;
3212   int            fd;
3213   PetscScalar    *aa;
3214   MPI_Comm       comm;
3215 
3216   PetscFunctionBegin;
3217   /* force binary viewer to load .info file if it has not yet done so */
3218   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3219   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3220   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3221   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3222   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3223   if (bs < 0) bs = 1;
3224   bs2  = bs*bs;
3225 
3226   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3227   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3228   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3229   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3230   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3231   M = header[1]; N = header[2]; nz = header[3];
3232 
3233   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3234   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3235 
3236   /*
3237      This code adds extra rows to make sure the number of rows is
3238     divisible by the blocksize
3239   */
3240   mbs        = M/bs;
3241   extra_rows = bs - M + bs*(mbs);
3242   if (extra_rows == bs) extra_rows = 0;
3243   else mbs++;
3244   if (extra_rows) {
3245     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3246   }
3247 
3248   /* Set global sizes if not already set */
3249   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3250     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3251   } else { /* Check if the matrix global sizes are correct */
3252     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3253     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3254       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3255     }
3256     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3257   }
3258 
3259   /* read in row lengths */
3260   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3261   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3262   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3263 
3264   /* read in column indices */
3265   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3266   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3267   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3268 
3269   /* loop over row lengths determining block row lengths */
3270   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3271   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3272   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3273   rowcount = 0;
3274   nzcount  = 0;
3275   for (i=0; i<mbs; i++) {
3276     nmask = 0;
3277     for (j=0; j<bs; j++) {
3278       kmax = rowlengths[rowcount];
3279       for (k=0; k<kmax; k++) {
3280         tmp = jj[nzcount++]/bs;
3281         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3282       }
3283       rowcount++;
3284     }
3285     browlengths[i] += nmask;
3286     /* zero out the mask elements we set */
3287     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3288   }
3289 
3290   /* Do preallocation  */
3291   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3292   a    = (Mat_SeqBAIJ*)newmat->data;
3293 
3294   /* set matrix "i" values */
3295   a->i[0] = 0;
3296   for (i=1; i<= mbs; i++) {
3297     a->i[i]      = a->i[i-1] + browlengths[i-1];
3298     a->ilen[i-1] = browlengths[i-1];
3299   }
3300   a->nz = 0;
3301   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3302 
3303   /* read in nonzero values */
3304   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3305   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3306   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3307 
3308   /* set "a" and "j" values into matrix */
3309   nzcount = 0; jcount = 0;
3310   for (i=0; i<mbs; i++) {
3311     nzcountb = nzcount;
3312     nmask    = 0;
3313     for (j=0; j<bs; j++) {
3314       kmax = rowlengths[i*bs+j];
3315       for (k=0; k<kmax; k++) {
3316         tmp = jj[nzcount++]/bs;
3317         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3318       }
3319     }
3320     /* sort the masked values */
3321     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3322 
3323     /* set "j" values into matrix */
3324     maskcount = 1;
3325     for (j=0; j<nmask; j++) {
3326       a->j[jcount++]  = masked[j];
3327       mask[masked[j]] = maskcount++;
3328     }
3329     /* set "a" values into matrix */
3330     ishift = bs2*a->i[i];
3331     for (j=0; j<bs; j++) {
3332       kmax = rowlengths[i*bs+j];
3333       for (k=0; k<kmax; k++) {
3334         tmp       = jj[nzcountb]/bs;
3335         block     = mask[tmp] - 1;
3336         point     = jj[nzcountb] - bs*tmp;
3337         idx       = ishift + bs2*block + j + bs*point;
3338         a->a[idx] = (MatScalar)aa[nzcountb++];
3339       }
3340     }
3341     /* zero out the mask elements we set */
3342     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3343   }
3344   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3345 
3346   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3347   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3348   ierr = PetscFree(aa);CHKERRQ(ierr);
3349   ierr = PetscFree(jj);CHKERRQ(ierr);
3350   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3351 
3352   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3353   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3354   PetscFunctionReturn(0);
3355 }
3356 
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatCreateSeqBAIJ"
3359 /*@C
3360    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3361    compressed row) format.  For good matrix assembly performance the
3362    user should preallocate the matrix storage by setting the parameter nz
3363    (or the array nnz).  By setting these parameters accurately, performance
3364    during matrix assembly can be increased by more than a factor of 50.
3365 
3366    Collective on MPI_Comm
3367 
3368    Input Parameters:
3369 +  comm - MPI communicator, set to PETSC_COMM_SELF
3370 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3371           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3372 .  m - number of rows
3373 .  n - number of columns
3374 .  nz - number of nonzero blocks  per block row (same for all rows)
3375 -  nnz - array containing the number of nonzero blocks in the various block rows
3376          (possibly different for each block row) or NULL
3377 
3378    Output Parameter:
3379 .  A - the matrix
3380 
3381    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3382    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3383    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3384 
3385    Options Database Keys:
3386 .   -mat_no_unroll - uses code that does not unroll the loops in the
3387                      block calculations (much slower)
3388 .    -mat_block_size - size of the blocks to use
3389 
3390    Level: intermediate
3391 
3392    Notes:
3393    The number of rows and columns must be divisible by blocksize.
3394 
3395    If the nnz parameter is given then the nz parameter is ignored
3396 
3397    A nonzero block is any block that as 1 or more nonzeros in it
3398 
3399    The block AIJ format is fully compatible with standard Fortran 77
3400    storage.  That is, the stored row and column indices can begin at
3401    either one (as in Fortran) or zero.  See the users' manual for details.
3402 
3403    Specify the preallocated storage with either nz or nnz (not both).
3404    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3405    allocation.  See Users-Manual: ch_mat for details.
3406    matrices.
3407 
3408 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3409 @*/
3410 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3411 {
3412   PetscErrorCode ierr;
3413 
3414   PetscFunctionBegin;
3415   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3416   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3417   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3418   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3419   PetscFunctionReturn(0);
3420 }
3421 
3422 #undef __FUNCT__
3423 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3424 /*@C
3425    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3426    per row in the matrix. For good matrix assembly performance the
3427    user should preallocate the matrix storage by setting the parameter nz
3428    (or the array nnz).  By setting these parameters accurately, performance
3429    during matrix assembly can be increased by more than a factor of 50.
3430 
3431    Collective on MPI_Comm
3432 
3433    Input Parameters:
3434 +  B - the matrix
3435 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3436           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3437 .  nz - number of block nonzeros per block row (same for all rows)
3438 -  nnz - array containing the number of block nonzeros in the various block rows
3439          (possibly different for each block row) or NULL
3440 
3441    Options Database Keys:
3442 .   -mat_no_unroll - uses code that does not unroll the loops in the
3443                      block calculations (much slower)
3444 .   -mat_block_size - size of the blocks to use
3445 
3446    Level: intermediate
3447 
3448    Notes:
3449    If the nnz parameter is given then the nz parameter is ignored
3450 
3451    You can call MatGetInfo() to get information on how effective the preallocation was;
3452    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3453    You can also run with the option -info and look for messages with the string
3454    malloc in them to see if additional memory allocation was needed.
3455 
3456    The block AIJ format is fully compatible with standard Fortran 77
3457    storage.  That is, the stored row and column indices can begin at
3458    either one (as in Fortran) or zero.  See the users' manual for details.
3459 
3460    Specify the preallocated storage with either nz or nnz (not both).
3461    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3462    allocation.  See Users-Manual: ch_mat for details.
3463 
3464 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3465 @*/
3466 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3467 {
3468   PetscErrorCode ierr;
3469 
3470   PetscFunctionBegin;
3471   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3472   PetscValidType(B,1);
3473   PetscValidLogicalCollectiveInt(B,bs,2);
3474   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3475   PetscFunctionReturn(0);
3476 }
3477 
3478 #undef __FUNCT__
3479 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3480 /*@C
3481    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3482    (the default sequential PETSc format).
3483 
3484    Collective on MPI_Comm
3485 
3486    Input Parameters:
3487 +  B - the matrix
3488 .  i - the indices into j for the start of each local row (starts with zero)
3489 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3490 -  v - optional values in the matrix
3491 
3492    Level: developer
3493 
3494    Notes:
3495    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3496    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3497    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3498    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3499    block column and the second index is over columns within a block.
3500 
3501 .keywords: matrix, aij, compressed row, sparse
3502 
3503 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3504 @*/
3505 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3506 {
3507   PetscErrorCode ierr;
3508 
3509   PetscFunctionBegin;
3510   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3511   PetscValidType(B,1);
3512   PetscValidLogicalCollectiveInt(B,bs,2);
3513   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3514   PetscFunctionReturn(0);
3515 }
3516 
3517 
3518 #undef __FUNCT__
3519 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3520 /*@
3521      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3522 
3523      Collective on MPI_Comm
3524 
3525    Input Parameters:
3526 +  comm - must be an MPI communicator of size 1
3527 .  bs - size of block
3528 .  m - number of rows
3529 .  n - number of columns
3530 .  i - row indices
3531 .  j - column indices
3532 -  a - matrix values
3533 
3534    Output Parameter:
3535 .  mat - the matrix
3536 
3537    Level: advanced
3538 
3539    Notes:
3540        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3541     once the matrix is destroyed
3542 
3543        You cannot set new nonzero locations into this matrix, that will generate an error.
3544 
3545        The i and j indices are 0 based
3546 
3547        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3548 
3549       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3550       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3551       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3552       with column-major ordering within blocks.
3553 
3554 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3555 
3556 @*/
3557 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3558 {
3559   PetscErrorCode ierr;
3560   PetscInt       ii;
3561   Mat_SeqBAIJ    *baij;
3562 
3563   PetscFunctionBegin;
3564   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3565   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3566 
3567   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3568   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3569   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3570   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3571   baij = (Mat_SeqBAIJ*)(*mat)->data;
3572   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3573   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3574 
3575   baij->i = i;
3576   baij->j = j;
3577   baij->a = a;
3578 
3579   baij->singlemalloc = PETSC_FALSE;
3580   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3581   baij->free_a       = PETSC_FALSE;
3582   baij->free_ij      = PETSC_FALSE;
3583 
3584   for (ii=0; ii<m; ii++) {
3585     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3586 #if defined(PETSC_USE_DEBUG)
3587     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3588 #endif
3589   }
3590 #if defined(PETSC_USE_DEBUG)
3591   for (ii=0; ii<baij->i[m]; ii++) {
3592     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3593     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3594   }
3595 #endif
3596 
3597   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3598   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3599   PetscFunctionReturn(0);
3600 }
3601 
3602 #undef __FUNCT__
3603 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3604 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3605 {
3606   PetscErrorCode ierr;
3607 
3608   PetscFunctionBegin;
3609   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3610   PetscFunctionReturn(0);
3611 }
3612