xref: /petsc/src/mat/impls/baij/seq/baij.c (revision cc0fae110111c7ed8c32c86910770502c6cc577e)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: baij.c,v 1.118 1997/12/01 01:55:02 bsmith Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 
11 #include "pinclude/pviewer.h"
12 #include "sys.h"
13 #include "src/mat/impls/baij/seq/baij.h"
14 #include "src/vec/vecimpl.h"
15 #include "src/inline/spops.h"
16 #include "petsc.h"
17 
18 
19 /*
20      Adds diagonal pointers to sparse matrix structure.
21 */
22 
23 #undef __FUNC__
24 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
25 int MatMarkDiag_SeqBAIJ(Mat A)
26 {
27   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
28   int         i,j, *diag, m = a->mbs;
29 
30   PetscFunctionBegin;
31   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
32   PLogObjectMemory(A,(m+1)*sizeof(int));
33   for ( i=0; i<m; i++ ) {
34     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
35       if (a->j[j] == i) {
36         diag[i] = j;
37         break;
38       }
39     }
40   }
41   a->diag = diag;
42   PetscFunctionReturn(0);
43 }
44 
45 
46 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
47 
48 #undef __FUNC__
49 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
50 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
51                             PetscTruth *done)
52 {
53   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
54   int         ierr, n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59   if (symmetric) {
60     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
61   } else if (oshift == 1) {
62     /* temporarily add 1 to i and j indices */
63     int nz = a->i[n] + 1;
64     for ( i=0; i<nz; i++ ) a->j[i]++;
65     for ( i=0; i<n+1; i++ ) a->i[i]++;
66     *ia = a->i; *ja = a->j;
67   } else {
68     *ia = a->i; *ja = a->j;
69   }
70 
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNC__
75 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
76 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
77                                 PetscTruth *done)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,n = a->mbs;
81 
82   PetscFunctionBegin;
83   if (!ia) PetscFunctionReturn(0);
84   if (symmetric) {
85     PetscFree(*ia);
86     PetscFree(*ja);
87   } else if (oshift == 1) {
88     int nz = a->i[n];
89     for ( i=0; i<nz; i++ ) a->j[i]--;
90     for ( i=0; i<n+1; i++ ) a->i[i]--;
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 
96 #undef __FUNC__
97 #define __FUNC__ "MatView_SeqBAIJ_Binary"
98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
99 {
100   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
101   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
102   Scalar      *aa;
103 
104   PetscFunctionBegin;
105   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
106   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
107   col_lens[0] = MAT_COOKIE;
108 
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNC__
156 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
157 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
158 {
159   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
160   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
161   FILE        *fd;
162   char        *outputname;
163 
164   PetscFunctionBegin;
165   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
166   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
167   ierr = ViewerGetFormat(viewer,&format);
168   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
169     fprintf(fd,"  block size is %d\n",bs);
170   }
171   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
172     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
173   }
174   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
175     for ( i=0; i<a->mbs; i++ ) {
176       for ( j=0; j<bs; j++ ) {
177         fprintf(fd,"row %d:",i*bs+j);
178         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
179           for ( l=0; l<bs; l++ ) {
180 #if defined(USE_PETSC_COMPLEX)
181           if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
182             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
183               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
184           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
185             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
186               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
187           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
188             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
189 #else
190           if (a->a[bs2*k + l*bs + j] != 0.0)
191             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
192 #endif
193           }
194         }
195         fprintf(fd,"\n");
196       }
197     }
198   }
199   else {
200     for ( i=0; i<a->mbs; i++ ) {
201       for ( j=0; j<bs; j++ ) {
202         fprintf(fd,"row %d:",i*bs+j);
203         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
204           for ( l=0; l<bs; l++ ) {
205 #if defined(USE_PETSC_COMPLEX)
206           if (imag(a->a[bs2*k + l*bs + j]) > 0.0) {
207             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
208               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
209           }
210           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) {
211             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
212               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
213           }
214           else {
215             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
216           }
217 #else
218             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
219 #endif
220           }
221         }
222         fprintf(fd,"\n");
223       }
224     }
225   }
226   fflush(fd);
227   PetscFunctionReturn(0);
228 }
229 
230 #undef __FUNC__
231 #define __FUNC__ "MatView_SeqBAIJ_Draw"
232 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
233 {
234   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
235   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
236   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
237   Scalar       *aa;
238   Draw         draw;
239   DrawButton   button;
240   PetscTruth   isnull;
241 
242   PetscFunctionBegin;
243   ierr = ViewerDrawGetDraw(viewer,&draw);CHKERRQ(ierr);
244   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
245 
246   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
247   xr += w;    yr += h;  xl = -w;     yl = -h;
248   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
249   /* loop over matrix elements drawing boxes */
250   color = DRAW_BLUE;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) >=  0.) continue;
259           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
260         }
261       }
262     }
263   }
264   color = DRAW_CYAN;
265   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
266     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
267       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
268       x_l = a->j[j]*bs; x_r = x_l + 1.0;
269       aa = a->a + j*bs2;
270       for ( k=0; k<bs; k++ ) {
271         for ( l=0; l<bs; l++ ) {
272           if (PetscReal(*aa++) != 0.) continue;
273           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
274         }
275       }
276     }
277   }
278 
279   color = DRAW_RED;
280   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
281     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
282       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
283       x_l = a->j[j]*bs; x_r = x_l + 1.0;
284       aa = a->a + j*bs2;
285       for ( k=0; k<bs; k++ ) {
286         for ( l=0; l<bs; l++ ) {
287           if (PetscReal(*aa++) <= 0.) continue;
288           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
289         }
290       }
291     }
292   }
293 
294   DrawSynchronizedFlush(draw);
295   DrawGetPause(draw,&pause);
296   if (pause >= 0) { PetscSleep(pause); PetscFunctionReturn(0);}
297 
298   /* allow the matrix to zoom or shrink */
299   ierr = DrawSynchronizedGetMouseButton(draw,&button,&xc,&yc,0,0);
300   while (button != BUTTON_RIGHT) {
301     DrawSynchronizedClear(draw);
302     if (button == BUTTON_LEFT) scale = .5;
303     else if (button == BUTTON_CENTER) scale = 2.;
304     xl = scale*(xl + w - xc) + xc - w*scale;
305     xr = scale*(xr - w - xc) + xc + w*scale;
306     yl = scale*(yl + h - yc) + yc - h*scale;
307     yr = scale*(yr - h - yc) + yc + h*scale;
308     w *= scale; h *= scale;
309     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
310     color = DRAW_BLUE;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318             if (PetscReal(*aa++) >=  0.) continue;
319             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
320           }
321         }
322       }
323     }
324     color = DRAW_CYAN;
325     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
326       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
327         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
328         x_l = a->j[j]*bs; x_r = x_l + 1.0;
329         aa = a->a + j*bs2;
330         for ( k=0; k<bs; k++ ) {
331           for ( l=0; l<bs; l++ ) {
332           if (PetscReal(*aa++) != 0.) continue;
333           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
334           }
335         }
336       }
337     }
338 
339     color = DRAW_RED;
340     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
341       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
342         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
343         x_l = a->j[j]*bs; x_r = x_l + 1.0;
344         aa = a->a + j*bs2;
345         for ( k=0; k<bs; k++ ) {
346           for ( l=0; l<bs; l++ ) {
347             if (PetscReal(*aa++) <= 0.) continue;
348             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
349           }
350         }
351       }
352     }
353     ierr = DrawSynchronizedGetMouseButton(draw,&button,&xc,&yc,0,0);
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNC__
359 #define __FUNC__ "MatView_SeqBAIJ"
360 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
361 {
362   Mat         A = (Mat) obj;
363   ViewerType  vtype;
364   int         ierr;
365 
366   PetscFunctionBegin;
367   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
368   if (vtype == MATLAB_VIEWER) {
369     SETERRQ(PETSC_ERR_SUP,0,"Matlab viewer not supported");
370   } else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
371     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
372   } else if (vtype == BINARY_FILE_VIEWER) {
373     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
374   } else if (vtype == DRAW_VIEWER) {
375     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #define CHUNKSIZE  10
381 
382 #undef __FUNC__
383 #define __FUNC__ "MatSetValues_SeqBAIJ"
384 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
385 {
386   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
387   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
388   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
389   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
390   int          ridx,cidx,bs2=a->bs2;
391   Scalar      *ap,value,*aa=a->a,*bap;
392 
393   PetscFunctionBegin;
394   for ( k=0; k<m; k++ ) { /* loop over added rows */
395     row  = im[k]; brow = row/bs;
396 #if defined(USE_PETSC_BOPT_g)
397     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
398     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
399 #endif
400     rp   = aj + ai[brow];
401     ap   = aa + bs2*ai[brow];
402     rmax = imax[brow];
403     nrow = ailen[brow];
404     low  = 0;
405     for ( l=0; l<n; l++ ) { /* loop over added columns */
406 #if defined(USE_PETSC_BOPT_g)
407       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
408       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
409 #endif
410       col = in[l]; bcol = col/bs;
411       ridx = row % bs; cidx = col % bs;
412       if (roworiented) {
413         value = *v++;
414       } else {
415         value = v[k + l*m];
416       }
417       if (!sorted) low = 0; high = nrow;
418       while (high-low > 7) {
419         t = (low+high)/2;
420         if (rp[t] > bcol) high = t;
421         else              low  = t;
422       }
423       for ( i=low; i<high; i++ ) {
424         if (rp[i] > bcol) break;
425         if (rp[i] == bcol) {
426           bap  = ap +  bs2*i + bs*cidx + ridx;
427           if (is == ADD_VALUES) *bap += value;
428           else                  *bap  = value;
429           goto noinsert1;
430         }
431       }
432       if (nonew == 1) goto noinsert1;
433       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
434       if (nrow >= rmax) {
435         /* there is no extra room in row, therefore enlarge */
436         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
437         Scalar *new_a;
438 
439         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
440 
441         /* Malloc new storage space */
442         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
443         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
444         new_j   = (int *) (new_a + bs2*new_nz);
445         new_i   = new_j + new_nz;
446 
447         /* copy over old data into new slots */
448         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
449         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
450         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
451         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
452         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
453                                                            len*sizeof(int));
454         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
455         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
456         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
457                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
458         /* free up old matrix storage */
459         PetscFree(a->a);
460         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
461         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
462         a->singlemalloc = 1;
463 
464         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
465         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
466         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
467         a->maxnz += bs2*CHUNKSIZE;
468         a->reallocs++;
469         a->nz++;
470       }
471       N = nrow++ - 1;
472       /* shift up all the later entries in this row */
473       for ( ii=N; ii>=i; ii-- ) {
474         rp[ii+1] = rp[ii];
475         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
476       }
477       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
478       rp[i]                      = bcol;
479       ap[bs2*i + bs*cidx + ridx] = value;
480       noinsert1:;
481       low = i;
482     }
483     ailen[brow] = nrow;
484   }
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNC__
489 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
490 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
491 {
492   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
493   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
494   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
495   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
496   Scalar      *ap,*value=v,*aa=a->a,*bap;
497 
498   PetscFunctionBegin;
499   if (roworiented) {
500     stepval = (n-1)*bs;
501   } else {
502     stepval = (m-1)*bs;
503   }
504   for ( k=0; k<m; k++ ) { /* loop over added rows */
505     row  = im[k];
506 #if defined(USE_PETSC_BOPT_g)
507     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
508     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
509 #endif
510     rp   = aj + ai[row];
511     ap   = aa + bs2*ai[row];
512     rmax = imax[row];
513     nrow = ailen[row];
514     low  = 0;
515     for ( l=0; l<n; l++ ) { /* loop over added columns */
516 #if defined(USE_PETSC_BOPT_g)
517       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
518       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
519 #endif
520       col = in[l];
521       if (roworiented) {
522         value = v + k*(stepval+bs)*bs + l*bs;
523       } else {
524         value = v + l*(stepval+bs)*bs + k*bs;
525       }
526       if (!sorted) low = 0; high = nrow;
527       while (high-low > 7) {
528         t = (low+high)/2;
529         if (rp[t] > col) high = t;
530         else             low  = t;
531       }
532       for ( i=low; i<high; i++ ) {
533         if (rp[i] > col) break;
534         if (rp[i] == col) {
535           bap  = ap +  bs2*i;
536           if (roworiented) {
537             if (is == ADD_VALUES) {
538               for ( ii=0; ii<bs; ii++,value+=stepval ) {
539                 for (jj=ii; jj<bs2; jj+=bs ) {
540                   bap[jj] += *value++;
541                 }
542               }
543             } else {
544               for ( ii=0; ii<bs; ii++,value+=stepval ) {
545                 for (jj=ii; jj<bs2; jj+=bs ) {
546                   bap[jj] = *value++;
547                 }
548               }
549             }
550           } else {
551             if (is == ADD_VALUES) {
552               for ( ii=0; ii<bs; ii++,value+=stepval ) {
553                 for (jj=0; jj<bs; jj++ ) {
554                   *bap++ += *value++;
555                 }
556               }
557             } else {
558               for ( ii=0; ii<bs; ii++,value+=stepval ) {
559                 for (jj=0; jj<bs; jj++ ) {
560                   *bap++  = *value++;
561                 }
562               }
563             }
564           }
565           goto noinsert2;
566         }
567       }
568       if (nonew == 1) goto noinsert2;
569       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
570       if (nrow >= rmax) {
571         /* there is no extra room in row, therefore enlarge */
572         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
573         Scalar *new_a;
574 
575         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
576 
577         /* malloc new storage space */
578         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
579         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
580         new_j   = (int *) (new_a + bs2*new_nz);
581         new_i   = new_j + new_nz;
582 
583         /* copy over old data into new slots */
584         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
585         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
586         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
587         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
588         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
589         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
590         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
591         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
592         /* free up old matrix storage */
593         PetscFree(a->a);
594         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
595         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
596         a->singlemalloc = 1;
597 
598         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
599         rmax = imax[row] = imax[row] + CHUNKSIZE;
600         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
601         a->maxnz += bs2*CHUNKSIZE;
602         a->reallocs++;
603         a->nz++;
604       }
605       N = nrow++ - 1;
606       /* shift up all the later entries in this row */
607       for ( ii=N; ii>=i; ii-- ) {
608         rp[ii+1] = rp[ii];
609         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
610       }
611       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
612       rp[i] = col;
613       bap   = ap +  bs2*i;
614       if (roworiented) {
615         for ( ii=0; ii<bs; ii++,value+=stepval) {
616           for (jj=ii; jj<bs2; jj+=bs ) {
617             bap[jj] = *value++;
618           }
619         }
620       } else {
621         for ( ii=0; ii<bs; ii++,value+=stepval ) {
622           for (jj=0; jj<bs; jj++ ) {
623             *bap++  = *value++;
624           }
625         }
626       }
627       noinsert2:;
628       low = i;
629     }
630     ailen[row] = nrow;
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 #undef __FUNC__
636 #define __FUNC__ "MatGetSize_SeqBAIJ"
637 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
638 {
639   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
640 
641   PetscFunctionBegin;
642   if (m) *m = a->m;
643   if (n) *n = a->n;
644   PetscFunctionReturn(0);
645 }
646 
647 #undef __FUNC__
648 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
649 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
650 {
651   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
652 
653   PetscFunctionBegin;
654   *m = 0; *n = a->m;
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNC__
659 #define __FUNC__ "MatGetRow_SeqBAIJ"
660 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
661 {
662   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
663   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
664   Scalar      *aa,*v_i,*aa_i;
665 
666   PetscFunctionBegin;
667   bs  = a->bs;
668   ai  = a->i;
669   aj  = a->j;
670   aa  = a->a;
671   bs2 = a->bs2;
672 
673   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
674 
675   bn  = row/bs;   /* Block number */
676   bp  = row % bs; /* Block Position */
677   M   = ai[bn+1] - ai[bn];
678   *nz = bs*M;
679 
680   if (v) {
681     *v = 0;
682     if (*nz) {
683       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
684       for ( i=0; i<M; i++ ) { /* for each block in the block row */
685         v_i  = *v + i*bs;
686         aa_i = aa + bs2*(ai[bn] + i);
687         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
688       }
689     }
690   }
691 
692   if (idx) {
693     *idx = 0;
694     if (*nz) {
695       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
696       for ( i=0; i<M; i++ ) { /* for each block in the block row */
697         idx_i = *idx + i*bs;
698         itmp  = bs*aj[ai[bn] + i];
699         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
700       }
701     }
702   }
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNC__
707 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
708 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
709 {
710   PetscFunctionBegin;
711   if (idx) {if (*idx) PetscFree(*idx);}
712   if (v)   {if (*v)   PetscFree(*v);}
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNC__
717 #define __FUNC__ "MatTranspose_SeqBAIJ"
718 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
719 {
720   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
721   Mat         C;
722   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
723   int         *rows,*cols,bs2=a->bs2;
724   Scalar      *array=a->a;
725 
726   PetscFunctionBegin;
727   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
728   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
729   PetscMemzero(col,(1+nbs)*sizeof(int));
730 
731   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
732   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
733   PetscFree(col);
734   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
735   cols = rows + bs;
736   for ( i=0; i<mbs; i++ ) {
737     cols[0] = i*bs;
738     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
739     len = ai[i+1] - ai[i];
740     for ( j=0; j<len; j++ ) {
741       rows[0] = (*aj++)*bs;
742       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
743       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
744       array += bs2;
745     }
746   }
747   PetscFree(rows);
748 
749   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
750   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
751 
752   if (B != PETSC_NULL) {
753     *B = C;
754   } else {
755     /* This isn't really an in-place transpose */
756     PetscFree(a->a);
757     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
758     if (a->diag) PetscFree(a->diag);
759     if (a->ilen) PetscFree(a->ilen);
760     if (a->imax) PetscFree(a->imax);
761     if (a->solve_work) PetscFree(a->solve_work);
762     PetscFree(a);
763     PetscMemcpy(A,C,sizeof(struct _p_Mat));
764     PetscHeaderDestroy(C);
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 
770 #undef __FUNC__
771 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
772 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
773 {
774   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
775   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
776   int        m = a->m,*ip, N, *ailen = a->ilen;
777   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
778   Scalar     *aa = a->a, *ap;
779 
780   PetscFunctionBegin;
781   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
782 
783   if (m) rmax = ailen[0];
784   for ( i=1; i<mbs; i++ ) {
785     /* move each row back by the amount of empty slots (fshift) before it*/
786     fshift += imax[i-1] - ailen[i-1];
787     rmax   = PetscMax(rmax,ailen[i]);
788     if (fshift) {
789       ip = aj + ai[i]; ap = aa + bs2*ai[i];
790       N = ailen[i];
791       for ( j=0; j<N; j++ ) {
792         ip[j-fshift] = ip[j];
793         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
794       }
795     }
796     ai[i] = ai[i-1] + ailen[i-1];
797   }
798   if (mbs) {
799     fshift += imax[mbs-1] - ailen[mbs-1];
800     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
801   }
802   /* reset ilen and imax for each row */
803   for ( i=0; i<mbs; i++ ) {
804     ailen[i] = imax[i] = ai[i+1] - ai[i];
805   }
806   a->nz = ai[mbs];
807 
808   /* diagonals may have moved, so kill the diagonal pointers */
809   if (fshift && a->diag) {
810     PetscFree(a->diag);
811     PLogObjectMemory(A,-(m+1)*sizeof(int));
812     a->diag = 0;
813   }
814   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
815            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
816   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
817            a->reallocs);
818   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
819   a->reallocs          = 0;
820   A->info.nz_unneeded  = (double)fshift*bs2;
821 
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNC__
826 #define __FUNC__ "MatZeroEntries_SeqBAIJ"
827 int MatZeroEntries_SeqBAIJ(Mat A)
828 {
829   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
830 
831   PetscFunctionBegin;
832   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNC__
837 #define __FUNC__ "MatDestroy_SeqBAIJ"
838 int MatDestroy_SeqBAIJ(PetscObject obj)
839 {
840   Mat         A  = (Mat) obj;
841   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
842 
843 #if defined(USE_PETSC_LOG)
844   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
845 #endif
846   PetscFree(a->a);
847   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
848   if (a->diag) PetscFree(a->diag);
849   if (a->ilen) PetscFree(a->ilen);
850   if (a->imax) PetscFree(a->imax);
851   if (a->solve_work) PetscFree(a->solve_work);
852   if (a->mult_work) PetscFree(a->mult_work);
853   PetscFree(a);
854   PLogObjectDestroy(A);
855   PetscHeaderDestroy(A);
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNC__
860 #define __FUNC__ "MatSetOption_SeqBAIJ"
861 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
862 {
863   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
864 
865   PetscFunctionBegin;
866   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
867   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
868   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
869   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
870   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
871   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
872   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
873   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
874   else if (op == MAT_ROWS_SORTED ||
875            op == MAT_ROWS_UNSORTED ||
876            op == MAT_SYMMETRIC ||
877            op == MAT_STRUCTURALLY_SYMMETRIC ||
878            op == MAT_YES_NEW_DIAGONALS ||
879            op == MAT_IGNORE_OFF_PROC_ENTRIES) {
880     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
881   } else if (op == MAT_NO_NEW_DIAGONALS) {
882     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
883   } else {
884     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 
890 /* -------------------------------------------------------*/
891 /* Should check that shapes of vectors and matrices match */
892 /* -------------------------------------------------------*/
893 #include "pinclude/plapack.h"
894 
895 #undef __FUNC__
896 #define __FUNC__ "MatMult_SeqBAIJ_1"
897 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
898 {
899   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
900   register Scalar *x,*z,*v,sum;
901   int             mbs=a->mbs,i,*idx,*ii,n;
902 
903   PetscFunctionBegin;
904   VecGetArray_Fast(xx,x);
905   VecGetArray_Fast(zz,z);
906 
907   idx   = a->j;
908   v     = a->a;
909   ii    = a->i;
910 
911   for ( i=0; i<mbs; i++ ) {
912     n    = ii[1] - ii[0]; ii++;
913     sum  = 0.0;
914     while (n--) sum += *v++ * x[*idx++];
915     z[i] = sum;
916   }
917   VecRestoreArray_Fast(xx,x);
918   VecRestoreArray_Fast(zz,z);
919   PLogFlops(2*a->nz - a->m);
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNC__
924 #define __FUNC__ "MatMult_SeqBAIJ_2"
925 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
926 {
927   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
928   register Scalar *x,*z,*v,*xb,sum1,sum2;
929   register Scalar x1,x2;
930   int             mbs=a->mbs,i,*idx,*ii,j,n;
931 
932   PetscFunctionBegin;
933   VecGetArray_Fast(xx,x);
934   VecGetArray_Fast(zz,z);
935 
936   idx   = a->j;
937   v     = a->a;
938   ii    = a->i;
939 
940   for ( i=0; i<mbs; i++ ) {
941     n  = ii[1] - ii[0]; ii++;
942     sum1 = 0.0; sum2 = 0.0;
943     for ( j=0; j<n; j++ ) {
944       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
945       sum1 += v[0]*x1 + v[2]*x2;
946       sum2 += v[1]*x1 + v[3]*x2;
947       v += 4;
948     }
949     z[0] = sum1; z[1] = sum2;
950     z += 2;
951   }
952   VecRestoreArray_Fast(xx,x);
953   VecRestoreArray_Fast(zz,z);
954   PLogFlops(4*a->nz - a->m);
955   PetscFunctionReturn(0);
956 }
957 
958 #undef __FUNC__
959 #define __FUNC__ "MatMult_SeqBAIJ_3"
960 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
961 {
962   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
963   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
964   int             mbs=a->mbs,i,*idx,*ii,j,n;
965 
966   PetscFunctionBegin;
967   VecGetArray_Fast(xx,x);
968   VecGetArray_Fast(zz,z);
969 
970   idx   = a->j;
971   v     = a->a;
972   ii    = a->i;
973 
974   for ( i=0; i<mbs; i++ ) {
975     n  = ii[1] - ii[0]; ii++;
976     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
977     for ( j=0; j<n; j++ ) {
978       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
979       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
980       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
981       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
982       v += 9;
983     }
984     z[0] = sum1; z[1] = sum2; z[2] = sum3;
985     z += 3;
986   }
987   VecRestoreArray_Fast(xx,x);
988   VecRestoreArray_Fast(zz,z);
989   PLogFlops(18*a->nz - a->m);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNC__
994 #define __FUNC__ "MatMult_SeqBAIJ_4"
995 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
996 {
997   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
998   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
999   int             mbs=a->mbs,i,*idx,*ii,j,n;
1000 
1001   PetscFunctionBegin;
1002   VecGetArray_Fast(xx,x);
1003   VecGetArray_Fast(zz,z);
1004 
1005   idx   = a->j;
1006   v     = a->a;
1007   ii    = a->i;
1008 
1009   for ( i=0; i<mbs; i++ ) {
1010     n  = ii[1] - ii[0]; ii++;
1011     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
1012     for ( j=0; j<n; j++ ) {
1013       xb = x + 4*(*idx++);
1014       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1015       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1016       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1017       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1018       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1019       v += 16;
1020     }
1021     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1022     z += 4;
1023   }
1024   VecRestoreArray_Fast(xx,x);
1025   VecRestoreArray_Fast(zz,z);
1026   PLogFlops(32*a->nz - a->m);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNC__
1031 #define __FUNC__ "MatMult_SeqBAIJ_5"
1032 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
1033 {
1034   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1035   register Scalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
1036   register Scalar * restrict v,* restrict xb,* restrict z, * restrict x;
1037   int             mbs=a->mbs,i,*idx,*ii,j,n;
1038 
1039   VecGetArray_Fast(xx,x);
1040   VecGetArray_Fast(zz,z);
1041 
1042   idx   = a->j;
1043   v     = a->a;
1044   ii    = a->i;
1045 
1046   for ( i=0; i<mbs; i++ ) {
1047     n  = ii[1] - ii[0]; ii++;
1048     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1049     for ( j=0; j<n; j++ ) {
1050       xb = x + 5*(*idx++);
1051       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1052       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1053       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1054       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1055       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1056       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1057       v += 25;
1058     }
1059     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1060     z += 5;
1061   }
1062   VecRestoreArray_Fast(xx,x);
1063   VecRestoreArray_Fast(zz,z);
1064   PLogFlops(50*a->nz - a->m);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNC__
1069 #define __FUNC__ "MatMult_SeqBAIJ_7"
1070 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
1071 {
1072   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1073   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1074   register Scalar x1,x2,x3,x4,x5,x6,x7;
1075   int             mbs=a->mbs,i,*idx,*ii,j,n;
1076 
1077   VecGetArray_Fast(xx,x);
1078   VecGetArray_Fast(zz,z);
1079 
1080   idx   = a->j;
1081   v     = a->a;
1082   ii    = a->i;
1083 
1084   for ( i=0; i<mbs; i++ ) {
1085     n  = ii[1] - ii[0]; ii++;
1086     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
1087     for ( j=0; j<n; j++ ) {
1088       xb = x + 7*(*idx++);
1089       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1090       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1091       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1092       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1093       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1094       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1095       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1096       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1097       v += 49;
1098     }
1099     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1100     z += 7;
1101   }
1102 
1103   VecRestoreArray_Fast(xx,x);
1104   VecRestoreArray_Fast(zz,z);
1105   PLogFlops(98*a->nz - a->m);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNC__
1110 #define __FUNC__ "MatMult_SeqBAIJ_N"
1111 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
1112 {
1113   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1114   register Scalar *x,*z,*v,*xb;
1115   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
1116   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1117   int             _One = 1,ncols,k;
1118 
1119   PetscFunctionBegin;
1120   VecGetArray_Fast(xx,x);
1121   VecGetArray_Fast(zz,z);
1122 
1123   idx   = a->j;
1124   v     = a->a;
1125   ii    = a->i;
1126 
1127 
1128   if (!a->mult_work) {
1129     k = PetscMax(a->m,a->n);
1130     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1131   }
1132   work = a->mult_work;
1133   for ( i=0; i<mbs; i++ ) {
1134     n     = ii[1] - ii[0]; ii++;
1135     ncols = n*bs;
1136     workt = work;
1137     for ( j=0; j<n; j++ ) {
1138       xb = x + bs*(*idx++);
1139       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1140       workt += bs;
1141     }
1142     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1143     v += n*bs2;
1144     z += bs;
1145   }
1146   VecRestoreArray_Fast(xx,x);
1147   VecRestoreArray_Fast(zz,z);
1148   PLogFlops(2*a->nz*bs2 - a->m);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNC__
1153 #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1154 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1155 {
1156   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1157   register Scalar *x,*y,*z,*v,sum;
1158   int             mbs=a->mbs,i,*idx,*ii,n;
1159 
1160   PetscFunctionBegin;
1161   VecGetArray_Fast(xx,x);
1162   VecGetArray_Fast(yy,y);
1163   VecGetArray_Fast(zz,z);
1164 
1165   idx   = a->j;
1166   v     = a->a;
1167   ii    = a->i;
1168 
1169   for ( i=0; i<mbs; i++ ) {
1170     n    = ii[1] - ii[0]; ii++;
1171     sum  = y[i];
1172     while (n--) sum += *v++ * x[*idx++];
1173     z[i] = sum;
1174   }
1175   VecRestoreArray_Fast(xx,x);
1176   VecRestoreArray_Fast(yy,y);
1177   VecRestoreArray_Fast(zz,z);
1178   PLogFlops(2*a->nz);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNC__
1183 #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1184 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1185 {
1186   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1187   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1188   register Scalar x1,x2;
1189   int             mbs=a->mbs,i,*idx,*ii,j,n;
1190 
1191   PetscFunctionBegin;
1192   VecGetArray_Fast(xx,x);
1193   VecGetArray_Fast(yy,y);
1194   VecGetArray_Fast(zz,z);
1195 
1196   idx   = a->j;
1197   v     = a->a;
1198   ii    = a->i;
1199 
1200   for ( i=0; i<mbs; i++ ) {
1201     n  = ii[1] - ii[0]; ii++;
1202     sum1 = y[0]; sum2 = y[1];
1203     for ( j=0; j<n; j++ ) {
1204       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1205       sum1 += v[0]*x1 + v[2]*x2;
1206       sum2 += v[1]*x1 + v[3]*x2;
1207       v += 4;
1208     }
1209     z[0] = sum1; z[1] = sum2;
1210     z += 2; y += 2;
1211   }
1212   VecRestoreArray_Fast(xx,x);
1213   VecRestoreArray_Fast(yy,y);
1214   VecRestoreArray_Fast(zz,z);
1215   PLogFlops(4*a->nz);
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNC__
1220 #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1221 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1222 {
1223   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1224   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1225   int             mbs=a->mbs,i,*idx,*ii,j,n;
1226 
1227   PetscFunctionBegin;
1228   VecGetArray_Fast(xx,x);
1229   VecGetArray_Fast(yy,y);
1230   VecGetArray_Fast(zz,z);
1231 
1232   idx   = a->j;
1233   v     = a->a;
1234   ii    = a->i;
1235 
1236   for ( i=0; i<mbs; i++ ) {
1237     n  = ii[1] - ii[0]; ii++;
1238     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1239     for ( j=0; j<n; j++ ) {
1240       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1241       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1242       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1243       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1244       v += 9;
1245     }
1246     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1247     z += 3; y += 3;
1248   }
1249   VecRestoreArray_Fast(xx,x);
1250   VecRestoreArray_Fast(yy,y);
1251   VecRestoreArray_Fast(zz,z);
1252   PLogFlops(18*a->nz);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNC__
1257 #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1258 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1259 {
1260   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1261   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
1262   int             mbs=a->mbs,i,*idx,*ii;
1263   int             j,n;
1264 
1265   PetscFunctionBegin;
1266   VecGetArray_Fast(xx,x);
1267   VecGetArray_Fast(yy,y);
1268   VecGetArray_Fast(zz,z);
1269 
1270   idx   = a->j;
1271   v     = a->a;
1272   ii    = a->i;
1273 
1274   for ( i=0; i<mbs; i++ ) {
1275     n  = ii[1] - ii[0]; ii++;
1276     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1277     for ( j=0; j<n; j++ ) {
1278       xb = x + 4*(*idx++);
1279       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1280       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1281       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1282       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1283       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1284       v += 16;
1285     }
1286     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1287     z += 4; y += 4;
1288   }
1289   VecRestoreArray_Fast(xx,x);
1290   VecRestoreArray_Fast(yy,y);
1291   VecRestoreArray_Fast(zz,z);
1292   PLogFlops(32*a->nz);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNC__
1297 #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1298 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1299 {
1300   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1301   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
1302   int             mbs=a->mbs,i,*idx,*ii,j,n;
1303 
1304   PetscFunctionBegin;
1305   VecGetArray_Fast(xx,x);
1306   VecGetArray_Fast(yy,y);
1307   VecGetArray_Fast(zz,z);
1308 
1309   idx   = a->j;
1310   v     = a->a;
1311   ii    = a->i;
1312 
1313   for ( i=0; i<mbs; i++ ) {
1314     n  = ii[1] - ii[0]; ii++;
1315     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1316     for ( j=0; j<n; j++ ) {
1317       xb = x + 5*(*idx++);
1318       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1319       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1320       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1321       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1322       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1323       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1324       v += 25;
1325     }
1326     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1327     z += 5; y += 5;
1328   }
1329   VecRestoreArray_Fast(xx,x);
1330   VecRestoreArray_Fast(yy,y);
1331   VecRestoreArray_Fast(zz,z);
1332   PLogFlops(50*a->nz);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNC__
1337 #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
1338 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1339 {
1340   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1341   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1342   register Scalar x1,x2,x3,x4,x5,x6,x7;
1343   int             mbs=a->mbs,i,*idx,*ii,j,n;
1344 
1345   PetscFunctionBegin;
1346   VecGetArray_Fast(xx,x);
1347   VecGetArray_Fast(yy,y);
1348   VecGetArray_Fast(zz,z);
1349 
1350   idx   = a->j;
1351   v     = a->a;
1352   ii    = a->i;
1353 
1354   for ( i=0; i<mbs; i++ ) {
1355     n  = ii[1] - ii[0]; ii++;
1356     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1357     for ( j=0; j<n; j++ ) {
1358       xb = x + 7*(*idx++);
1359       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1360       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1361       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1362       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1363       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1364       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1365       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1366       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1367       v += 49;
1368     }
1369     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1370     z += 7; y += 7;
1371   }
1372   VecRestoreArray_Fast(xx,x);
1373   VecRestoreArray_Fast(yy,y);
1374   VecRestoreArray_Fast(zz,z);
1375   PLogFlops(98*a->nz);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 
1380 #undef __FUNC__
1381 #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1382 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1383 {
1384   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1385   register Scalar *x,*z,*v,*xb;
1386   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1387   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1388 
1389   PetscFunctionBegin;
1390   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1391 
1392   VecGetArray_Fast(xx,x);
1393   VecGetArray_Fast(zz,z);
1394 
1395   idx   = a->j;
1396   v     = a->a;
1397   ii    = a->i;
1398 
1399 
1400   if (!a->mult_work) {
1401     k = PetscMax(a->m,a->n);
1402     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1403   }
1404   work = a->mult_work;
1405   for ( i=0; i<mbs; i++ ) {
1406     n     = ii[1] - ii[0]; ii++;
1407     ncols = n*bs;
1408     workt = work;
1409     for ( j=0; j<n; j++ ) {
1410       xb = x + bs*(*idx++);
1411       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1412       workt += bs;
1413     }
1414     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1415     v += n*bs2;
1416     z += bs;
1417   }
1418   VecRestoreArray_Fast(xx,x);
1419   VecRestoreArray_Fast(zz,z);
1420   PLogFlops(2*a->nz*bs2 );
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNC__
1425 #define __FUNC__ "MatMultTrans_SeqBAIJ"
1426 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1427 {
1428   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1429   Scalar          *xg,*zg,*zb;
1430   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1431   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1432   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1433 
1434 
1435   PetscFunctionBegin;
1436   VecGetArray_Fast(xx,xg); x = xg;
1437   VecGetArray_Fast(zz,zg); z = zg;
1438   PetscMemzero(z,N*sizeof(Scalar));
1439 
1440   idx   = a->j;
1441   v     = a->a;
1442   ii    = a->i;
1443 
1444   switch (bs) {
1445   case 1:
1446     for ( i=0; i<mbs; i++ ) {
1447       n  = ii[1] - ii[0]; ii++;
1448       xb = x + i; x1 = xb[0];
1449       ib = idx + ai[i];
1450       for ( j=0; j<n; j++ ) {
1451         rval    = ib[j];
1452         z[rval] += *v++ * x1;
1453       }
1454     }
1455     break;
1456   case 2:
1457     for ( i=0; i<mbs; i++ ) {
1458       n  = ii[1] - ii[0]; ii++;
1459       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1460       ib = idx + ai[i];
1461       for ( j=0; j<n; j++ ) {
1462         rval      = ib[j]*2;
1463         z[rval++] += v[0]*x1 + v[1]*x2;
1464         z[rval++] += v[2]*x1 + v[3]*x2;
1465         v += 4;
1466       }
1467     }
1468     break;
1469   case 3:
1470     for ( i=0; i<mbs; i++ ) {
1471       n  = ii[1] - ii[0]; ii++;
1472       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1473       ib = idx + ai[i];
1474       for ( j=0; j<n; j++ ) {
1475         rval      = ib[j]*3;
1476         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1477         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1478         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1479         v += 9;
1480       }
1481     }
1482     break;
1483   case 4:
1484     for ( i=0; i<mbs; i++ ) {
1485       n  = ii[1] - ii[0]; ii++;
1486       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1487       ib = idx + ai[i];
1488       for ( j=0; j<n; j++ ) {
1489         rval      = ib[j]*4;
1490         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1491         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1492         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1493         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1494         v += 16;
1495       }
1496     }
1497     break;
1498   case 5:
1499     for ( i=0; i<mbs; i++ ) {
1500       n  = ii[1] - ii[0]; ii++;
1501       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1502       x4 = xb[3];   x5 = xb[4];
1503       ib = idx + ai[i];
1504       for ( j=0; j<n; j++ ) {
1505         rval      = ib[j]*5;
1506         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1507         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1508         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1509         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1510         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1511         v += 25;
1512       }
1513     }
1514     break;
1515       /* block sizes larger then 5 by 5 are handled by BLAS */
1516     default: {
1517       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1518       if (!a->mult_work) {
1519         k = PetscMax(a->m,a->n);
1520         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1521         CHKPTRQ(a->mult_work);
1522       }
1523       work = a->mult_work;
1524       for ( i=0; i<mbs; i++ ) {
1525         n     = ii[1] - ii[0]; ii++;
1526         ncols = n*bs;
1527         PetscMemzero(work,ncols*sizeof(Scalar));
1528         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1529         v += n*bs2;
1530         x += bs;
1531         workt = work;
1532         for ( j=0; j<n; j++ ) {
1533           zb = z + bs*(*idx++);
1534           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1535           workt += bs;
1536         }
1537       }
1538     }
1539   }
1540   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1541   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1542   PLogFlops(2*a->nz*a->bs2 - a->n);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNC__
1547 #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1548 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1549 {
1550   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1551   Scalar          *xg,*zg,*zb;
1552   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1553   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1554   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1555 
1556   PetscFunctionBegin;
1557   VecGetArray_Fast(xx,xg); x = xg;
1558   VecGetArray_Fast(zz,zg); z = zg;
1559 
1560   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1561   else PetscMemzero(z,N*sizeof(Scalar));
1562 
1563   idx   = a->j;
1564   v     = a->a;
1565   ii    = a->i;
1566 
1567   switch (bs) {
1568   case 1:
1569     for ( i=0; i<mbs; i++ ) {
1570       n  = ii[1] - ii[0]; ii++;
1571       xb = x + i; x1 = xb[0];
1572       ib = idx + ai[i];
1573       for ( j=0; j<n; j++ ) {
1574         rval    = ib[j];
1575         z[rval] += *v++ * x1;
1576       }
1577     }
1578     break;
1579   case 2:
1580     for ( i=0; i<mbs; i++ ) {
1581       n  = ii[1] - ii[0]; ii++;
1582       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1583       ib = idx + ai[i];
1584       for ( j=0; j<n; j++ ) {
1585         rval      = ib[j]*2;
1586         z[rval++] += v[0]*x1 + v[1]*x2;
1587         z[rval++] += v[2]*x1 + v[3]*x2;
1588         v += 4;
1589       }
1590     }
1591     break;
1592   case 3:
1593     for ( i=0; i<mbs; i++ ) {
1594       n  = ii[1] - ii[0]; ii++;
1595       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1596       ib = idx + ai[i];
1597       for ( j=0; j<n; j++ ) {
1598         rval      = ib[j]*3;
1599         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1600         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1601         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1602         v += 9;
1603       }
1604     }
1605     break;
1606   case 4:
1607     for ( i=0; i<mbs; i++ ) {
1608       n  = ii[1] - ii[0]; ii++;
1609       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1610       ib = idx + ai[i];
1611       for ( j=0; j<n; j++ ) {
1612         rval      = ib[j]*4;
1613         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1614         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1615         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1616         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1617         v += 16;
1618       }
1619     }
1620     break;
1621   case 5:
1622     for ( i=0; i<mbs; i++ ) {
1623       n  = ii[1] - ii[0]; ii++;
1624       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1625       x4 = xb[3];   x5 = xb[4];
1626       ib = idx + ai[i];
1627       for ( j=0; j<n; j++ ) {
1628         rval      = ib[j]*5;
1629         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1630         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1631         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1632         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1633         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1634         v += 25;
1635       }
1636     }
1637     break;
1638       /* block sizes larger then 5 by 5 are handled by BLAS */
1639     default: {
1640       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1641       if (!a->mult_work) {
1642         k = PetscMax(a->m,a->n);
1643         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1644         CHKPTRQ(a->mult_work);
1645       }
1646       work = a->mult_work;
1647       for ( i=0; i<mbs; i++ ) {
1648         n     = ii[1] - ii[0]; ii++;
1649         ncols = n*bs;
1650         PetscMemzero(work,ncols*sizeof(Scalar));
1651         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1652         v += n*bs2;
1653         x += bs;
1654         workt = work;
1655         for ( j=0; j<n; j++ ) {
1656           zb = z + bs*(*idx++);
1657           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1658           workt += bs;
1659         }
1660       }
1661     }
1662   }
1663   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1664   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1665   PLogFlops(2*a->nz*a->bs2);
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNC__
1670 #define __FUNC__ "MatGetInfo_SeqBAIJ"
1671 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1672 {
1673   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1674 
1675   PetscFunctionBegin;
1676   info->rows_global    = (double)a->m;
1677   info->columns_global = (double)a->n;
1678   info->rows_local     = (double)a->m;
1679   info->columns_local  = (double)a->n;
1680   info->block_size     = a->bs2;
1681   info->nz_allocated   = a->maxnz;
1682   info->nz_used        = a->bs2*a->nz;
1683   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1684   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1685     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1686   info->assemblies   = A->num_ass;
1687   info->mallocs      = a->reallocs;
1688   info->memory       = A->mem;
1689   if (A->factor) {
1690     info->fill_ratio_given  = A->info.fill_ratio_given;
1691     info->fill_ratio_needed = A->info.fill_ratio_needed;
1692     info->factor_mallocs    = A->info.factor_mallocs;
1693   } else {
1694     info->fill_ratio_given  = 0;
1695     info->fill_ratio_needed = 0;
1696     info->factor_mallocs    = 0;
1697   }
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 #undef __FUNC__
1702 #define __FUNC__ "MatEqual_SeqBAIJ"
1703 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1704 {
1705   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1706 
1707   PetscFunctionBegin;
1708   if (B->type !=MATSEQBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1709 
1710   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1711   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1712     *flg = PETSC_FALSE; PetscFunctionReturn(0);
1713   }
1714 
1715   /* if the a->i are the same */
1716   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1717     *flg = PETSC_FALSE; PetscFunctionReturn(0);
1718   }
1719 
1720   /* if a->j are the same */
1721   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1722     *flg = PETSC_FALSE; PetscFunctionReturn(0);
1723   }
1724 
1725   /* if a->a are the same */
1726   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1727     *flg = PETSC_FALSE; PetscFunctionReturn(0);
1728   }
1729   *flg = PETSC_TRUE;
1730   PetscFunctionReturn(0);
1731 
1732 }
1733 
1734 #undef __FUNC__
1735 #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
1736 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1737 {
1738   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1739   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1740   Scalar      *x, zero = 0.0,*aa,*aa_j;
1741 
1742   PetscFunctionBegin;
1743   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Not for factored matrix");
1744   bs   = a->bs;
1745   aa   = a->a;
1746   ai   = a->i;
1747   aj   = a->j;
1748   ambs = a->mbs;
1749   bs2  = a->bs2;
1750 
1751   VecSet(&zero,v);
1752   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1753   if (n != a->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming matrix and vector");
1754   for ( i=0; i<ambs; i++ ) {
1755     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1756       if (aj[j] == i) {
1757         row  = i*bs;
1758         aa_j = aa+j*bs2;
1759         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1760         break;
1761       }
1762     }
1763   }
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 #undef __FUNC__
1768 #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
1769 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1770 {
1771   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1772   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1773   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1774 
1775   PetscFunctionBegin;
1776   ai  = a->i;
1777   aj  = a->j;
1778   aa  = a->a;
1779   m   = a->m;
1780   n   = a->n;
1781   bs  = a->bs;
1782   mbs = a->mbs;
1783   bs2 = a->bs2;
1784   if (ll) {
1785     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1786     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector wrong length");
1787     for ( i=0; i<mbs; i++ ) { /* for each block row */
1788       M  = ai[i+1] - ai[i];
1789       li = l + i*bs;
1790       v  = aa + bs2*ai[i];
1791       for ( j=0; j<M; j++ ) { /* for each block */
1792         for ( k=0; k<bs2; k++ ) {
1793           (*v++) *= li[k%bs];
1794         }
1795       }
1796     }
1797   }
1798 
1799   if (rr) {
1800     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1801     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vector wrong length");
1802     for ( i=0; i<mbs; i++ ) { /* for each block row */
1803       M  = ai[i+1] - ai[i];
1804       v  = aa + bs2*ai[i];
1805       for ( j=0; j<M; j++ ) { /* for each block */
1806         ri = r + bs*aj[ai[i]+j];
1807         for ( k=0; k<bs; k++ ) {
1808           x = ri[k];
1809           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1810         }
1811       }
1812     }
1813   }
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 
1818 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1819 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1820 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1821 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1822 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1823 
1824 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1825 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1826 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1827 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1828 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1829 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1830 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1831 
1832 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1833 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1834 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1835 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1836 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1837 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1838 
1839 #undef __FUNC__
1840 #define __FUNC__ "MatNorm_SeqBAIJ"
1841 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1842 {
1843   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1844   Scalar      *v = a->a;
1845   double      sum = 0.0;
1846   int         i,nz=a->nz,bs2=a->bs2;
1847 
1848   PetscFunctionBegin;
1849   if (type == NORM_FROBENIUS) {
1850     for (i=0; i< bs2*nz; i++ ) {
1851 #if defined(USE_PETSC_COMPLEX)
1852       sum += real(conj(*v)*(*v)); v++;
1853 #else
1854       sum += (*v)*(*v); v++;
1855 #endif
1856     }
1857     *norm = sqrt(sum);
1858   }
1859   else {
1860     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
1861   }
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1866 /*
1867      note: This can only work for identity for row and col. It would
1868    be good to check this and otherwise generate an error.
1869 */
1870 #undef __FUNC__
1871 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1872 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1873 {
1874   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1875   Mat         outA;
1876   int         ierr;
1877 
1878   PetscFunctionBegin;
1879   if (fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported");
1880 
1881   outA          = inA;
1882   inA->factor   = FACTOR_LU;
1883   a->row        = row;
1884   a->col        = col;
1885 
1886   if (!a->solve_work) {
1887     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1888     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1889   }
1890 
1891   if (!a->diag) {
1892     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1893   }
1894   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1895 
1896   /*
1897       Blocksize 4 has a special faster solver for ILU(0) factorization
1898     with natural ordering
1899   */
1900   if (a->bs == 4) {
1901     inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering;
1902   }
1903 
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 #undef __FUNC__
1908 #define __FUNC__ "MatScale_SeqBAIJ"
1909 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1910 {
1911   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1912   int         one = 1, totalnz = a->bs2*a->nz;
1913 
1914   PetscFunctionBegin;
1915   BLscal_( &totalnz, alpha, a->a, &one );
1916   PLogFlops(totalnz);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #undef __FUNC__
1921 #define __FUNC__ "MatGetValues_SeqBAIJ"
1922 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1923 {
1924   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1925   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1926   int        *ai = a->i, *ailen = a->ilen;
1927   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1928   Scalar     *ap, *aa = a->a, zero = 0.0;
1929 
1930   PetscFunctionBegin;
1931   for ( k=0; k<m; k++ ) { /* loop over rows */
1932     row  = im[k]; brow = row/bs;
1933     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
1934     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
1935     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1936     nrow = ailen[brow];
1937     for ( l=0; l<n; l++ ) { /* loop over columns */
1938       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
1939       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
1940       col  = in[l] ;
1941       bcol = col/bs;
1942       cidx = col%bs;
1943       ridx = row%bs;
1944       high = nrow;
1945       low  = 0; /* assume unsorted */
1946       while (high-low > 5) {
1947         t = (low+high)/2;
1948         if (rp[t] > bcol) high = t;
1949         else             low  = t;
1950       }
1951       for ( i=low; i<high; i++ ) {
1952         if (rp[i] > bcol) break;
1953         if (rp[i] == bcol) {
1954           *v++ = ap[bs2*i+bs*cidx+ridx];
1955           goto finished;
1956         }
1957       }
1958       *v++ = zero;
1959       finished:;
1960     }
1961   }
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #undef __FUNC__
1966 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
1967 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1968 {
1969   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1970 
1971   PetscFunctionBegin;
1972   *bs = baij->bs;
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 /* idx should be of length atlease bs */
1977 #undef __FUNC__
1978 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1979 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1980 {
1981   int i,row;
1982 
1983   PetscFunctionBegin;
1984   row = idx[0];
1985   if (row%bs!=0) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
1986 
1987   for ( i=1; i<bs; i++ ) {
1988     if (row+i != idx[i]) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
1989   }
1990   *flg = PETSC_TRUE;
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #undef __FUNC__
1995 #define __FUNC__ "MatZeroRows_SeqBAIJ"
1996 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1997 {
1998   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1999   IS          is_local;
2000   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
2001   PetscTruth  flg;
2002   Scalar      zero = 0.0,*aa;
2003 
2004   PetscFunctionBegin;
2005   /* Make a copy of the IS and  sort it */
2006   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
2007   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
2008   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
2009   ierr = ISSort(is_local); CHKERRQ(ierr);
2010   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
2011 
2012   i = 0;
2013   while (i < is_n) {
2014     if (rows[i]<0 || rows[i]>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"row out of range");
2015     flg = PETSC_FALSE;
2016     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
2017     if (flg) { /* There exists a block of rows to be Zerowed */
2018       baij->ilen[rows[i]/bs] = 0;
2019       i += bs;
2020     } else { /* Zero out only the requested row */
2021       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
2022       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
2023       for ( j=0; j<count; j++ ) {
2024         aa[0] = zero;
2025         aa+=bs;
2026       }
2027       i++;
2028     }
2029   }
2030   if (diag) {
2031     for ( j=0; j<is_n; j++ ) {
2032       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
2033     }
2034   }
2035   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
2036   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
2037   ierr = ISDestroy(is_local); CHKERRQ(ierr);
2038   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2039 
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 #undef __FUNC__
2044 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
2045 int MatPrintHelp_SeqBAIJ(Mat A)
2046 {
2047   static int called = 0;
2048   MPI_Comm   comm = A->comm;
2049 
2050   PetscFunctionBegin;
2051   if (called) {PetscFunctionReturn(0);} else called = 1;
2052   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
2053   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 /* -------------------------------------------------------------------*/
2058 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
2059        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
2060        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
2061        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
2062        MatSolve_SeqBAIJ_N,0,
2063        0,0,
2064        MatLUFactor_SeqBAIJ,0,
2065        0,
2066        MatTranspose_SeqBAIJ,
2067        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
2068        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2069        0,MatAssemblyEnd_SeqBAIJ,
2070        0,
2071        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
2072        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2073        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2074        MatILUFactorSymbolic_SeqBAIJ,0,
2075        0,0,
2076        MatConvertSameType_SeqBAIJ,0,0,
2077        MatILUFactor_SeqBAIJ,0,0,
2078        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
2079        MatGetValues_SeqBAIJ,0,
2080        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
2081        0,0,0,MatGetBlockSize_SeqBAIJ,
2082        MatGetRowIJ_SeqBAIJ,
2083        MatRestoreRowIJ_SeqBAIJ,
2084        0,0,0,0,0,0,
2085        MatSetValuesBlocked_SeqBAIJ};
2086 
2087 #undef __FUNC__
2088 #define __FUNC__ "MatCreateSeqBAIJ"
2089 /*@C
2090    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2091    compressed row) format.  For good matrix assembly performance the
2092    user should preallocate the matrix storage by setting the parameter nz
2093    (or the array nzz).  By setting these parameters accurately, performance
2094    during matrix assembly can be increased by more than a factor of 50.
2095 
2096    Input Parameters:
2097 .  comm - MPI communicator, set to PETSC_COMM_SELF
2098 .  bs - size of block
2099 .  m - number of rows
2100 .  n - number of columns
2101 .  nz - number of block nonzeros per block row (same for all rows)
2102 .  nzz - array containing the number of block nonzeros in the various block rows
2103          (possibly different for each block row) or PETSC_NULL
2104 
2105    Output Parameter:
2106 .  A - the matrix
2107 
2108    Options Database Keys:
2109 $    -mat_no_unroll - uses code that does not unroll the loops in the
2110 $                     block calculations (much slower)
2111 $    -mat_block_size - size of the blocks to use
2112 
2113    Notes:
2114    The block AIJ format is fully compatible with standard Fortran 77
2115    storage.  That is, the stored row and column indices can begin at
2116    either one (as in Fortran) or zero.  See the users' manual for details.
2117 
2118    Specify the preallocated storage with either nz or nnz (not both).
2119    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2120    allocation.  For additional details, see the users manual chapter on
2121    matrices.
2122 
2123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2124 @*/
2125 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
2126 {
2127   Mat         B;
2128   Mat_SeqBAIJ *b;
2129   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
2130 
2131   PetscFunctionBegin;
2132   MPI_Comm_size(comm,&size);
2133   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
2134 
2135   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
2136 
2137   if (mbs*bs!=m || nbs*bs!=n) {
2138     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
2139   }
2140 
2141   *A = 0;
2142   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView);
2143   PLogObjectCreate(B);
2144   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
2145   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2146   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
2147   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
2148   if (!flg) {
2149     switch (bs) {
2150     case 1:
2151       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2152       B->ops.solve           = MatSolve_SeqBAIJ_1;
2153       B->ops.mult            = MatMult_SeqBAIJ_1;
2154       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
2155       break;
2156     case 2:
2157       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2158       B->ops.solve           = MatSolve_SeqBAIJ_2;
2159       B->ops.mult            = MatMult_SeqBAIJ_2;
2160       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
2161       break;
2162     case 3:
2163       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2164       B->ops.solve           = MatSolve_SeqBAIJ_3;
2165       B->ops.mult            = MatMult_SeqBAIJ_3;
2166       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
2167       break;
2168     case 4:
2169       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2170       B->ops.solve           = MatSolve_SeqBAIJ_4;
2171       B->ops.mult            = MatMult_SeqBAIJ_4;
2172       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
2173       break;
2174     case 5:
2175       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2176       B->ops.solve           = MatSolve_SeqBAIJ_5;
2177       B->ops.mult            = MatMult_SeqBAIJ_5;
2178       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
2179       break;
2180     case 7:
2181       B->ops.mult            = MatMult_SeqBAIJ_7;
2182       B->ops.solve           = MatSolve_SeqBAIJ_7;
2183       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
2184       break;
2185     }
2186   }
2187   B->destroy          = MatDestroy_SeqBAIJ;
2188   B->view             = MatView_SeqBAIJ;
2189   B->factor           = 0;
2190   B->lupivotthreshold = 1.0;
2191   B->mapping          = 0;
2192   b->row              = 0;
2193   b->col              = 0;
2194   b->reallocs         = 0;
2195 
2196   b->m       = m; B->m = m; B->M = m;
2197   b->n       = n; B->n = n; B->N = n;
2198   b->mbs     = mbs;
2199   b->nbs     = nbs;
2200   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
2201   if (nnz == PETSC_NULL) {
2202     if (nz == PETSC_DEFAULT) nz = 5;
2203     else if (nz <= 0)        nz = 1;
2204     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2205     nz = nz*mbs;
2206   } else {
2207     nz = 0;
2208     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
2209   }
2210 
2211   /* allocate the matrix space */
2212   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
2213   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
2214   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
2215   b->j  = (int *) (b->a + nz*bs2);
2216   PetscMemzero(b->j,nz*sizeof(int));
2217   b->i  = b->j + nz;
2218   b->singlemalloc = 1;
2219 
2220   b->i[0] = 0;
2221   for (i=1; i<mbs+1; i++) {
2222     b->i[i] = b->i[i-1] + b->imax[i-1];
2223   }
2224 
2225   /* b->ilen will count nonzeros in each block row so far. */
2226   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2227   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2228   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
2229 
2230   b->bs               = bs;
2231   b->bs2              = bs2;
2232   b->mbs              = mbs;
2233   b->nz               = 0;
2234   b->maxnz            = nz*bs2;
2235   b->sorted           = 0;
2236   b->roworiented      = 1;
2237   b->nonew            = 0;
2238   b->diag             = 0;
2239   b->solve_work       = 0;
2240   b->mult_work        = 0;
2241   b->spptr            = 0;
2242   B->info.nz_unneeded = (double)b->maxnz;
2243 
2244   *A = B;
2245   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
2246   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNC__
2251 #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2252 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
2253 {
2254   Mat         C;
2255   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
2256   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2257 
2258   PetscFunctionBegin;
2259   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
2260 
2261   *B = 0;
2262   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView);
2263   PLogObjectCreate(C);
2264   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
2265   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2266   C->destroy    = MatDestroy_SeqBAIJ;
2267   C->view       = MatView_SeqBAIJ;
2268   C->factor     = A->factor;
2269   c->row        = 0;
2270   c->col        = 0;
2271   C->assembled  = PETSC_TRUE;
2272 
2273   c->m = C->m   = a->m;
2274   c->n = C->n   = a->n;
2275   C->M          = a->m;
2276   C->N          = a->n;
2277 
2278   c->bs         = a->bs;
2279   c->bs2        = a->bs2;
2280   c->mbs        = a->mbs;
2281   c->nbs        = a->nbs;
2282 
2283   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2284   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2285   for ( i=0; i<mbs; i++ ) {
2286     c->imax[i] = a->imax[i];
2287     c->ilen[i] = a->ilen[i];
2288   }
2289 
2290   /* allocate the matrix space */
2291   c->singlemalloc = 1;
2292   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
2293   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
2294   c->j  = (int *) (c->a + nz*bs2);
2295   c->i  = c->j + nz;
2296   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2297   if (mbs > 0) {
2298     PetscMemcpy(c->j,a->j,nz*sizeof(int));
2299     if (cpvalues == COPY_VALUES) {
2300       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
2301     }
2302   }
2303 
2304   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2305   c->sorted      = a->sorted;
2306   c->roworiented = a->roworiented;
2307   c->nonew       = a->nonew;
2308 
2309   if (a->diag) {
2310     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2311     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2312     for ( i=0; i<mbs; i++ ) {
2313       c->diag[i] = a->diag[i];
2314     }
2315   }
2316   else c->diag          = 0;
2317   c->nz                 = a->nz;
2318   c->maxnz              = a->maxnz;
2319   c->solve_work         = 0;
2320   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2321   c->mult_work          = 0;
2322   *B = C;
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 #undef __FUNC__
2327 #define __FUNC__ "MatLoad_SeqBAIJ"
2328 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2329 {
2330   Mat_SeqBAIJ  *a;
2331   Mat          B;
2332   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2333   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2334   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2335   int          *masked, nmask,tmp,bs2,ishift;
2336   Scalar       *aa;
2337   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2338 
2339   PetscFunctionBegin;
2340   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2341   bs2  = bs*bs;
2342 
2343   MPI_Comm_size(comm,&size);
2344   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
2345   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2346   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
2347   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
2348   M = header[1]; N = header[2]; nz = header[3];
2349 
2350   if (header[3] < 0) {
2351     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
2352   }
2353 
2354   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2355 
2356   /*
2357      This code adds extra rows to make sure the number of rows is
2358     divisible by the blocksize
2359   */
2360   mbs        = M/bs;
2361   extra_rows = bs - M + bs*(mbs);
2362   if (extra_rows == bs) extra_rows = 0;
2363   else                  mbs++;
2364   if (extra_rows) {
2365     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2366   }
2367 
2368   /* read in row lengths */
2369   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2370   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2371   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2372 
2373   /* read in column indices */
2374   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2375   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
2376   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2377 
2378   /* loop over row lengths determining block row lengths */
2379   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2380   PetscMemzero(browlengths,mbs*sizeof(int));
2381   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2382   PetscMemzero(mask,mbs*sizeof(int));
2383   masked = mask + mbs;
2384   rowcount = 0; nzcount = 0;
2385   for ( i=0; i<mbs; i++ ) {
2386     nmask = 0;
2387     for ( j=0; j<bs; j++ ) {
2388       kmax = rowlengths[rowcount];
2389       for ( k=0; k<kmax; k++ ) {
2390         tmp = jj[nzcount++]/bs;
2391         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2392       }
2393       rowcount++;
2394     }
2395     browlengths[i] += nmask;
2396     /* zero out the mask elements we set */
2397     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2398   }
2399 
2400   /* create our matrix */
2401   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
2402   B = *A;
2403   a = (Mat_SeqBAIJ *) B->data;
2404 
2405   /* set matrix "i" values */
2406   a->i[0] = 0;
2407   for ( i=1; i<= mbs; i++ ) {
2408     a->i[i]      = a->i[i-1] + browlengths[i-1];
2409     a->ilen[i-1] = browlengths[i-1];
2410   }
2411   a->nz         = 0;
2412   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2413 
2414   /* read in nonzero values */
2415   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2416   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
2417   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2418 
2419   /* set "a" and "j" values into matrix */
2420   nzcount = 0; jcount = 0;
2421   for ( i=0; i<mbs; i++ ) {
2422     nzcountb = nzcount;
2423     nmask    = 0;
2424     for ( j=0; j<bs; j++ ) {
2425       kmax = rowlengths[i*bs+j];
2426       for ( k=0; k<kmax; k++ ) {
2427         tmp = jj[nzcount++]/bs;
2428 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2429       }
2430       rowcount++;
2431     }
2432     /* sort the masked values */
2433     PetscSortInt(nmask,masked);
2434 
2435     /* set "j" values into matrix */
2436     maskcount = 1;
2437     for ( j=0; j<nmask; j++ ) {
2438       a->j[jcount++]  = masked[j];
2439       mask[masked[j]] = maskcount++;
2440     }
2441     /* set "a" values into matrix */
2442     ishift = bs2*a->i[i];
2443     for ( j=0; j<bs; j++ ) {
2444       kmax = rowlengths[i*bs+j];
2445       for ( k=0; k<kmax; k++ ) {
2446         tmp    = jj[nzcountb]/bs ;
2447         block  = mask[tmp] - 1;
2448         point  = jj[nzcountb] - bs*tmp;
2449         idx    = ishift + bs2*block + j + bs*point;
2450         a->a[idx] = aa[nzcountb++];
2451       }
2452     }
2453     /* zero out the mask elements we set */
2454     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2455   }
2456   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
2457 
2458   PetscFree(rowlengths);
2459   PetscFree(browlengths);
2460   PetscFree(aa);
2461   PetscFree(jj);
2462   PetscFree(mask);
2463 
2464   B->assembled = PETSC_TRUE;
2465 
2466   ierr = MatView_Private(B); CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 
2471 
2472