xref: /petsc/src/mat/impls/baij/seq/baij.c (revision dd5f02e74e3cee25737ecfe33a063618e1ecd118)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.76 1996/11/30 20:35:21 curfman Exp balay $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 int MatMarkDiag_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         i,j, *diag, m = a->mbs;
23 
24   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25   PLogObjectMemory(A,(m+1)*sizeof(int));
26   for ( i=0; i<m; i++ ) {
27     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
28       if (a->j[j] == i) {
29         diag[i] = j;
30         break;
31       }
32     }
33   }
34   a->diag = diag;
35   return 0;
36 }
37 
38 #include "draw.h"
39 #include "pinclude/pviewer.h"
40 #include "sys.h"
41 
42 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
43 
44 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
45                             PetscTruth *done)
46 {
47   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
48   int         ierr, n = a->mbs,i;
49 
50   *nn = n;
51   if (!ia) return 0;
52   if (symmetric) {
53     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
54   } else if (oshift == 1) {
55     /* temporarily add 1 to i and j indices */
56     int nz = a->i[n] + 1;
57     for ( i=0; i<nz; i++ ) a->j[i]++;
58     for ( i=0; i<n+1; i++ ) a->i[i]++;
59     *ia = a->i; *ja = a->j;
60   } else {
61     *ia = a->i; *ja = a->j;
62   }
63 
64   return 0;
65 }
66 
67 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
68                                 PetscTruth *done)
69 {
70   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
71   int         i,n = a->mbs;
72 
73   if (!ia) return 0;
74   if (symmetric) {
75     PetscFree(*ia);
76     PetscFree(*ja);
77   } else if (oshift == 1) {
78     int nz = a->i[n];
79     for ( i=0; i<nz; i++ ) a->j[i]--;
80     for ( i=0; i<n+1; i++ ) a->i[i]--;
81   }
82   return 0;
83 }
84 
85 
86 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
87 {
88   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
89   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
90   Scalar      *aa;
91 
92   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
93   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
94   col_lens[0] = MAT_COOKIE;
95 
96   col_lens[1] = a->m;
97   col_lens[2] = a->n;
98   col_lens[3] = a->nz*bs2;
99 
100   /* store lengths of each row and write (including header) to file */
101   count = 0;
102   for ( i=0; i<a->mbs; i++ ) {
103     for ( j=0; j<bs; j++ ) {
104       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
105     }
106   }
107   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
108   PetscFree(col_lens);
109 
110   /* store column indices (zero start index) */
111   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
112   count = 0;
113   for ( i=0; i<a->mbs; i++ ) {
114     for ( j=0; j<bs; j++ ) {
115       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
116         for ( l=0; l<bs; l++ ) {
117           jj[count++] = bs*a->j[k] + l;
118         }
119       }
120     }
121   }
122   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
123   PetscFree(jj);
124 
125   /* store nonzero values */
126   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
127   count = 0;
128   for ( i=0; i<a->mbs; i++ ) {
129     for ( j=0; j<bs; j++ ) {
130       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
131         for ( l=0; l<bs; l++ ) {
132           aa[count++] = a->a[bs2*k + l*bs + j];
133         }
134       }
135     }
136   }
137   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
138   PetscFree(aa);
139   return 0;
140 }
141 
142 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
143 {
144   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
145   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
146   FILE        *fd;
147   char        *outputname;
148 
149   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
150   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
151   ierr = ViewerGetFormat(viewer,&format);
152   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
153     fprintf(fd,"  block size is %d\n",bs);
154   }
155   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
156     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
157   }
158   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
159     for ( i=0; i<a->mbs; i++ ) {
160       for ( j=0; j<bs; j++ ) {
161         fprintf(fd,"row %d:",i*bs+j);
162         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
163           for ( l=0; l<bs; l++ ) {
164 #if defined(PETSC_COMPLEX)
165           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
166             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
167               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
168           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
169             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
170 #else
171           if (a->a[bs2*k + l*bs + j] != 0.0)
172             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
173 #endif
174           }
175         }
176         fprintf(fd,"\n");
177       }
178     }
179   }
180   else {
181     for ( i=0; i<a->mbs; i++ ) {
182       for ( j=0; j<bs; j++ ) {
183         fprintf(fd,"row %d:",i*bs+j);
184         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
185           for ( l=0; l<bs; l++ ) {
186 #if defined(PETSC_COMPLEX)
187           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
188             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
189               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
190           }
191           else {
192             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
193           }
194 #else
195             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
196 #endif
197           }
198         }
199         fprintf(fd,"\n");
200       }
201     }
202   }
203   fflush(fd);
204   return 0;
205 }
206 
207 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
208 {
209   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
210   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
211   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
212   Scalar       *aa;
213   Draw         draw;
214   DrawButton   button;
215   PetscTruth   isnull;
216 
217   ViewerDrawGetDraw(viewer,&draw);
218   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
219 
220   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
221   xr += w;    yr += h;  xl = -w;     yl = -h;
222   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
223   /* loop over matrix elements drawing boxes */
224   color = DRAW_BLUE;
225   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
226     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
227       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
228       x_l = a->j[j]*bs; x_r = x_l + 1.0;
229       aa = a->a + j*bs2;
230       for ( k=0; k<bs; k++ ) {
231         for ( l=0; l<bs; l++ ) {
232           if (PetscReal(*aa++) >=  0.) continue;
233           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
234         }
235       }
236     }
237   }
238   color = DRAW_CYAN;
239   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
240     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
241       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
242       x_l = a->j[j]*bs; x_r = x_l + 1.0;
243       aa = a->a + j*bs2;
244       for ( k=0; k<bs; k++ ) {
245         for ( l=0; l<bs; l++ ) {
246           if (PetscReal(*aa++) != 0.) continue;
247           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
248         }
249       }
250     }
251   }
252 
253   color = DRAW_RED;
254   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
255     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
256       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
257       x_l = a->j[j]*bs; x_r = x_l + 1.0;
258       aa = a->a + j*bs2;
259       for ( k=0; k<bs; k++ ) {
260         for ( l=0; l<bs; l++ ) {
261           if (PetscReal(*aa++) <= 0.) continue;
262           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
263         }
264       }
265     }
266   }
267 
268   DrawFlush(draw);
269   DrawGetPause(draw,&pause);
270   if (pause >= 0) { PetscSleep(pause); return 0;}
271 
272   /* allow the matrix to zoom or shrink */
273   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
274   while (button != BUTTON_RIGHT) {
275     DrawClear(draw);
276     if (button == BUTTON_LEFT) scale = .5;
277     else if (button == BUTTON_CENTER) scale = 2.;
278     xl = scale*(xl + w - xc) + xc - w*scale;
279     xr = scale*(xr - w - xc) + xc + w*scale;
280     yl = scale*(yl + h - yc) + yc - h*scale;
281     yr = scale*(yr - h - yc) + yc + h*scale;
282     w *= scale; h *= scale;
283     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
284     color = DRAW_BLUE;
285     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
286       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
287         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
288         x_l = a->j[j]*bs; x_r = x_l + 1.0;
289         aa = a->a + j*bs2;
290         for ( k=0; k<bs; k++ ) {
291           for ( l=0; l<bs; l++ ) {
292             if (PetscReal(*aa++) >=  0.) continue;
293             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
294           }
295         }
296       }
297     }
298     color = DRAW_CYAN;
299     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
300       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
301         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
302         x_l = a->j[j]*bs; x_r = x_l + 1.0;
303         aa = a->a + j*bs2;
304         for ( k=0; k<bs; k++ ) {
305           for ( l=0; l<bs; l++ ) {
306           if (PetscReal(*aa++) != 0.) continue;
307           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
308           }
309         }
310       }
311     }
312 
313     color = DRAW_RED;
314     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
315       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
316         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
317         x_l = a->j[j]*bs; x_r = x_l + 1.0;
318         aa = a->a + j*bs2;
319         for ( k=0; k<bs; k++ ) {
320           for ( l=0; l<bs; l++ ) {
321             if (PetscReal(*aa++) <= 0.) continue;
322             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
323           }
324         }
325       }
326     }
327     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
328   }
329   return 0;
330 }
331 
332 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
333 {
334   Mat         A = (Mat) obj;
335   ViewerType  vtype;
336   int         ierr;
337 
338   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
339   if (vtype == MATLAB_VIEWER) {
340     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
341   }
342   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
343     return MatView_SeqBAIJ_ASCII(A,viewer);
344   }
345   else if (vtype == BINARY_FILE_VIEWER) {
346     return MatView_SeqBAIJ_Binary(A,viewer);
347   }
348   else if (vtype == DRAW_VIEWER) {
349     return MatView_SeqBAIJ_Draw(A,viewer);
350   }
351   return 0;
352 }
353 
354 #define CHUNKSIZE  10
355 
356 /* This version has row oriented v  */
357 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
361   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
362   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
363   int          ridx,cidx,bs2=a->bs2;
364   Scalar      *ap,value,*aa=a->a,*bap;
365 
366   for ( k=0; k<m; k++ ) { /* loop over added rows */
367     row  = im[k]; brow = row/bs;
368 #if defined(PETSC_BOPT_g)
369     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
370     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
371 #endif
372     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
373     rmax = imax[brow]; nrow = ailen[brow];
374     low = 0;
375     for ( l=0; l<n; l++ ) { /* loop over added columns */
376 #if defined(PETSC_BOPT_g)
377       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
378       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
379 #endif
380       col = in[l]; bcol = col/bs;
381       ridx = row % bs; cidx = col % bs;
382       if (roworiented) {
383         value = *v++;
384       }
385       else {
386         value = v[k + l*m];
387       }
388       if (!sorted) low = 0; high = nrow;
389       while (high-low > 5) {
390         t = (low+high)/2;
391         if (rp[t] > bcol) high = t;
392         else              low  = t;
393       }
394       for ( i=low; i<high; i++ ) {
395         if (rp[i] > bcol) break;
396         if (rp[i] == bcol) {
397           bap  = ap +  bs2*i + bs*cidx + ridx;
398           if (is == ADD_VALUES) *bap += value;
399           else                  *bap  = value;
400           goto noinsert;
401         }
402       }
403       if (nonew) goto noinsert;
404       if (nrow >= rmax) {
405         /* there is no extra room in row, therefore enlarge */
406         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
407         Scalar *new_a;
408 
409         /* malloc new storage space */
410         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
411         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
412         new_j   = (int *) (new_a + bs2*new_nz);
413         new_i   = new_j + new_nz;
414 
415         /* copy over old data into new slots */
416         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
417         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
418         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
419         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
420         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
421                                                            len*sizeof(int));
422         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
423         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
424         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
425                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
426         /* free up old matrix storage */
427         PetscFree(a->a);
428         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
429         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
430         a->singlemalloc = 1;
431 
432         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
433         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
434         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
435         a->maxnz += bs2*CHUNKSIZE;
436         a->reallocs++;
437         a->nz++;
438       }
439       N = nrow++ - 1;
440       /* shift up all the later entries in this row */
441       for ( ii=N; ii>=i; ii-- ) {
442         rp[ii+1] = rp[ii];
443         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
444       }
445       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
446       rp[i]                      = bcol;
447       ap[bs2*i + bs*cidx + ridx] = value;
448       noinsert:;
449       low = i;
450     }
451     ailen[brow] = nrow;
452   }
453   return 0;
454 }
455 
456 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
457 {
458   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
459   *m = a->m; *n = a->n;
460   return 0;
461 }
462 
463 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
464 {
465   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
466   *m = 0; *n = a->m;
467   return 0;
468 }
469 
470 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
471 {
472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
473   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
474   Scalar      *aa,*v_i,*aa_i;
475 
476   bs  = a->bs;
477   ai  = a->i;
478   aj  = a->j;
479   aa  = a->a;
480   bs2 = a->bs2;
481 
482   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
483 
484   bn  = row/bs;   /* Block number */
485   bp  = row % bs; /* Block Position */
486   M   = ai[bn+1] - ai[bn];
487   *nz = bs*M;
488 
489   if (v) {
490     *v = 0;
491     if (*nz) {
492       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
493       for ( i=0; i<M; i++ ) { /* for each block in the block row */
494         v_i  = *v + i*bs;
495         aa_i = aa + bs2*(ai[bn] + i);
496         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
497       }
498     }
499   }
500 
501   if (idx) {
502     *idx = 0;
503     if (*nz) {
504       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
505       for ( i=0; i<M; i++ ) { /* for each block in the block row */
506         idx_i = *idx + i*bs;
507         itmp  = bs*aj[ai[bn] + i];
508         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
509       }
510     }
511   }
512   return 0;
513 }
514 
515 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
516 {
517   if (idx) {if (*idx) PetscFree(*idx);}
518   if (v)   {if (*v)   PetscFree(*v);}
519   return 0;
520 }
521 
522 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
523 {
524   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
525   Mat         C;
526   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
527   int         *rows,*cols,bs2=a->bs2;
528   Scalar      *array=a->a;
529 
530   if (B==PETSC_NULL && mbs!=nbs)
531     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
532   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
533   PetscMemzero(col,(1+nbs)*sizeof(int));
534 
535   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
536   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
537   PetscFree(col);
538   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
539   cols = rows + bs;
540   for ( i=0; i<mbs; i++ ) {
541     cols[0] = i*bs;
542     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
543     len = ai[i+1] - ai[i];
544     for ( j=0; j<len; j++ ) {
545       rows[0] = (*aj++)*bs;
546       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
547       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
548       array += bs2;
549     }
550   }
551   PetscFree(rows);
552 
553   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
554   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
555 
556   if (B != PETSC_NULL) {
557     *B = C;
558   } else {
559     /* This isn't really an in-place transpose */
560     PetscFree(a->a);
561     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
562     if (a->diag) PetscFree(a->diag);
563     if (a->ilen) PetscFree(a->ilen);
564     if (a->imax) PetscFree(a->imax);
565     if (a->solve_work) PetscFree(a->solve_work);
566     PetscFree(a);
567     PetscMemcpy(A,C,sizeof(struct _Mat));
568     PetscHeaderDestroy(C);
569   }
570   return 0;
571 }
572 
573 
574 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
575 {
576   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
577   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
578   int        m = a->m,*ip, N, *ailen = a->ilen;
579   int        mbs = a->mbs, bs2 = a->bs2;
580   Scalar     *aa = a->a, *ap;
581 
582   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
583 
584   for ( i=1; i<mbs; i++ ) {
585     /* move each row back by the amount of empty slots (fshift) before it*/
586     fshift += imax[i-1] - ailen[i-1];
587     if (fshift) {
588       ip = aj + ai[i]; ap = aa + bs2*ai[i];
589       N = ailen[i];
590       for ( j=0; j<N; j++ ) {
591         ip[j-fshift] = ip[j];
592         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
593       }
594     }
595     ai[i] = ai[i-1] + ailen[i-1];
596   }
597   if (mbs) {
598     fshift += imax[mbs-1] - ailen[mbs-1];
599     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
600   }
601   /* reset ilen and imax for each row */
602   for ( i=0; i<mbs; i++ ) {
603     ailen[i] = imax[i] = ai[i+1] - ai[i];
604   }
605   a->nz = ai[mbs];
606 
607   /* diagonals may have moved, so kill the diagonal pointers */
608   if (fshift && a->diag) {
609     PetscFree(a->diag);
610     PLogObjectMemory(A,-(m+1)*sizeof(int));
611     a->diag = 0;
612   }
613   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
614            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
615   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
616            a->reallocs);
617   a->reallocs          = 0;
618   A->info.nz_unneeded  = (double)fshift*bs2;
619 
620   return 0;
621 }
622 
623 static int MatZeroEntries_SeqBAIJ(Mat A)
624 {
625   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
626   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
627   return 0;
628 }
629 
630 int MatDestroy_SeqBAIJ(PetscObject obj)
631 {
632   Mat         A  = (Mat) obj;
633   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
634   int         ierr;
635 
636 #if defined(PETSC_LOG)
637   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
638 #endif
639   PetscFree(a->a);
640   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
641   if (a->diag) PetscFree(a->diag);
642   if (a->ilen) PetscFree(a->ilen);
643   if (a->imax) PetscFree(a->imax);
644   if (a->solve_work) PetscFree(a->solve_work);
645   if (a->mult_work) PetscFree(a->mult_work);
646   PetscFree(a);
647   if (A->mapping) {
648     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
649   }
650   PLogObjectDestroy(A);
651   PetscHeaderDestroy(A);
652   return 0;
653 }
654 
655 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
656 {
657   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
658   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
659   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
660   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
661   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
662   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
663   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
664   else if (op == MAT_ROWS_SORTED ||
665            op == MAT_ROWS_UNSORTED ||
666            op == MAT_SYMMETRIC ||
667            op == MAT_STRUCTURALLY_SYMMETRIC ||
668            op == MAT_YES_NEW_DIAGONALS ||
669            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
670     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
671   else if (op == MAT_NO_NEW_DIAGONALS)
672     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
673   else
674     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
675   return 0;
676 }
677 
678 
679 /* -------------------------------------------------------*/
680 /* Should check that shapes of vectors and matrices match */
681 /* -------------------------------------------------------*/
682 #include "pinclude/plapack.h"
683 
684 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
685 {
686   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
687   register Scalar *x,*z,*v,sum;
688   int             mbs=a->mbs,i,*idx,*ii,n;
689 
690   VecGetArray_Fast(xx,x);
691   VecGetArray_Fast(zz,z);
692 
693   idx   = a->j;
694   v     = a->a;
695   ii    = a->i;
696 
697   for ( i=0; i<mbs; i++ ) {
698     n    = ii[1] - ii[0]; ii++;
699     sum  = 0.0;
700     while (n--) sum += *v++ * x[*idx++];
701     z[i] = sum;
702   }
703   VecRestoreArray_Fast(xx,x);
704   VecRestoreArray_Fast(zz,z);
705   PLogFlops(2*a->nz - a->m);
706   return 0;
707 }
708 
709 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
710 {
711   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
712   register Scalar *x,*z,*v,*xb,sum1,sum2;
713   register Scalar x1,x2;
714   int             mbs=a->mbs,i,*idx,*ii;
715   int             j,n;
716 
717   VecGetArray_Fast(xx,x);
718   VecGetArray_Fast(zz,z);
719 
720   idx   = a->j;
721   v     = a->a;
722   ii    = a->i;
723 
724   for ( i=0; i<mbs; i++ ) {
725     n  = ii[1] - ii[0]; ii++;
726     sum1 = 0.0; sum2 = 0.0;
727     for ( j=0; j<n; j++ ) {
728       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
729       sum1 += v[0]*x1 + v[2]*x2;
730       sum2 += v[1]*x1 + v[3]*x2;
731       v += 4;
732     }
733     z[0] = sum1; z[1] = sum2;
734     z += 2;
735   }
736   VecRestoreArray_Fast(xx,x);
737   VecRestoreArray_Fast(zz,z);
738   PLogFlops(4*a->nz - a->m);
739   return 0;
740 }
741 
742 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
743 {
744   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
745   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
746   int             mbs=a->mbs,i,*idx,*ii,j,n;
747 
748   VecGetArray_Fast(xx,x);
749   VecGetArray_Fast(zz,z);
750 
751   idx   = a->j;
752   v     = a->a;
753   ii    = a->i;
754 
755   for ( i=0; i<mbs; i++ ) {
756     n  = ii[1] - ii[0]; ii++;
757     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
758     for ( j=0; j<n; j++ ) {
759       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
760       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
761       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
762       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
763       v += 9;
764     }
765     z[0] = sum1; z[1] = sum2; z[2] = sum3;
766     z += 3;
767   }
768   VecRestoreArray_Fast(xx,x);
769   VecRestoreArray_Fast(zz,z);
770   PLogFlops(18*a->nz - a->m);
771   return 0;
772 }
773 
774 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
775 {
776   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
777   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
778   register Scalar x1,x2,x3,x4;
779   int             mbs=a->mbs,i,*idx,*ii;
780   int             j,n;
781 
782   VecGetArray_Fast(xx,x);
783   VecGetArray_Fast(zz,z);
784 
785   idx   = a->j;
786   v     = a->a;
787   ii    = a->i;
788 
789   for ( i=0; i<mbs; i++ ) {
790     n  = ii[1] - ii[0]; ii++;
791     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
792     for ( j=0; j<n; j++ ) {
793       xb = x + 4*(*idx++);
794       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
795       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
796       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
797       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
798       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
799       v += 16;
800     }
801     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
802     z += 4;
803   }
804   VecRestoreArray_Fast(xx,x);
805   VecRestoreArray_Fast(zz,z);
806   PLogFlops(32*a->nz - a->m);
807   return 0;
808 }
809 
810 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
811 {
812   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
813   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
814   register Scalar x1,x2,x3,x4,x5;
815   int             mbs=a->mbs,i,*idx,*ii,j,n;
816 
817   VecGetArray_Fast(xx,x);
818   VecGetArray_Fast(zz,z);
819 
820   idx   = a->j;
821   v     = a->a;
822   ii    = a->i;
823 
824   for ( i=0; i<mbs; i++ ) {
825     n  = ii[1] - ii[0]; ii++;
826     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
827     for ( j=0; j<n; j++ ) {
828       xb = x + 5*(*idx++);
829       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
830       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
831       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
832       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
833       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
834       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
835       v += 25;
836     }
837     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
838     z += 5;
839   }
840   VecRestoreArray_Fast(xx,x);
841   VecRestoreArray_Fast(zz,z);
842   PLogFlops(50*a->nz - a->m);
843   return 0;
844 }
845 
846 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
847 {
848   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
849   register Scalar *x,*z,*v,*xb;
850   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
851   int             _One = 1,ncols,k;
852   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
853 
854   VecGetArray_Fast(xx,x);
855   VecGetArray_Fast(zz,z);
856 
857   idx   = a->j;
858   v     = a->a;
859   ii    = a->i;
860 
861 
862   if (!a->mult_work) {
863     k = PetscMax(a->m,a->n);
864     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
865   }
866   work = a->mult_work;
867   for ( i=0; i<mbs; i++ ) {
868     n     = ii[1] - ii[0]; ii++;
869     ncols = n*bs;
870     workt = work;
871     for ( j=0; j<n; j++ ) {
872       xb = x + bs*(*idx++);
873       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
874       workt += bs;
875     }
876     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
877     v += n*bs2;
878     z += bs;
879   }
880   VecRestoreArray_Fast(xx,x);
881   VecRestoreArray_Fast(zz,z);
882   PLogFlops(2*a->nz*bs2 - a->m);
883   return 0;
884 }
885 
886 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
887 {
888   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
889   register Scalar *x,*y,*z,*v,sum;
890   int             mbs=a->mbs,i,*idx,*ii,n;
891 
892   VecGetArray_Fast(xx,x);
893   VecGetArray_Fast(yy,y);
894   VecGetArray_Fast(zz,z);
895 
896   idx   = a->j;
897   v     = a->a;
898   ii    = a->i;
899 
900   for ( i=0; i<mbs; i++ ) {
901     n    = ii[1] - ii[0]; ii++;
902     sum  = y[i];
903     while (n--) sum += *v++ * x[*idx++];
904     z[i] = sum;
905   }
906   VecRestoreArray_Fast(xx,x);
907   VecRestoreArray_Fast(yy,y);
908   VecRestoreArray_Fast(zz,z);
909   PLogFlops(2*a->nz);
910   return 0;
911 }
912 
913 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
914 {
915   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
916   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
917   register Scalar x1,x2;
918   int             mbs=a->mbs,i,*idx,*ii;
919   int             j,n;
920 
921   VecGetArray_Fast(xx,x);
922   VecGetArray_Fast(yy,y);
923   VecGetArray_Fast(zz,z);
924 
925   idx   = a->j;
926   v     = a->a;
927   ii    = a->i;
928 
929   for ( i=0; i<mbs; i++ ) {
930     n  = ii[1] - ii[0]; ii++;
931     sum1 = y[0]; sum2 = y[1];
932     for ( j=0; j<n; j++ ) {
933       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
934       sum1 += v[0]*x1 + v[2]*x2;
935       sum2 += v[1]*x1 + v[3]*x2;
936       v += 4;
937     }
938     z[0] = sum1; z[1] = sum2;
939     z += 2; y += 2;
940   }
941   VecRestoreArray_Fast(xx,x);
942   VecRestoreArray_Fast(yy,y);
943   VecRestoreArray_Fast(zz,z);
944   PLogFlops(4*a->nz);
945   return 0;
946 }
947 
948 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
949 {
950   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
951   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
952   int             mbs=a->mbs,i,*idx,*ii,j,n;
953 
954   VecGetArray_Fast(xx,x);
955   VecGetArray_Fast(yy,y);
956   VecGetArray_Fast(zz,z);
957 
958   idx   = a->j;
959   v     = a->a;
960   ii    = a->i;
961 
962   for ( i=0; i<mbs; i++ ) {
963     n  = ii[1] - ii[0]; ii++;
964     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
965     for ( j=0; j<n; j++ ) {
966       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
967       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
968       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
969       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
970       v += 9;
971     }
972     z[0] = sum1; z[1] = sum2; z[2] = sum3;
973     z += 3; y += 3;
974   }
975   VecRestoreArray_Fast(xx,x);
976   VecRestoreArray_Fast(yy,y);
977   VecRestoreArray_Fast(zz,z);
978   PLogFlops(18*a->nz);
979   return 0;
980 }
981 
982 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
983 {
984   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
985   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
986   register Scalar x1,x2,x3,x4;
987   int             mbs=a->mbs,i,*idx,*ii;
988   int             j,n;
989 
990   VecGetArray_Fast(xx,x);
991   VecGetArray_Fast(yy,y);
992   VecGetArray_Fast(zz,z);
993 
994   idx   = a->j;
995   v     = a->a;
996   ii    = a->i;
997 
998   for ( i=0; i<mbs; i++ ) {
999     n  = ii[1] - ii[0]; ii++;
1000     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1001     for ( j=0; j<n; j++ ) {
1002       xb = x + 4*(*idx++);
1003       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1004       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1005       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1006       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1007       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1008       v += 16;
1009     }
1010     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1011     z += 4; y += 4;
1012   }
1013   VecRestoreArray_Fast(xx,x);
1014   VecRestoreArray_Fast(yy,y);
1015   VecRestoreArray_Fast(zz,z);
1016   PLogFlops(32*a->nz);
1017   return 0;
1018 }
1019 
1020 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1021 {
1022   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1023   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1024   register Scalar x1,x2,x3,x4,x5;
1025   int             mbs=a->mbs,i,*idx,*ii,j,n;
1026 
1027   VecGetArray_Fast(xx,x);
1028   VecGetArray_Fast(yy,y);
1029   VecGetArray_Fast(zz,z);
1030 
1031   idx   = a->j;
1032   v     = a->a;
1033   ii    = a->i;
1034 
1035   for ( i=0; i<mbs; i++ ) {
1036     n  = ii[1] - ii[0]; ii++;
1037     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1038     for ( j=0; j<n; j++ ) {
1039       xb = x + 5*(*idx++);
1040       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1041       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1042       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1043       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1044       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1045       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1046       v += 25;
1047     }
1048     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1049     z += 5; y += 5;
1050   }
1051   VecRestoreArray_Fast(xx,x);
1052   VecRestoreArray_Fast(yy,y);
1053   VecRestoreArray_Fast(zz,z);
1054   PLogFlops(50*a->nz);
1055   return 0;
1056 }
1057 
1058 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1059 {
1060   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1061   register Scalar *x,*z,*v,*xb;
1062   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1063   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1064 
1065   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1066 
1067   VecGetArray_Fast(xx,x);
1068   VecGetArray_Fast(zz,z);
1069 
1070   idx   = a->j;
1071   v     = a->a;
1072   ii    = a->i;
1073 
1074 
1075   if (!a->mult_work) {
1076     k = PetscMax(a->m,a->n);
1077     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1078   }
1079   work = a->mult_work;
1080   for ( i=0; i<mbs; i++ ) {
1081     n     = ii[1] - ii[0]; ii++;
1082     ncols = n*bs;
1083     workt = work;
1084     for ( j=0; j<n; j++ ) {
1085       xb = x + bs*(*idx++);
1086       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1087       workt += bs;
1088     }
1089     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1090     v += n*bs2;
1091     z += bs;
1092   }
1093   VecRestoreArray_Fast(xx,x);
1094   VecRestoreArray_Fast(zz,z);
1095   PLogFlops(2*a->nz*bs2 );
1096   return 0;
1097 }
1098 
1099 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1100 {
1101   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1102   Scalar          *xg,*zg,*zb;
1103   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1104   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1105   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1106 
1107 
1108   VecGetArray_Fast(xx,xg); x = xg;
1109   VecGetArray_Fast(zz,zg); z = zg;
1110   PetscMemzero(z,N*sizeof(Scalar));
1111 
1112   idx   = a->j;
1113   v     = a->a;
1114   ii    = a->i;
1115 
1116   switch (bs) {
1117   case 1:
1118     for ( i=0; i<mbs; i++ ) {
1119       n  = ii[1] - ii[0]; ii++;
1120       xb = x + i; x1 = xb[0];
1121       ib = idx + ai[i];
1122       for ( j=0; j<n; j++ ) {
1123         rval    = ib[j];
1124         z[rval] += *v++ * x1;
1125       }
1126     }
1127     break;
1128   case 2:
1129     for ( i=0; i<mbs; i++ ) {
1130       n  = ii[1] - ii[0]; ii++;
1131       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1132       ib = idx + ai[i];
1133       for ( j=0; j<n; j++ ) {
1134         rval      = ib[j]*2;
1135         z[rval++] += v[0]*x1 + v[1]*x2;
1136         z[rval++] += v[2]*x1 + v[3]*x2;
1137         v += 4;
1138       }
1139     }
1140     break;
1141   case 3:
1142     for ( i=0; i<mbs; i++ ) {
1143       n  = ii[1] - ii[0]; ii++;
1144       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1145       ib = idx + ai[i];
1146       for ( j=0; j<n; j++ ) {
1147         rval      = ib[j]*3;
1148         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1149         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1150         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1151         v += 9;
1152       }
1153     }
1154     break;
1155   case 4:
1156     for ( i=0; i<mbs; i++ ) {
1157       n  = ii[1] - ii[0]; ii++;
1158       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1159       ib = idx + ai[i];
1160       for ( j=0; j<n; j++ ) {
1161         rval      = ib[j]*4;
1162         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1163         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1164         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1165         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1166         v += 16;
1167       }
1168     }
1169     break;
1170   case 5:
1171     for ( i=0; i<mbs; i++ ) {
1172       n  = ii[1] - ii[0]; ii++;
1173       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1174       x4 = xb[3];   x5 = xb[4];
1175       ib = idx + ai[i];
1176       for ( j=0; j<n; j++ ) {
1177         rval      = ib[j]*5;
1178         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1179         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1180         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1181         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1182         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1183         v += 25;
1184       }
1185     }
1186     break;
1187       /* block sizes larger then 5 by 5 are handled by BLAS */
1188     default: {
1189       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1190       if (!a->mult_work) {
1191         k = PetscMax(a->m,a->n);
1192         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1193         CHKPTRQ(a->mult_work);
1194       }
1195       work = a->mult_work;
1196       for ( i=0; i<mbs; i++ ) {
1197         n     = ii[1] - ii[0]; ii++;
1198         ncols = n*bs;
1199         PetscMemzero(work,ncols*sizeof(Scalar));
1200         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1201         v += n*bs2;
1202         x += bs;
1203         workt = work;
1204         for ( j=0; j<n; j++ ) {
1205           zb = z + bs*(*idx++);
1206           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1207           workt += bs;
1208         }
1209       }
1210     }
1211   }
1212   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1213   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1214   PLogFlops(2*a->nz*a->bs2 - a->n);
1215   return 0;
1216 }
1217 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1218 {
1219   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1220   Scalar          *xg,*zg,*zb;
1221   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1222   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1223   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1224 
1225 
1226 
1227   VecGetArray_Fast(xx,xg); x = xg;
1228   VecGetArray_Fast(zz,zg); z = zg;
1229 
1230   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1231   else PetscMemzero(z,N*sizeof(Scalar));
1232 
1233 
1234   idx   = a->j;
1235   v     = a->a;
1236   ii    = a->i;
1237 
1238   switch (bs) {
1239   case 1:
1240     for ( i=0; i<mbs; i++ ) {
1241       n  = ii[1] - ii[0]; ii++;
1242       xb = x + i; x1 = xb[0];
1243       ib = idx + ai[i];
1244       for ( j=0; j<n; j++ ) {
1245         rval    = ib[j];
1246         z[rval] += *v++ * x1;
1247       }
1248     }
1249     break;
1250   case 2:
1251     for ( i=0; i<mbs; i++ ) {
1252       n  = ii[1] - ii[0]; ii++;
1253       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1254       ib = idx + ai[i];
1255       for ( j=0; j<n; j++ ) {
1256         rval      = ib[j]*2;
1257         z[rval++] += v[0]*x1 + v[1]*x2;
1258         z[rval++] += v[2]*x1 + v[3]*x2;
1259         v += 4;
1260       }
1261     }
1262     break;
1263   case 3:
1264     for ( i=0; i<mbs; i++ ) {
1265       n  = ii[1] - ii[0]; ii++;
1266       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1267       ib = idx + ai[i];
1268       for ( j=0; j<n; j++ ) {
1269         rval      = ib[j]*3;
1270         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1271         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1272         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1273         v += 9;
1274       }
1275     }
1276     break;
1277   case 4:
1278     for ( i=0; i<mbs; i++ ) {
1279       n  = ii[1] - ii[0]; ii++;
1280       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1281       ib = idx + ai[i];
1282       for ( j=0; j<n; j++ ) {
1283         rval      = ib[j]*4;
1284         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1285         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1286         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1287         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1288         v += 16;
1289       }
1290     }
1291     break;
1292   case 5:
1293     for ( i=0; i<mbs; i++ ) {
1294       n  = ii[1] - ii[0]; ii++;
1295       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1296       x4 = xb[3];   x5 = xb[4];
1297       ib = idx + ai[i];
1298       for ( j=0; j<n; j++ ) {
1299         rval      = ib[j]*5;
1300         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1301         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1302         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1303         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1304         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1305         v += 25;
1306       }
1307     }
1308     break;
1309       /* block sizes larger then 5 by 5 are handled by BLAS */
1310     default: {
1311       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1312       if (!a->mult_work) {
1313         k = PetscMax(a->m,a->n);
1314         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1315         CHKPTRQ(a->mult_work);
1316       }
1317       work = a->mult_work;
1318       for ( i=0; i<mbs; i++ ) {
1319         n     = ii[1] - ii[0]; ii++;
1320         ncols = n*bs;
1321         PetscMemzero(work,ncols*sizeof(Scalar));
1322         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1323         v += n*bs2;
1324         x += bs;
1325         workt = work;
1326         for ( j=0; j<n; j++ ) {
1327           zb = z + bs*(*idx++);
1328           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1329           workt += bs;
1330         }
1331       }
1332     }
1333   }
1334   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1335   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1336   PLogFlops(2*a->nz*a->bs2);
1337   return 0;
1338 }
1339 
1340 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1341 {
1342   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1343 
1344   info->rows_global    = (double)a->m;
1345   info->columns_global = (double)a->n;
1346   info->rows_local     = (double)a->m;
1347   info->columns_local  = (double)a->n;
1348   info->block_size     = a->bs2;
1349   info->nz_allocated   = a->maxnz;
1350   info->nz_used        = a->bs2*a->nz;
1351   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1352   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1353     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1354   info->assemblies   = A->num_ass;
1355   info->mallocs      = a->reallocs;
1356   info->memory       = A->mem;
1357   if (A->factor) {
1358     info->fill_ratio_given  = A->info.fill_ratio_given;
1359     info->fill_ratio_needed = A->info.fill_ratio_needed;
1360     info->factor_mallocs    = A->info.factor_mallocs;
1361   } else {
1362     info->fill_ratio_given  = 0;
1363     info->fill_ratio_needed = 0;
1364     info->factor_mallocs    = 0;
1365   }
1366   return 0;
1367 }
1368 
1369 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1370 {
1371   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1372 
1373   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1374 
1375   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1376   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1377       (a->nz != b->nz)) {
1378     *flg = PETSC_FALSE; return 0;
1379   }
1380 
1381   /* if the a->i are the same */
1382   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1383     *flg = PETSC_FALSE; return 0;
1384   }
1385 
1386   /* if a->j are the same */
1387   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1388     *flg = PETSC_FALSE; return 0;
1389   }
1390 
1391   /* if a->a are the same */
1392   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1393     *flg = PETSC_FALSE; return 0;
1394   }
1395   *flg = PETSC_TRUE;
1396   return 0;
1397 
1398 }
1399 
1400 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1401 {
1402   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1403   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1404   Scalar      *x, zero = 0.0,*aa,*aa_j;
1405 
1406   bs   = a->bs;
1407   aa   = a->a;
1408   ai   = a->i;
1409   aj   = a->j;
1410   ambs = a->mbs;
1411   bs2  = a->bs2;
1412 
1413   VecSet(&zero,v);
1414   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1415   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1416   for ( i=0; i<ambs; i++ ) {
1417     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1418       if (aj[j] == i) {
1419         row  = i*bs;
1420         aa_j = aa+j*bs2;
1421         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1422         break;
1423       }
1424     }
1425   }
1426   return 0;
1427 }
1428 
1429 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1430 {
1431   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1432   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1433   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1434 
1435   ai  = a->i;
1436   aj  = a->j;
1437   aa  = a->a;
1438   m   = a->m;
1439   n   = a->n;
1440   bs  = a->bs;
1441   mbs = a->mbs;
1442   bs2 = a->bs2;
1443   if (ll) {
1444     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1445     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1446     for ( i=0; i<mbs; i++ ) { /* for each block row */
1447       M  = ai[i+1] - ai[i];
1448       li = l + i*bs;
1449       v  = aa + bs2*ai[i];
1450       for ( j=0; j<M; j++ ) { /* for each block */
1451         for ( k=0; k<bs2; k++ ) {
1452           (*v++) *= li[k%bs];
1453         }
1454       }
1455     }
1456   }
1457 
1458   if (rr) {
1459     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1460     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1461     for ( i=0; i<mbs; i++ ) { /* for each block row */
1462       M  = ai[i+1] - ai[i];
1463       v  = aa + bs2*ai[i];
1464       for ( j=0; j<M; j++ ) { /* for each block */
1465         ri = r + bs*aj[ai[i]+j];
1466         for ( k=0; k<bs; k++ ) {
1467           x = ri[k];
1468           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1469         }
1470       }
1471     }
1472   }
1473   return 0;
1474 }
1475 
1476 
1477 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1478 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1479 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1480 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1481 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1482 
1483 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1484 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1485 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1486 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1487 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1488 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1489 
1490 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1491 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1492 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1493 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1494 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1495 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1496 
1497 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1498 {
1499   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1500   Scalar      *v = a->a;
1501   double      sum = 0.0;
1502   int         i,nz=a->nz,bs2=a->bs2;
1503 
1504   if (type == NORM_FROBENIUS) {
1505     for (i=0; i< bs2*nz; i++ ) {
1506 #if defined(PETSC_COMPLEX)
1507       sum += real(conj(*v)*(*v)); v++;
1508 #else
1509       sum += (*v)*(*v); v++;
1510 #endif
1511     }
1512     *norm = sqrt(sum);
1513   }
1514   else {
1515     SETERRQ(PETSC_ERR_SUP,"MatNorm_SeqBAIJ:No support for this norm yet");
1516   }
1517   return 0;
1518 }
1519 
1520 /*
1521      note: This can only work for identity for row and col. It would
1522    be good to check this and otherwise generate an error.
1523 */
1524 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1525 {
1526   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1527   Mat         outA;
1528   int         ierr;
1529 
1530   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1531 
1532   outA          = inA;
1533   inA->factor   = FACTOR_LU;
1534   a->row        = row;
1535   a->col        = col;
1536 
1537   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1538 
1539   if (!a->diag) {
1540     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1541   }
1542   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1543   return 0;
1544 }
1545 
1546 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1547 {
1548   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1549   int         one = 1, totalnz = a->bs2*a->nz;
1550   BLscal_( &totalnz, alpha, a->a, &one );
1551   PLogFlops(totalnz);
1552   return 0;
1553 }
1554 
1555 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1556 {
1557   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1558   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1559   int        *ai = a->i, *ailen = a->ilen;
1560   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1561   Scalar     *ap, *aa = a->a, zero = 0.0;
1562 
1563   for ( k=0; k<m; k++ ) { /* loop over rows */
1564     row  = im[k]; brow = row/bs;
1565     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1566     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1567     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1568     nrow = ailen[brow];
1569     for ( l=0; l<n; l++ ) { /* loop over columns */
1570       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1571       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1572       col  = in[l] ;
1573       bcol = col/bs;
1574       cidx = col%bs;
1575       ridx = row%bs;
1576       high = nrow;
1577       low  = 0; /* assume unsorted */
1578       while (high-low > 5) {
1579         t = (low+high)/2;
1580         if (rp[t] > bcol) high = t;
1581         else             low  = t;
1582       }
1583       for ( i=low; i<high; i++ ) {
1584         if (rp[i] > bcol) break;
1585         if (rp[i] == bcol) {
1586           *v++ = ap[bs2*i+bs*cidx+ridx];
1587           goto finished;
1588         }
1589       }
1590       *v++ = zero;
1591       finished:;
1592     }
1593   }
1594   return 0;
1595 }
1596 
1597 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1598 {
1599   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1600   *bs = baij->bs;
1601   return 0;
1602 }
1603 
1604 /* idx should be of length atlease bs */
1605 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1606 {
1607   int i,row;
1608   row = idx[0];
1609   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1610 
1611   for ( i=1; i<bs; i++ ) {
1612     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1613   }
1614   *flg = PETSC_TRUE;
1615   return 0;
1616 }
1617 
1618 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1619 {
1620   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1621   IS          is_local;
1622   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1623   PetscTruth  flg;
1624   Scalar      zero = 0.0,*aa;
1625 
1626   /* Make a copy of the IS and  sort it */
1627   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1628   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1629   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1630   ierr = ISSort(is_local); CHKERRQ(ierr);
1631   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1632 
1633   i = 0;
1634   while (i < is_n) {
1635     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1636     flg = PETSC_FALSE;
1637     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1638     if (flg) { /* There exists a block of rows to be Zerowed */
1639       baij->ilen[rows[i]/bs] = 0;
1640       i += bs;
1641     } else { /* Zero out only the requested row */
1642       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1643       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1644       for ( j=0; j<count; j++ ) {
1645         aa[0] = zero;
1646         aa+=bs;
1647       }
1648       i++;
1649     }
1650   }
1651   if (diag) {
1652     for ( j=0; j<is_n; j++ ) {
1653       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1654     }
1655   }
1656   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1657   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1658   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1659   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1660 
1661   return 0;
1662 }
1663 int MatPrintHelp_SeqBAIJ(Mat A)
1664 {
1665   static int called = 0;
1666   MPI_Comm   comm = A->comm;
1667 
1668   if (called) return 0; else called = 1;
1669   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1670   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1671   return 0;
1672 }
1673 
1674 /* -------------------------------------------------------------------*/
1675 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1676        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1677        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1678        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1679        MatSolve_SeqBAIJ_N,0,
1680        0,0,
1681        MatLUFactor_SeqBAIJ,0,
1682        0,
1683        MatTranspose_SeqBAIJ,
1684        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1685        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1686        0,MatAssemblyEnd_SeqBAIJ,
1687        0,
1688        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1689        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1690        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1691        MatILUFactorSymbolic_SeqBAIJ,0,
1692        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1693        MatConvertSameType_SeqBAIJ,0,0,
1694        MatILUFactor_SeqBAIJ,0,0,
1695        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1696        MatGetValues_SeqBAIJ,0,
1697        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1698        0,0,0,MatGetBlockSize_SeqBAIJ,
1699        MatGetRowIJ_SeqBAIJ,
1700        MatRestoreRowIJ_SeqBAIJ};
1701 
1702 /*@C
1703    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1704    compressed row) format.  For good matrix assembly performance the
1705    user should preallocate the matrix storage by setting the parameter nz
1706    (or the array nzz).  By setting these parameters accurately, performance
1707    during matrix assembly can be increased by more than a factor of 50.
1708 
1709    Input Parameters:
1710 .  comm - MPI communicator, set to MPI_COMM_SELF
1711 .  bs - size of block
1712 .  m - number of rows
1713 .  n - number of columns
1714 .  nz - number of block nonzeros per block row (same for all rows)
1715 .  nzz - array containing the number of block nonzeros in the various block rows
1716          (possibly different for each block row) or PETSC_NULL
1717 
1718    Output Parameter:
1719 .  A - the matrix
1720 
1721    Notes:
1722    The block AIJ format is fully compatible with standard Fortran 77
1723    storage.  That is, the stored row and column indices can begin at
1724    either one (as in Fortran) or zero.  See the users' manual for details.
1725 
1726    Specify the preallocated storage with either nz or nnz (not both).
1727    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1728    allocation.  For additional details, see the users manual chapter on
1729    matrices.
1730 
1731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1732 @*/
1733 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1734 {
1735   Mat         B;
1736   Mat_SeqBAIJ *b;
1737   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1738 
1739   MPI_Comm_size(comm,&size);
1740   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1741 
1742   if (mbs*bs!=m || nbs*bs!=n)
1743     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1744 
1745   *A = 0;
1746   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1747   PLogObjectCreate(B);
1748   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1749   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1750   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1751   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1752   if (!flg) {
1753     switch (bs) {
1754       case 1:
1755         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1756         B->ops.solve           = MatSolve_SeqBAIJ_1;
1757         B->ops.mult            = MatMult_SeqBAIJ_1;
1758         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1759        break;
1760       case 2:
1761         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1762         B->ops.solve           = MatSolve_SeqBAIJ_2;
1763         B->ops.mult            = MatMult_SeqBAIJ_2;
1764         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1765         break;
1766       case 3:
1767         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1768         B->ops.solve           = MatSolve_SeqBAIJ_3;
1769         B->ops.mult            = MatMult_SeqBAIJ_3;
1770         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1771         break;
1772       case 4:
1773         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1774         B->ops.solve           = MatSolve_SeqBAIJ_4;
1775         B->ops.mult            = MatMult_SeqBAIJ_4;
1776         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1777         break;
1778       case 5:
1779         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1780         B->ops.solve           = MatSolve_SeqBAIJ_5;
1781         B->ops.mult            = MatMult_SeqBAIJ_5;
1782         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1783         break;
1784     }
1785   }
1786   B->destroy          = MatDestroy_SeqBAIJ;
1787   B->view             = MatView_SeqBAIJ;
1788   B->factor           = 0;
1789   B->lupivotthreshold = 1.0;
1790   B->mapping          = 0;
1791   b->row              = 0;
1792   b->col              = 0;
1793   b->reallocs         = 0;
1794 
1795   b->m       = m; B->m = m; B->M = m;
1796   b->n       = n; B->n = n; B->N = n;
1797   b->mbs     = mbs;
1798   b->nbs     = nbs;
1799   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1800   if (nnz == PETSC_NULL) {
1801     if (nz == PETSC_DEFAULT) nz = 5;
1802     else if (nz <= 0)        nz = 1;
1803     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1804     nz = nz*mbs;
1805   }
1806   else {
1807     nz = 0;
1808     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1809   }
1810 
1811   /* allocate the matrix space */
1812   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1813   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1814   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1815   b->j  = (int *) (b->a + nz*bs2);
1816   PetscMemzero(b->j,nz*sizeof(int));
1817   b->i  = b->j + nz;
1818   b->singlemalloc = 1;
1819 
1820   b->i[0] = 0;
1821   for (i=1; i<mbs+1; i++) {
1822     b->i[i] = b->i[i-1] + b->imax[i-1];
1823   }
1824 
1825   /* b->ilen will count nonzeros in each block row so far. */
1826   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1827   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1828   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1829 
1830   b->bs               = bs;
1831   b->bs2              = bs2;
1832   b->mbs              = mbs;
1833   b->nz               = 0;
1834   b->maxnz            = nz*bs2;
1835   b->sorted           = 0;
1836   b->roworiented      = 1;
1837   b->nonew            = 0;
1838   b->diag             = 0;
1839   b->solve_work       = 0;
1840   b->mult_work        = 0;
1841   b->spptr            = 0;
1842   B->info.nz_unneeded = (double)b->maxnz;
1843 
1844   *A = B;
1845   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1846   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1847   return 0;
1848 }
1849 
1850 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1851 {
1852   Mat         C;
1853   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1854   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1855 
1856   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1857 
1858   *B = 0;
1859   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1860   PLogObjectCreate(C);
1861   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1862   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1863   C->destroy    = MatDestroy_SeqBAIJ;
1864   C->view       = MatView_SeqBAIJ;
1865   C->factor     = A->factor;
1866   c->row        = 0;
1867   c->col        = 0;
1868   C->assembled  = PETSC_TRUE;
1869 
1870   c->m = C->m   = a->m;
1871   c->n = C->n   = a->n;
1872   C->M          = a->m;
1873   C->N          = a->n;
1874 
1875   c->bs         = a->bs;
1876   c->bs2        = a->bs2;
1877   c->mbs        = a->mbs;
1878   c->nbs        = a->nbs;
1879 
1880   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1881   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1882   for ( i=0; i<mbs; i++ ) {
1883     c->imax[i] = a->imax[i];
1884     c->ilen[i] = a->ilen[i];
1885   }
1886 
1887   /* allocate the matrix space */
1888   c->singlemalloc = 1;
1889   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1890   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1891   c->j  = (int *) (c->a + nz*bs2);
1892   c->i  = c->j + nz;
1893   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1894   if (mbs > 0) {
1895     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1896     if (cpvalues == COPY_VALUES) {
1897       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1898     }
1899   }
1900 
1901   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1902   c->sorted      = a->sorted;
1903   c->roworiented = a->roworiented;
1904   c->nonew       = a->nonew;
1905 
1906   if (a->diag) {
1907     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1908     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1909     for ( i=0; i<mbs; i++ ) {
1910       c->diag[i] = a->diag[i];
1911     }
1912   }
1913   else c->diag          = 0;
1914   c->nz                 = a->nz;
1915   c->maxnz              = a->maxnz;
1916   c->solve_work         = 0;
1917   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1918   c->mult_work          = 0;
1919   *B = C;
1920   return 0;
1921 }
1922 
1923 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1924 {
1925   Mat_SeqBAIJ  *a;
1926   Mat          B;
1927   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1928   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1929   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1930   int          *masked, nmask,tmp,bs2,ishift;
1931   Scalar       *aa;
1932   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1933 
1934   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1935   bs2  = bs*bs;
1936 
1937   MPI_Comm_size(comm,&size);
1938   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1939   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1940   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1941   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1942   M = header[1]; N = header[2]; nz = header[3];
1943 
1944   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1945 
1946   /*
1947      This code adds extra rows to make sure the number of rows is
1948     divisible by the blocksize
1949   */
1950   mbs        = M/bs;
1951   extra_rows = bs - M + bs*(mbs);
1952   if (extra_rows == bs) extra_rows = 0;
1953   else                  mbs++;
1954   if (extra_rows) {
1955     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1956   }
1957 
1958   /* read in row lengths */
1959   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1960   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1961   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1962 
1963   /* read in column indices */
1964   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1965   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1966   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1967 
1968   /* loop over row lengths determining block row lengths */
1969   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1970   PetscMemzero(browlengths,mbs*sizeof(int));
1971   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1972   PetscMemzero(mask,mbs*sizeof(int));
1973   masked = mask + mbs;
1974   rowcount = 0; nzcount = 0;
1975   for ( i=0; i<mbs; i++ ) {
1976     nmask = 0;
1977     for ( j=0; j<bs; j++ ) {
1978       kmax = rowlengths[rowcount];
1979       for ( k=0; k<kmax; k++ ) {
1980         tmp = jj[nzcount++]/bs;
1981         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1982       }
1983       rowcount++;
1984     }
1985     browlengths[i] += nmask;
1986     /* zero out the mask elements we set */
1987     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1988   }
1989 
1990   /* create our matrix */
1991   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1992          CHKERRQ(ierr);
1993   B = *A;
1994   a = (Mat_SeqBAIJ *) B->data;
1995 
1996   /* set matrix "i" values */
1997   a->i[0] = 0;
1998   for ( i=1; i<= mbs; i++ ) {
1999     a->i[i]      = a->i[i-1] + browlengths[i-1];
2000     a->ilen[i-1] = browlengths[i-1];
2001   }
2002   a->nz         = 0;
2003   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2004 
2005   /* read in nonzero values */
2006   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2007   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2008   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2009 
2010   /* set "a" and "j" values into matrix */
2011   nzcount = 0; jcount = 0;
2012   for ( i=0; i<mbs; i++ ) {
2013     nzcountb = nzcount;
2014     nmask    = 0;
2015     for ( j=0; j<bs; j++ ) {
2016       kmax = rowlengths[i*bs+j];
2017       for ( k=0; k<kmax; k++ ) {
2018         tmp = jj[nzcount++]/bs;
2019 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2020       }
2021       rowcount++;
2022     }
2023     /* sort the masked values */
2024     PetscSortInt(nmask,masked);
2025 
2026     /* set "j" values into matrix */
2027     maskcount = 1;
2028     for ( j=0; j<nmask; j++ ) {
2029       a->j[jcount++]  = masked[j];
2030       mask[masked[j]] = maskcount++;
2031     }
2032     /* set "a" values into matrix */
2033     ishift = bs2*a->i[i];
2034     for ( j=0; j<bs; j++ ) {
2035       kmax = rowlengths[i*bs+j];
2036       for ( k=0; k<kmax; k++ ) {
2037         tmp    = jj[nzcountb]/bs ;
2038         block  = mask[tmp] - 1;
2039         point  = jj[nzcountb] - bs*tmp;
2040         idx    = ishift + bs2*block + j + bs*point;
2041         a->a[idx] = aa[nzcountb++];
2042       }
2043     }
2044     /* zero out the mask elements we set */
2045     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2046   }
2047   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2048 
2049   PetscFree(rowlengths);
2050   PetscFree(browlengths);
2051   PetscFree(aa);
2052   PetscFree(jj);
2053   PetscFree(mask);
2054 
2055   B->assembled = PETSC_TRUE;
2056 
2057   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2058   if (flg) {
2059     Viewer tviewer;
2060     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2061     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2062     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2063     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2064   }
2065   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2066   if (flg) {
2067     Viewer tviewer;
2068     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2069     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2070     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2071     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2072   }
2073   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2074   if (flg) {
2075     Viewer tviewer;
2076     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2077     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2078     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2079   }
2080   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2081   if (flg) {
2082     Viewer tviewer;
2083     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2084     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2085     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2086     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2087   }
2088   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2089   if (flg) {
2090     Viewer tviewer;
2091     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2092     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2093     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2094     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2095   }
2096   return 0;
2097 }
2098 
2099 
2100 
2101