xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 70f55243aafb320636e2a54ff30cab5d1e8d3d7b)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.64 1996/08/06 04:02:46 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
15 
16 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm)
17 {
18   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
19   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
20 
21   /*
22      this is tacky: In the future when we have written special factorization
23      and solve routines for the identity permutation we should use a
24      stride index set instead of the general one.
25   */
26   if (type  == ORDER_NATURAL) {
27     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
28     for ( i=0; i<n; i++ ) idx[i] = i;
29     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
31     PetscFree(idx);
32     ISSetPermutation(*rperm);
33     ISSetPermutation(*cperm);
34     ISSetIdentity(*rperm);
35     ISSetIdentity(*cperm);
36     return 0;
37   }
38 
39   MatReorderingRegisterAll();
40   ishift = 0;
41   oshift = -MatReorderingIndexShift[(int)type];
42   if (MatReorderingRequiresSymmetric[(int)type]) {
43     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
44     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
45     PetscFree(ia); PetscFree(ja);
46   } else {
47     if (ishift == oshift) {
48       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
49     }
50     else if (ishift == -1) {
51       /* temporarily subtract 1 from i and j indices */
52       int nz = a->i[n] - 1;
53       for ( i=0; i<nz; i++ ) a->j[i]--;
54       for ( i=0; i<n+1; i++ ) a->i[i]--;
55       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
56       for ( i=0; i<nz; i++ ) a->j[i]++;
57       for ( i=0; i<n+1; i++ ) a->i[i]++;
58     } else {
59       /* temporarily add 1 to i and j indices */
60       int nz = a->i[n] - 1;
61       for ( i=0; i<nz; i++ ) a->j[i]++;
62       for ( i=0; i<n+1; i++ ) a->i[i]++;
63       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
64       for ( i=0; i<nz; i++ ) a->j[i]--;
65       for ( i=0; i<n+1; i++ ) a->i[i]--;
66     }
67   }
68   return 0;
69 }
70 
71 /*
72      Adds diagonal pointers to sparse matrix structure.
73 */
74 
75 int MatMarkDiag_SeqBAIJ(Mat A)
76 {
77   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
78   int         i,j, *diag, m = a->mbs;
79 
80   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
81   PLogObjectMemory(A,(m+1)*sizeof(int));
82   for ( i=0; i<m; i++ ) {
83     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
84       if (a->j[j] == i) {
85         diag[i] = j;
86         break;
87       }
88     }
89   }
90   a->diag = diag;
91   return 0;
92 }
93 
94 #include "draw.h"
95 #include "pinclude/pviewer.h"
96 #include "sys.h"
97 
98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
99 {
100   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
101   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
102   Scalar      *aa;
103 
104   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
105   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
106   col_lens[0] = MAT_COOKIE;
107   col_lens[1] = a->m;
108   col_lens[2] = a->n;
109   col_lens[3] = a->nz*bs2;
110 
111   /* store lengths of each row and write (including header) to file */
112   count = 0;
113   for ( i=0; i<a->mbs; i++ ) {
114     for ( j=0; j<bs; j++ ) {
115       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
116     }
117   }
118   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
119   PetscFree(col_lens);
120 
121   /* store column indices (zero start index) */
122   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
123   count = 0;
124   for ( i=0; i<a->mbs; i++ ) {
125     for ( j=0; j<bs; j++ ) {
126       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
127         for ( l=0; l<bs; l++ ) {
128           jj[count++] = bs*a->j[k] + l;
129         }
130       }
131     }
132   }
133   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
134   PetscFree(jj);
135 
136   /* store nonzero values */
137   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
138   count = 0;
139   for ( i=0; i<a->mbs; i++ ) {
140     for ( j=0; j<bs; j++ ) {
141       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
142         for ( l=0; l<bs; l++ ) {
143           aa[count++] = a->a[bs2*k + l*bs + j];
144         }
145       }
146     }
147   }
148   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
149   PetscFree(aa);
150   return 0;
151 }
152 
153 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
154 {
155   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
156   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
157   FILE        *fd;
158   char        *outputname;
159 
160   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
161   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
162   ierr = ViewerGetFormat(viewer,&format);
163   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
164     fprintf(fd,"  block size is %d\n",bs);
165   }
166   else if (format == ASCII_FORMAT_MATLAB) {
167     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
168   }
169   else if (format == ASCII_FORMAT_COMMON) {
170     for ( i=0; i<a->mbs; i++ ) {
171       for ( j=0; j<bs; j++ ) {
172         fprintf(fd,"row %d:",i*bs+j);
173         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
174           for ( l=0; l<bs; l++ ) {
175 #if defined(PETSC_COMPLEX)
176           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
177             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
178               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
179           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
180             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
181 #else
182           if (a->a[bs2*k + l*bs + j] != 0.0)
183             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
184 #endif
185           }
186         }
187         fprintf(fd,"\n");
188       }
189     }
190   }
191   else {
192     for ( i=0; i<a->mbs; i++ ) {
193       for ( j=0; j<bs; j++ ) {
194         fprintf(fd,"row %d:",i*bs+j);
195         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
196           for ( l=0; l<bs; l++ ) {
197 #if defined(PETSC_COMPLEX)
198           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
199             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
200               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
201           }
202           else {
203             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
204           }
205 #else
206             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
207 #endif
208           }
209         }
210         fprintf(fd,"\n");
211       }
212     }
213   }
214   fflush(fd);
215   return 0;
216 }
217 
218 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
219 {
220   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
221   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
222   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
223   Scalar       *aa;
224   Draw         draw;
225   DrawButton   button;
226   PetscTruth   isnull;
227 
228   ViewerDrawGetDraw(viewer,&draw);
229   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
230 
231   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
232   xr += w;    yr += h;  xl = -w;     yl = -h;
233   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
234   /* loop over matrix elements drawing boxes */
235   color = DRAW_BLUE;
236   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
237     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
238       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
239       x_l = a->j[j]*bs; x_r = x_l + 1.0;
240       aa = a->a + j*bs2;
241       for ( k=0; k<bs; k++ ) {
242         for ( l=0; l<bs; l++ ) {
243           if (PetscReal(*aa++) >=  0.) continue;
244           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
245         }
246       }
247     }
248   }
249   color = DRAW_CYAN;
250   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
251     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
252       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
253       x_l = a->j[j]*bs; x_r = x_l + 1.0;
254       aa = a->a + j*bs2;
255       for ( k=0; k<bs; k++ ) {
256         for ( l=0; l<bs; l++ ) {
257           if (PetscReal(*aa++) != 0.) continue;
258           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
259         }
260       }
261     }
262   }
263 
264   color = DRAW_RED;
265   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
266     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
267       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
268       x_l = a->j[j]*bs; x_r = x_l + 1.0;
269       aa = a->a + j*bs2;
270       for ( k=0; k<bs; k++ ) {
271         for ( l=0; l<bs; l++ ) {
272           if (PetscReal(*aa++) <= 0.) continue;
273           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
274         }
275       }
276     }
277   }
278 
279   DrawFlush(draw);
280   DrawGetPause(draw,&pause);
281   if (pause >= 0) { PetscSleep(pause); return 0;}
282 
283   /* allow the matrix to zoom or shrink */
284   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
285   while (button != BUTTON_RIGHT) {
286     DrawClear(draw);
287     if (button == BUTTON_LEFT) scale = .5;
288     else if (button == BUTTON_CENTER) scale = 2.;
289     xl = scale*(xl + w - xc) + xc - w*scale;
290     xr = scale*(xr - w - xc) + xc + w*scale;
291     yl = scale*(yl + h - yc) + yc - h*scale;
292     yr = scale*(yr - h - yc) + yc + h*scale;
293     w *= scale; h *= scale;
294     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
295     color = DRAW_BLUE;
296     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
297       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
298         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
299         x_l = a->j[j]*bs; x_r = x_l + 1.0;
300         aa = a->a + j*bs2;
301         for ( k=0; k<bs; k++ ) {
302           for ( l=0; l<bs; l++ ) {
303             if (PetscReal(*aa++) >=  0.) continue;
304             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
305           }
306         }
307       }
308     }
309     color = DRAW_CYAN;
310     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
311       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
312         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
313         x_l = a->j[j]*bs; x_r = x_l + 1.0;
314         aa = a->a + j*bs2;
315         for ( k=0; k<bs; k++ ) {
316           for ( l=0; l<bs; l++ ) {
317           if (PetscReal(*aa++) != 0.) continue;
318           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
319           }
320         }
321       }
322     }
323 
324     color = DRAW_RED;
325     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
326       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
327         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
328         x_l = a->j[j]*bs; x_r = x_l + 1.0;
329         aa = a->a + j*bs2;
330         for ( k=0; k<bs; k++ ) {
331           for ( l=0; l<bs; l++ ) {
332             if (PetscReal(*aa++) <= 0.) continue;
333             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
334           }
335         }
336       }
337     }
338     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
339   }
340   return 0;
341 }
342 
343 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
344 {
345   Mat         A = (Mat) obj;
346   ViewerType  vtype;
347   int         ierr;
348 
349   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
350   if (vtype == MATLAB_VIEWER) {
351     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
352   }
353   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
354     return MatView_SeqBAIJ_ASCII(A,viewer);
355   }
356   else if (vtype == BINARY_FILE_VIEWER) {
357     return MatView_SeqBAIJ_Binary(A,viewer);
358   }
359   else if (vtype == DRAW_VIEWER) {
360     return MatView_SeqBAIJ_Draw(A,viewer);
361   }
362   return 0;
363 }
364 
365 #define CHUNKSIZE  10
366 
367 /* This version has row oriented v  */
368 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
369 {
370   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
371   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
372   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
373   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
374   int          ridx,cidx,bs2=a->bs2;
375   Scalar      *ap,value,*aa=a->a,*bap;
376 
377   for ( k=0; k<m; k++ ) { /* loop over added rows */
378     row  = im[k]; brow = row/bs;
379     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
380     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
381     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
382     rmax = imax[brow]; nrow = ailen[brow];
383     low = 0;
384     for ( l=0; l<n; l++ ) { /* loop over added columns */
385       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
386       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
387       col = in[l]; bcol = col/bs;
388       ridx = row % bs; cidx = col % bs;
389       if (roworiented) {
390         value = *v++;
391       }
392       else {
393         value = v[k + l*m];
394       }
395       if (!sorted) low = 0; high = nrow;
396       while (high-low > 5) {
397         t = (low+high)/2;
398         if (rp[t] > bcol) high = t;
399         else             low  = t;
400       }
401       for ( i=low; i<high; i++ ) {
402         if (rp[i] > bcol) break;
403         if (rp[i] == bcol) {
404           bap  = ap +  bs2*i + bs*cidx + ridx;
405           if (is == ADD_VALUES) *bap += value;
406           else                  *bap  = value;
407           goto noinsert;
408         }
409       }
410       if (nonew) goto noinsert;
411       if (nrow >= rmax) {
412         /* there is no extra room in row, therefore enlarge */
413         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
414         Scalar *new_a;
415 
416         /* malloc new storage space */
417         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
418         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
419         new_j   = (int *) (new_a + bs2*new_nz);
420         new_i   = new_j + new_nz;
421 
422         /* copy over old data into new slots */
423         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
424         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
425         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
426         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
427         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
428                                                            len*sizeof(int));
429         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
430         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
431         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
432                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
433         /* free up old matrix storage */
434         PetscFree(a->a);
435         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
436         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
437         a->singlemalloc = 1;
438 
439         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
440         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
441         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
442         a->maxnz += bs2*CHUNKSIZE;
443         a->reallocs++;
444         a->nz++;
445       }
446       N = nrow++ - 1;
447       /* shift up all the later entries in this row */
448       for ( ii=N; ii>=i; ii-- ) {
449         rp[ii+1] = rp[ii];
450          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
451       }
452       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
453       rp[i] = bcol;
454       ap[bs2*i + bs*cidx + ridx] = value;
455       noinsert:;
456       low = i;
457     }
458     ailen[brow] = nrow;
459   }
460   return 0;
461 }
462 
463 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
464 {
465   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
466   *m = a->m; *n = a->n;
467   return 0;
468 }
469 
470 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
471 {
472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
473   *m = 0; *n = a->m;
474   return 0;
475 }
476 
477 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
478 {
479   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
480   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
481   Scalar      *aa,*v_i,*aa_i;
482 
483   bs  = a->bs;
484   ai  = a->i;
485   aj  = a->j;
486   aa  = a->a;
487   bs2 = a->bs2;
488 
489   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
490 
491   bn  = row/bs;   /* Block number */
492   bp  = row % bs; /* Block Position */
493   M   = ai[bn+1] - ai[bn];
494   *nz = bs*M;
495 
496   if (v) {
497     *v = 0;
498     if (*nz) {
499       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
500       for ( i=0; i<M; i++ ) { /* for each block in the block row */
501         v_i  = *v + i*bs;
502         aa_i = aa + bs2*(ai[bn] + i);
503         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
504       }
505     }
506   }
507 
508   if (idx) {
509     *idx = 0;
510     if (*nz) {
511       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
512       for ( i=0; i<M; i++ ) { /* for each block in the block row */
513         idx_i = *idx + i*bs;
514         itmp  = bs*aj[ai[bn] + i];
515         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
516       }
517     }
518   }
519   return 0;
520 }
521 
522 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
523 {
524   if (idx) {if (*idx) PetscFree(*idx);}
525   if (v)   {if (*v)   PetscFree(*v);}
526   return 0;
527 }
528 
529 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
530 {
531   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
532   Mat         C;
533   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
534   int         *rows,*cols,bs2=a->bs2;
535   Scalar      *array=a->a;
536 
537   if (B==PETSC_NULL && mbs!=nbs)
538     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
539   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
540   PetscMemzero(col,(1+nbs)*sizeof(int));
541 
542   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
543   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
544   PetscFree(col);
545   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
546   cols = rows + bs;
547   for ( i=0; i<mbs; i++ ) {
548     cols[0] = i*bs;
549     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
550     len = ai[i+1] - ai[i];
551     for ( j=0; j<len; j++ ) {
552       rows[0] = (*aj++)*bs;
553       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
554       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
555       array += bs2;
556     }
557   }
558   PetscFree(rows);
559 
560   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
561   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
562 
563   if (B != PETSC_NULL) {
564     *B = C;
565   } else {
566     /* This isn't really an in-place transpose */
567     PetscFree(a->a);
568     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
569     if (a->diag) PetscFree(a->diag);
570     if (a->ilen) PetscFree(a->ilen);
571     if (a->imax) PetscFree(a->imax);
572     if (a->solve_work) PetscFree(a->solve_work);
573     PetscFree(a);
574     PetscMemcpy(A,C,sizeof(struct _Mat));
575     PetscHeaderDestroy(C);
576   }
577   return 0;
578 }
579 
580 
581 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
582 {
583   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
584   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
585   int        m = a->m,*ip, N, *ailen = a->ilen;
586   int        mbs = a->mbs, bs2 = a->bs2;
587   Scalar     *aa = a->a, *ap;
588 
589   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
590 
591   for ( i=1; i<mbs; i++ ) {
592     /* move each row back by the amount of empty slots (fshift) before it*/
593     fshift += imax[i-1] - ailen[i-1];
594     if (fshift) {
595       ip = aj + ai[i]; ap = aa + bs2*ai[i];
596       N = ailen[i];
597       for ( j=0; j<N; j++ ) {
598         ip[j-fshift] = ip[j];
599         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
600       }
601     }
602     ai[i] = ai[i-1] + ailen[i-1];
603   }
604   if (mbs) {
605     fshift += imax[mbs-1] - ailen[mbs-1];
606     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
607   }
608   /* reset ilen and imax for each row */
609   for ( i=0; i<mbs; i++ ) {
610     ailen[i] = imax[i] = ai[i+1] - ai[i];
611   }
612   a->nz = ai[mbs];
613 
614   /* diagonals may have moved, so kill the diagonal pointers */
615   if (fshift && a->diag) {
616     PetscFree(a->diag);
617     PLogObjectMemory(A,-(m+1)*sizeof(int));
618     a->diag = 0;
619   }
620   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
621   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
622            a->reallocs);
623   return 0;
624 }
625 
626 static int MatZeroEntries_SeqBAIJ(Mat A)
627 {
628   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
629   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
630   return 0;
631 }
632 
633 int MatDestroy_SeqBAIJ(PetscObject obj)
634 {
635   Mat         A  = (Mat) obj;
636   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
637 
638 #if defined(PETSC_LOG)
639   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
640 #endif
641   PetscFree(a->a);
642   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
643   if (a->diag) PetscFree(a->diag);
644   if (a->ilen) PetscFree(a->ilen);
645   if (a->imax) PetscFree(a->imax);
646   if (a->solve_work) PetscFree(a->solve_work);
647   if (a->mult_work) PetscFree(a->mult_work);
648   PetscFree(a);
649   PLogObjectDestroy(A);
650   PetscHeaderDestroy(A);
651   return 0;
652 }
653 
654 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
655 {
656   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
657   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
658   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
659   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
660   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
661   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
662   else if (op == MAT_ROWS_SORTED ||
663            op == MAT_SYMMETRIC ||
664            op == MAT_STRUCTURALLY_SYMMETRIC ||
665            op == MAT_YES_NEW_DIAGONALS)
666     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
667   else if (op == MAT_NO_NEW_DIAGONALS)
668     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
669   else
670     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
671   return 0;
672 }
673 
674 
675 /* -------------------------------------------------------*/
676 /* Should check that shapes of vectors and matrices match */
677 /* -------------------------------------------------------*/
678 #include "pinclude/plapack.h"
679 
680 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
681 {
682   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
683   register Scalar *x,*z,*v,sum;
684   int             mbs=a->mbs,i,*idx,*ii,n;
685 
686   VecGetArray_Fast(xx,x);
687   VecGetArray_Fast(zz,z);
688 
689   idx   = a->j;
690   v     = a->a;
691   ii    = a->i;
692 
693   for ( i=0; i<mbs; i++ ) {
694     n    = ii[1] - ii[0]; ii++;
695     sum  = 0.0;
696     while (n--) sum += *v++ * x[*idx++];
697     z[i] = sum;
698   }
699   VecRestoreArray_Fast(xx,x);
700   VecRestoreArray_Fast(zz,z);
701   PLogFlops(2*a->nz - a->m);
702   return 0;
703 }
704 
705 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
706 {
707   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
708   register Scalar *x,*z,*v,*xb,sum1,sum2;
709   register Scalar x1,x2;
710   int             mbs=a->mbs,i,*idx,*ii;
711   int             j,n;
712 
713   VecGetArray_Fast(xx,x);
714   VecGetArray_Fast(zz,z);
715 
716   idx   = a->j;
717   v     = a->a;
718   ii    = a->i;
719 
720   for ( i=0; i<mbs; i++ ) {
721     n  = ii[1] - ii[0]; ii++;
722     sum1 = 0.0; sum2 = 0.0;
723     for ( j=0; j<n; j++ ) {
724       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
725       sum1 += v[0]*x1 + v[2]*x2;
726       sum2 += v[1]*x1 + v[3]*x2;
727       v += 4;
728     }
729     z[0] = sum1; z[1] = sum2;
730     z += 2;
731   }
732   VecRestoreArray_Fast(xx,x);
733   VecRestoreArray_Fast(zz,z);
734   PLogFlops(4*a->nz - a->m);
735   return 0;
736 }
737 
738 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
739 {
740   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
741   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
742   int             mbs=a->mbs,i,*idx,*ii,j,n;
743 
744   VecGetArray_Fast(xx,x);
745   VecGetArray_Fast(zz,z);
746 
747   idx   = a->j;
748   v     = a->a;
749   ii    = a->i;
750 
751   for ( i=0; i<mbs; i++ ) {
752     n  = ii[1] - ii[0]; ii++;
753     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
754     for ( j=0; j<n; j++ ) {
755       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
756       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
757       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
758       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
759       v += 9;
760     }
761     z[0] = sum1; z[1] = sum2; z[2] = sum3;
762     z += 3;
763   }
764   VecRestoreArray_Fast(xx,x);
765   VecRestoreArray_Fast(zz,z);
766   PLogFlops(18*a->nz - a->m);
767   return 0;
768 }
769 
770 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
771 {
772   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
773   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
774   register Scalar x1,x2,x3,x4;
775   int             mbs=a->mbs,i,*idx,*ii;
776   int             j,n;
777 
778   VecGetArray_Fast(xx,x);
779   VecGetArray_Fast(zz,z);
780 
781   idx   = a->j;
782   v     = a->a;
783   ii    = a->i;
784 
785   for ( i=0; i<mbs; i++ ) {
786     n  = ii[1] - ii[0]; ii++;
787     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
788     for ( j=0; j<n; j++ ) {
789       xb = x + 4*(*idx++);
790       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
791       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
792       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
793       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
794       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
795       v += 16;
796     }
797     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
798     z += 4;
799   }
800   VecRestoreArray_Fast(xx,x);
801   VecRestoreArray_Fast(zz,z);
802   PLogFlops(32*a->nz - a->m);
803   return 0;
804 }
805 
806 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
807 {
808   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
809   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
810   register Scalar x1,x2,x3,x4,x5;
811   int             mbs=a->mbs,i,*idx,*ii,j,n;
812 
813   VecGetArray_Fast(xx,x);
814   VecGetArray_Fast(zz,z);
815 
816   idx   = a->j;
817   v     = a->a;
818   ii    = a->i;
819 
820   for ( i=0; i<mbs; i++ ) {
821     n  = ii[1] - ii[0]; ii++;
822     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
823     for ( j=0; j<n; j++ ) {
824       xb = x + 5*(*idx++);
825       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
826       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
827       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
828       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
829       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
830       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
831       v += 25;
832     }
833     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
834     z += 5;
835   }
836   VecRestoreArray_Fast(xx,x);
837   VecRestoreArray_Fast(zz,z);
838   PLogFlops(50*a->nz - a->m);
839   return 0;
840 }
841 
842 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
843 {
844   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
845   register Scalar *x,*z,*v,*xb;
846   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
847   int             _One = 1,ncols,k;
848   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
849 
850   VecGetArray_Fast(xx,x);
851   VecGetArray_Fast(zz,z);
852 
853   idx   = a->j;
854   v     = a->a;
855   ii    = a->i;
856 
857 
858   if (!a->mult_work) {
859     k = PetscMax(a->m,a->n);
860     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
861   }
862   work = a->mult_work;
863   for ( i=0; i<mbs; i++ ) {
864     n     = ii[1] - ii[0]; ii++;
865     ncols = n*bs;
866     workt = work;
867     for ( j=0; j<n; j++ ) {
868       xb = x + bs*(*idx++);
869       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
870       workt += bs;
871     }
872     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
873     v += n*bs2;
874     z += bs;
875   }
876   VecRestoreArray_Fast(xx,x);
877   VecRestoreArray_Fast(zz,z);
878   PLogFlops(2*a->nz*bs2 - a->m);
879   return 0;
880 }
881 
882 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
883 {
884   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
885   register Scalar *x,*y,*z,*v,sum;
886   int             mbs=a->mbs,i,*idx,*ii,n;
887 
888   VecGetArray_Fast(xx,x);
889   VecGetArray_Fast(yy,y);
890   VecGetArray_Fast(zz,z);
891 
892   idx   = a->j;
893   v     = a->a;
894   ii    = a->i;
895 
896   for ( i=0; i<mbs; i++ ) {
897     n    = ii[1] - ii[0]; ii++;
898     sum  = y[i];
899     while (n--) sum += *v++ * x[*idx++];
900     z[i] = sum;
901   }
902   VecRestoreArray_Fast(xx,x);
903   VecRestoreArray_Fast(yy,y);
904   VecRestoreArray_Fast(zz,z);
905   PLogFlops(2*a->nz);
906   return 0;
907 }
908 
909 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
910 {
911   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
912   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
913   register Scalar x1,x2;
914   int             mbs=a->mbs,i,*idx,*ii;
915   int             j,n;
916 
917   VecGetArray_Fast(xx,x);
918   VecGetArray_Fast(yy,y);
919   VecGetArray_Fast(zz,z);
920 
921   idx   = a->j;
922   v     = a->a;
923   ii    = a->i;
924 
925   for ( i=0; i<mbs; i++ ) {
926     n  = ii[1] - ii[0]; ii++;
927     sum1 = y[0]; sum2 = y[1];
928     for ( j=0; j<n; j++ ) {
929       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
930       sum1 += v[0]*x1 + v[2]*x2;
931       sum2 += v[1]*x1 + v[3]*x2;
932       v += 4;
933     }
934     z[0] = sum1; z[1] = sum2;
935     z += 2; y += 2;
936   }
937   VecRestoreArray_Fast(xx,x);
938   VecRestoreArray_Fast(yy,y);
939   VecRestoreArray_Fast(zz,z);
940   PLogFlops(4*a->nz);
941   return 0;
942 }
943 
944 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
945 {
946   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
947   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
948   int             mbs=a->mbs,i,*idx,*ii,j,n;
949 
950   VecGetArray_Fast(xx,x);
951   VecGetArray_Fast(yy,y);
952   VecGetArray_Fast(zz,z);
953 
954   idx   = a->j;
955   v     = a->a;
956   ii    = a->i;
957 
958   for ( i=0; i<mbs; i++ ) {
959     n  = ii[1] - ii[0]; ii++;
960     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
961     for ( j=0; j<n; j++ ) {
962       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
963       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
964       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
965       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
966       v += 9;
967     }
968     z[0] = sum1; z[1] = sum2; z[2] = sum3;
969     z += 3; y += 3;
970   }
971   VecRestoreArray_Fast(xx,x);
972   VecRestoreArray_Fast(yy,y);
973   VecRestoreArray_Fast(zz,z);
974   PLogFlops(18*a->nz);
975   return 0;
976 }
977 
978 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
979 {
980   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
981   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
982   register Scalar x1,x2,x3,x4;
983   int             mbs=a->mbs,i,*idx,*ii;
984   int             j,n;
985 
986   VecGetArray_Fast(xx,x);
987   VecGetArray_Fast(yy,y);
988   VecGetArray_Fast(zz,z);
989 
990   idx   = a->j;
991   v     = a->a;
992   ii    = a->i;
993 
994   for ( i=0; i<mbs; i++ ) {
995     n  = ii[1] - ii[0]; ii++;
996     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
997     for ( j=0; j<n; j++ ) {
998       xb = x + 4*(*idx++);
999       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1000       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1001       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1002       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1003       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1004       v += 16;
1005     }
1006     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1007     z += 4; y += 4;
1008   }
1009   VecRestoreArray_Fast(xx,x);
1010   VecRestoreArray_Fast(yy,y);
1011   VecRestoreArray_Fast(zz,z);
1012   PLogFlops(32*a->nz);
1013   return 0;
1014 }
1015 
1016 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1017 {
1018   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1019   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1020   register Scalar x1,x2,x3,x4,x5;
1021   int             mbs=a->mbs,i,*idx,*ii,j,n;
1022 
1023   VecGetArray_Fast(xx,x);
1024   VecGetArray_Fast(yy,y);
1025   VecGetArray_Fast(zz,z);
1026 
1027   idx   = a->j;
1028   v     = a->a;
1029   ii    = a->i;
1030 
1031   for ( i=0; i<mbs; i++ ) {
1032     n  = ii[1] - ii[0]; ii++;
1033     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1034     for ( j=0; j<n; j++ ) {
1035       xb = x + 5*(*idx++);
1036       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1037       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1038       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1039       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1040       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1041       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1042       v += 25;
1043     }
1044     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1045     z += 5; y += 5;
1046   }
1047   VecRestoreArray_Fast(xx,x);
1048   VecRestoreArray_Fast(yy,y);
1049   VecRestoreArray_Fast(zz,z);
1050   PLogFlops(50*a->nz);
1051   return 0;
1052 }
1053 
1054 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1055 {
1056   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1057   register Scalar *x,*z,*v,*xb;
1058   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1059   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1060 
1061   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1062 
1063   VecGetArray_Fast(xx,x);
1064   VecGetArray_Fast(zz,z);
1065 
1066   idx   = a->j;
1067   v     = a->a;
1068   ii    = a->i;
1069 
1070 
1071   if (!a->mult_work) {
1072     k = PetscMax(a->m,a->n);
1073     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1074   }
1075   work = a->mult_work;
1076   for ( i=0; i<mbs; i++ ) {
1077     n     = ii[1] - ii[0]; ii++;
1078     ncols = n*bs;
1079     workt = work;
1080     for ( j=0; j<n; j++ ) {
1081       xb = x + bs*(*idx++);
1082       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1083       workt += bs;
1084     }
1085     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1086     v += n*bs2;
1087     z += bs;
1088   }
1089   VecRestoreArray_Fast(xx,x);
1090   VecRestoreArray_Fast(zz,z);
1091   PLogFlops(2*a->nz*bs2 );
1092   return 0;
1093 }
1094 
1095 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1096 {
1097   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1098   Scalar          *xg,*zg,*zb;
1099   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1100   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1101   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1102 
1103 
1104   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1105   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1106   PetscMemzero(z,N*sizeof(Scalar));
1107 
1108   idx   = a->j;
1109   v     = a->a;
1110   ii    = a->i;
1111 
1112   switch (bs) {
1113   case 1:
1114     for ( i=0; i<mbs; i++ ) {
1115       n  = ii[1] - ii[0]; ii++;
1116       xb = x + i; x1 = xb[0];
1117       ib = idx + ai[i];
1118       for ( j=0; j<n; j++ ) {
1119         rval    = ib[j];
1120         z[rval] += *v++ * x1;
1121       }
1122     }
1123     break;
1124   case 2:
1125     for ( i=0; i<mbs; i++ ) {
1126       n  = ii[1] - ii[0]; ii++;
1127       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1128       ib = idx + ai[i];
1129       for ( j=0; j<n; j++ ) {
1130         rval      = ib[j]*2;
1131         z[rval++] += v[0]*x1 + v[1]*x2;
1132         z[rval++] += v[2]*x1 + v[3]*x2;
1133         v += 4;
1134       }
1135     }
1136     break;
1137   case 3:
1138     for ( i=0; i<mbs; i++ ) {
1139       n  = ii[1] - ii[0]; ii++;
1140       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1141       ib = idx + ai[i];
1142       for ( j=0; j<n; j++ ) {
1143         rval      = ib[j]*3;
1144         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1145         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1146         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1147         v += 9;
1148       }
1149     }
1150     break;
1151   case 4:
1152     for ( i=0; i<mbs; i++ ) {
1153       n  = ii[1] - ii[0]; ii++;
1154       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1155       ib = idx + ai[i];
1156       for ( j=0; j<n; j++ ) {
1157         rval      = ib[j]*4;
1158         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1159         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1160         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1161         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1162         v += 16;
1163       }
1164     }
1165     break;
1166   case 5:
1167     for ( i=0; i<mbs; i++ ) {
1168       n  = ii[1] - ii[0]; ii++;
1169       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1170       x4 = xb[3];   x5 = xb[4];
1171       ib = idx + ai[i];
1172       for ( j=0; j<n; j++ ) {
1173         rval      = ib[j]*5;
1174         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1175         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1176         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1177         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1178         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1179         v += 25;
1180       }
1181     }
1182     break;
1183       /* block sizes larger then 5 by 5 are handled by BLAS */
1184     default: {
1185       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1186       if (!a->mult_work) {
1187         k = PetscMax(a->m,a->n);
1188         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1189         CHKPTRQ(a->mult_work);
1190       }
1191       work = a->mult_work;
1192       for ( i=0; i<mbs; i++ ) {
1193         n     = ii[1] - ii[0]; ii++;
1194         ncols = n*bs;
1195         PetscMemzero(work,ncols*sizeof(Scalar));
1196         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1197         v += n*bs2;
1198         x += bs;
1199         workt = work;
1200         for ( j=0; j<n; j++ ) {
1201           zb = z + bs*(*idx++);
1202           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1203           workt += bs;
1204         }
1205       }
1206     }
1207   }
1208   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1209   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1210   PLogFlops(2*a->nz*a->bs2 - a->n);
1211   return 0;
1212 }
1213 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1214 {
1215   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1216   Scalar          *xg,*zg,*zb;
1217   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1218   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1219   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1220 
1221 
1222 
1223   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1224   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1225 
1226   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1227   else PetscMemzero(z,N*sizeof(Scalar));
1228 
1229 
1230   idx   = a->j;
1231   v     = a->a;
1232   ii    = a->i;
1233 
1234   switch (bs) {
1235   case 1:
1236     for ( i=0; i<mbs; i++ ) {
1237       n  = ii[1] - ii[0]; ii++;
1238       xb = x + i; x1 = xb[0];
1239       ib = idx + ai[i];
1240       for ( j=0; j<n; j++ ) {
1241         rval    = ib[j];
1242         z[rval] += *v++ * x1;
1243       }
1244     }
1245     break;
1246   case 2:
1247     for ( i=0; i<mbs; i++ ) {
1248       n  = ii[1] - ii[0]; ii++;
1249       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1250       ib = idx + ai[i];
1251       for ( j=0; j<n; j++ ) {
1252         rval      = ib[j]*2;
1253         z[rval++] += v[0]*x1 + v[1]*x2;
1254         z[rval++] += v[2]*x1 + v[3]*x2;
1255         v += 4;
1256       }
1257     }
1258     break;
1259   case 3:
1260     for ( i=0; i<mbs; i++ ) {
1261       n  = ii[1] - ii[0]; ii++;
1262       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1263       ib = idx + ai[i];
1264       for ( j=0; j<n; j++ ) {
1265         rval      = ib[j]*3;
1266         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1267         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1268         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1269         v += 9;
1270       }
1271     }
1272     break;
1273   case 4:
1274     for ( i=0; i<mbs; i++ ) {
1275       n  = ii[1] - ii[0]; ii++;
1276       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1277       ib = idx + ai[i];
1278       for ( j=0; j<n; j++ ) {
1279         rval      = ib[j]*4;
1280         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1281         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1282         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1283         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1284         v += 16;
1285       }
1286     }
1287     break;
1288   case 5:
1289     for ( i=0; i<mbs; i++ ) {
1290       n  = ii[1] - ii[0]; ii++;
1291       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1292       x4 = xb[3];   x5 = xb[4];
1293       ib = idx + ai[i];
1294       for ( j=0; j<n; j++ ) {
1295         rval      = ib[j]*5;
1296         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1297         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1298         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1299         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1300         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1301         v += 25;
1302       }
1303     }
1304     break;
1305       /* block sizes larger then 5 by 5 are handled by BLAS */
1306     default: {
1307       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1308       if (!a->mult_work) {
1309         k = PetscMax(a->m,a->n);
1310         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1311         CHKPTRQ(a->mult_work);
1312       }
1313       work = a->mult_work;
1314       for ( i=0; i<mbs; i++ ) {
1315         n     = ii[1] - ii[0]; ii++;
1316         ncols = n*bs;
1317         PetscMemzero(work,ncols*sizeof(Scalar));
1318         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1319         v += n*bs2;
1320         x += bs;
1321         workt = work;
1322         for ( j=0; j<n; j++ ) {
1323           zb = z + bs*(*idx++);
1324           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1325           workt += bs;
1326         }
1327       }
1328     }
1329   }
1330   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1331   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1332   PLogFlops(2*a->nz*a->bs2);
1333   return 0;
1334 }
1335 
1336 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
1337 {
1338   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1339   if (nz)  *nz  = a->bs2*a->nz;
1340   if (nza) *nza = a->maxnz;
1341   if (mem) *mem = (int)A->mem;
1342   return 0;
1343 }
1344 
1345 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1346 {
1347   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1348 
1349   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1350 
1351   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1352   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1353       (a->nz != b->nz)) {
1354     *flg = PETSC_FALSE; return 0;
1355   }
1356 
1357   /* if the a->i are the same */
1358   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1359     *flg = PETSC_FALSE; return 0;
1360   }
1361 
1362   /* if a->j are the same */
1363   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1364     *flg = PETSC_FALSE; return 0;
1365   }
1366 
1367   /* if a->a are the same */
1368   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1369     *flg = PETSC_FALSE; return 0;
1370   }
1371   *flg = PETSC_TRUE;
1372   return 0;
1373 
1374 }
1375 
1376 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1377 {
1378   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1379   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1380   Scalar      *x, zero = 0.0,*aa,*aa_j;
1381 
1382   bs   = a->bs;
1383   aa   = a->a;
1384   ai   = a->i;
1385   aj   = a->j;
1386   ambs = a->mbs;
1387   bs2  = a->bs2;
1388 
1389   VecSet(&zero,v);
1390   VecGetArray(v,&x); VecGetLocalSize(v,&n);
1391   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1392   for ( i=0; i<ambs; i++ ) {
1393     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1394       if (aj[j] == i) {
1395         row  = i*bs;
1396         aa_j = aa+j*bs2;
1397         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1398         break;
1399       }
1400     }
1401   }
1402   return 0;
1403 }
1404 
1405 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1406 {
1407   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1408   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1409   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1410 
1411   ai  = a->i;
1412   aj  = a->j;
1413   aa  = a->a;
1414   m   = a->m;
1415   n   = a->n;
1416   bs  = a->bs;
1417   mbs = a->mbs;
1418   bs2 = a->bs2;
1419   if (ll) {
1420     VecGetArray(ll,&l); VecGetSize(ll,&lm);
1421     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1422     for ( i=0; i<mbs; i++ ) { /* for each block row */
1423       M  = ai[i+1] - ai[i];
1424       li = l + i*bs;
1425       v  = aa + bs2*ai[i];
1426       for ( j=0; j<M; j++ ) { /* for each block */
1427         for ( k=0; k<bs2; k++ ) {
1428           (*v++) *= li[k%bs];
1429         }
1430       }
1431     }
1432   }
1433 
1434   if (rr) {
1435     VecGetArray(rr,&r); VecGetSize(rr,&rn);
1436     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1437     for ( i=0; i<mbs; i++ ) { /* for each block row */
1438       M  = ai[i+1] - ai[i];
1439       v  = aa + bs2*ai[i];
1440       for ( j=0; j<M; j++ ) { /* for each block */
1441         ri = r + bs*aj[ai[i]+j];
1442         for ( k=0; k<bs; k++ ) {
1443           x = ri[k];
1444           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1445         }
1446       }
1447     }
1448   }
1449   return 0;
1450 }
1451 
1452 
1453 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1454 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1455 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1456 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1457 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1458 
1459 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1460 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1461 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1462 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1463 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1464 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1465 
1466 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1467 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1468 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1469 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1470 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1471 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1472 
1473 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1474 {
1475   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1476   Scalar      *v = a->a;
1477   double      sum = 0.0;
1478   int         i,nz=a->nz,bs2=a->bs2;
1479 
1480   if (type == NORM_FROBENIUS) {
1481     for (i=0; i< bs2*nz; i++ ) {
1482 #if defined(PETSC_COMPLEX)
1483       sum += real(conj(*v)*(*v)); v++;
1484 #else
1485       sum += (*v)*(*v); v++;
1486 #endif
1487     }
1488     *norm = sqrt(sum);
1489   }
1490   else {
1491     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1492   }
1493   return 0;
1494 }
1495 
1496 /*
1497      note: This can only work for identity for row and col. It would
1498    be good to check this and otherwise generate an error.
1499 */
1500 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1501 {
1502   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1503   Mat         outA;
1504   int         ierr;
1505 
1506   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1507 
1508   outA          = inA;
1509   inA->factor   = FACTOR_LU;
1510   a->row        = row;
1511   a->col        = col;
1512 
1513   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1514 
1515   if (!a->diag) {
1516     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1517   }
1518   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1519   return 0;
1520 }
1521 
1522 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1523 {
1524   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1525   int         one = 1, totalnz = a->bs2*a->nz;
1526   BLscal_( &totalnz, alpha, a->a, &one );
1527   PLogFlops(totalnz);
1528   return 0;
1529 }
1530 
1531 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1532 {
1533   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1534   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1535   int        *ai = a->i, *ailen = a->ilen;
1536   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1537   Scalar     *ap, *aa = a->a, zero = 0.0;
1538 
1539   for ( k=0; k<m; k++ ) { /* loop over rows */
1540     row  = im[k]; brow = row/bs;
1541     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1542     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1543     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1544     nrow = ailen[brow];
1545     for ( l=0; l<n; l++ ) { /* loop over columns */
1546       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1547       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1548       col  = in[l] ;
1549       bcol = col/bs;
1550       cidx = col%bs;
1551       ridx = row%bs;
1552       high = nrow;
1553       low  = 0; /* assume unsorted */
1554       while (high-low > 5) {
1555         t = (low+high)/2;
1556         if (rp[t] > bcol) high = t;
1557         else             low  = t;
1558       }
1559       for ( i=low; i<high; i++ ) {
1560         if (rp[i] > bcol) break;
1561         if (rp[i] == bcol) {
1562           *v++ = ap[bs2*i+bs*cidx+ridx];
1563           goto finished;
1564         }
1565       }
1566       *v++ = zero;
1567       finished:;
1568     }
1569   }
1570   return 0;
1571 }
1572 
1573 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1574 {
1575   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1576   *bs = baij->bs;
1577   return 0;
1578 }
1579 
1580 /* idx should be of length atlease bs */
1581 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1582 {
1583   int i,row;
1584   row = idx[0];
1585   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1586 
1587   for ( i=1; i<bs; i++ ) {
1588     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1589   }
1590   *flg = PETSC_TRUE;
1591   return 0;
1592 }
1593 
1594 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1595 {
1596   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1597   IS          is_local;
1598   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1599   PetscTruth  flg;
1600   Scalar      zero = 0.0,*aa;
1601 
1602   /* Make a copy of the IS and  sort it */
1603   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1604   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1605   ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1606   ierr = ISSort(is_local); CHKERRQ(ierr);
1607   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1608 
1609   i = 0;
1610   while (i < is_n) {
1611     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1612     flg = PETSC_FALSE;
1613     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1614     if (flg) { /* There exists a block of rows to be Zerowed */
1615       baij->ilen[rows[i]/bs] = 0;
1616       i += bs;
1617     } else { /* Zero out only the requested row */
1618       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1619       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1620       for ( j=0; j<count; j++ ) {
1621         aa[0] = zero;
1622         aa+=bs;
1623       }
1624       i++;
1625     }
1626   }
1627   if (diag) {
1628     for ( j=0; j<is_n; j++ ) {
1629       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1630     }
1631   }
1632   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1633   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1634   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1635   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1636   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1637 
1638   return 0;
1639 }
1640 int MatPrintHelp_SeqBAIJ(Mat A)
1641 {
1642   static int called = 0;
1643   MPI_Comm   comm = A->comm;
1644 
1645   if (called) return 0; else called = 1;
1646   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1647   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1648   return 0;
1649 }
1650 
1651 /* -------------------------------------------------------------------*/
1652 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1653        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1654        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1655        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1656        MatSolve_SeqBAIJ_N,0,
1657        0,0,
1658        MatLUFactor_SeqBAIJ,0,
1659        0,
1660        MatTranspose_SeqBAIJ,
1661        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1662        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1663        0,MatAssemblyEnd_SeqBAIJ,
1664        0,
1665        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1666        MatGetReordering_SeqBAIJ,
1667        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1668        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1669        MatILUFactorSymbolic_SeqBAIJ,0,
1670        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1671        MatConvertSameType_SeqBAIJ,0,0,
1672        MatILUFactor_SeqBAIJ,0,0,
1673        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1674        MatGetValues_SeqBAIJ,0,
1675        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1676        0,0,0,MatGetBlockSize_SeqBAIJ};
1677 
1678 /*@C
1679    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1680    compressed row) format.  For good matrix assembly performance the
1681    user should preallocate the matrix storage by setting the parameter nz
1682    (or the array nzz).  By setting these parameters accurately, performance
1683    during matrix assembly can be increased by more than a factor of 50.
1684 
1685    Input Parameters:
1686 .  comm - MPI communicator, set to MPI_COMM_SELF
1687 .  bs - size of block
1688 .  m - number of rows
1689 .  n - number of columns
1690 .  nz - number of block nonzeros per block row (same for all rows)
1691 .  nzz - array containing the number of block nonzeros in the various block rows
1692          (possibly different for each block row) or PETSC_NULL
1693 
1694    Output Parameter:
1695 .  A - the matrix
1696 
1697    Notes:
1698    The block AIJ format is fully compatible with standard Fortran 77
1699    storage.  That is, the stored row and column indices can begin at
1700    either one (as in Fortran) or zero.  See the users' manual for details.
1701 
1702    Specify the preallocated storage with either nz or nnz (not both).
1703    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1704    allocation.  For additional details, see the users manual chapter on
1705    matrices and the file $(PETSC_DIR)/Performance.
1706 
1707 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1708 @*/
1709 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1710 {
1711   Mat         B;
1712   Mat_SeqBAIJ *b;
1713   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1714 
1715   if (mbs*bs!=m || nbs*bs!=n)
1716     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1717 
1718   *A = 0;
1719   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1720   PLogObjectCreate(B);
1721   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1722   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1723   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1724   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1725   if (!flg) {
1726     switch (bs) {
1727       case 1:
1728         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1729         B->ops.solve           = MatSolve_SeqBAIJ_1;
1730         B->ops.mult            = MatMult_SeqBAIJ_1;
1731         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1732        break;
1733       case 2:
1734         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1735         B->ops.solve           = MatSolve_SeqBAIJ_2;
1736         B->ops.mult            = MatMult_SeqBAIJ_2;
1737         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1738         break;
1739       case 3:
1740         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1741         B->ops.solve           = MatSolve_SeqBAIJ_3;
1742         B->ops.mult            = MatMult_SeqBAIJ_3;
1743         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1744         break;
1745       case 4:
1746         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1747         B->ops.solve           = MatSolve_SeqBAIJ_4;
1748         B->ops.mult            = MatMult_SeqBAIJ_4;
1749         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1750         break;
1751       case 5:
1752         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1753         B->ops.solve           = MatSolve_SeqBAIJ_5;
1754         B->ops.mult            = MatMult_SeqBAIJ_5;
1755         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1756         break;
1757     }
1758   }
1759   B->destroy          = MatDestroy_SeqBAIJ;
1760   B->view             = MatView_SeqBAIJ;
1761   B->factor           = 0;
1762   B->lupivotthreshold = 1.0;
1763   b->row              = 0;
1764   b->col              = 0;
1765   b->reallocs         = 0;
1766 
1767   b->m       = m; B->m = m; B->M = m;
1768   b->n       = n; B->n = n; B->N = n;
1769   b->mbs     = mbs;
1770   b->nbs     = nbs;
1771   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1772   if (nnz == PETSC_NULL) {
1773     if (nz == PETSC_DEFAULT) nz = 5;
1774     else if (nz <= 0)        nz = 1;
1775     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1776     nz = nz*mbs;
1777   }
1778   else {
1779     nz = 0;
1780     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1781   }
1782 
1783   /* allocate the matrix space */
1784   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1785   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1786   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1787   b->j  = (int *) (b->a + nz*bs2);
1788   PetscMemzero(b->j,nz*sizeof(int));
1789   b->i  = b->j + nz;
1790   b->singlemalloc = 1;
1791 
1792   b->i[0] = 0;
1793   for (i=1; i<mbs+1; i++) {
1794     b->i[i] = b->i[i-1] + b->imax[i-1];
1795   }
1796 
1797   /* b->ilen will count nonzeros in each block row so far. */
1798   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1799   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1800   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1801 
1802   b->bs               = bs;
1803   b->bs2              = bs2;
1804   b->mbs              = mbs;
1805   b->nz               = 0;
1806   b->maxnz            = nz*bs2;
1807   b->sorted           = 0;
1808   b->roworiented      = 1;
1809   b->nonew            = 0;
1810   b->diag             = 0;
1811   b->solve_work       = 0;
1812   b->mult_work        = 0;
1813   b->spptr            = 0;
1814   *A = B;
1815   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1816   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1817   return 0;
1818 }
1819 
1820 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1821 {
1822   Mat         C;
1823   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1824   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1825 
1826   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1827 
1828   *B = 0;
1829   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1830   PLogObjectCreate(C);
1831   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1832   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1833   C->destroy    = MatDestroy_SeqBAIJ;
1834   C->view       = MatView_SeqBAIJ;
1835   C->factor     = A->factor;
1836   c->row        = 0;
1837   c->col        = 0;
1838   C->assembled  = PETSC_TRUE;
1839 
1840   c->m = C->m   = a->m;
1841   c->n = C->n   = a->n;
1842   C->M          = a->m;
1843   C->N          = a->n;
1844 
1845   c->bs         = a->bs;
1846   c->bs2        = a->bs2;
1847   c->mbs        = a->mbs;
1848   c->nbs        = a->nbs;
1849 
1850   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1851   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1852   for ( i=0; i<mbs; i++ ) {
1853     c->imax[i] = a->imax[i];
1854     c->ilen[i] = a->ilen[i];
1855   }
1856 
1857   /* allocate the matrix space */
1858   c->singlemalloc = 1;
1859   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1860   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1861   c->j  = (int *) (c->a + nz*bs2);
1862   c->i  = c->j + nz;
1863   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1864   if (mbs > 0) {
1865     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1866     if (cpvalues == COPY_VALUES) {
1867       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1868     }
1869   }
1870 
1871   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1872   c->sorted      = a->sorted;
1873   c->roworiented = a->roworiented;
1874   c->nonew       = a->nonew;
1875 
1876   if (a->diag) {
1877     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1878     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1879     for ( i=0; i<mbs; i++ ) {
1880       c->diag[i] = a->diag[i];
1881     }
1882   }
1883   else c->diag          = 0;
1884   c->nz                 = a->nz;
1885   c->maxnz              = a->maxnz;
1886   c->solve_work         = 0;
1887   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1888   c->mult_work          = 0;
1889   *B = C;
1890   return 0;
1891 }
1892 
1893 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1894 {
1895   Mat_SeqBAIJ  *a;
1896   Mat          B;
1897   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1898   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1899   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1900   int          *masked, nmask,tmp,bs2,ishift;
1901   Scalar       *aa;
1902   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1903 
1904   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1905   bs2  = bs*bs;
1906 
1907   MPI_Comm_size(comm,&size);
1908   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1909   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1910   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1911   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1912   M = header[1]; N = header[2]; nz = header[3];
1913 
1914   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1915 
1916   /*
1917      This code adds extra rows to make sure the number of rows is
1918     divisible by the blocksize
1919   */
1920   mbs        = M/bs;
1921   extra_rows = bs - M + bs*(mbs);
1922   if (extra_rows == bs) extra_rows = 0;
1923   else                  mbs++;
1924   if (extra_rows) {
1925     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
1926   }
1927 
1928   /* read in row lengths */
1929   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1930   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1931   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1932 
1933   /* read in column indices */
1934   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1935   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1936   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1937 
1938   /* loop over row lengths determining block row lengths */
1939   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1940   PetscMemzero(browlengths,mbs*sizeof(int));
1941   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1942   PetscMemzero(mask,mbs*sizeof(int));
1943   masked = mask + mbs;
1944   rowcount = 0; nzcount = 0;
1945   for ( i=0; i<mbs; i++ ) {
1946     nmask = 0;
1947     for ( j=0; j<bs; j++ ) {
1948       kmax = rowlengths[rowcount];
1949       for ( k=0; k<kmax; k++ ) {
1950         tmp = jj[nzcount++]/bs;
1951         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1952       }
1953       rowcount++;
1954     }
1955     browlengths[i] += nmask;
1956     /* zero out the mask elements we set */
1957     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1958   }
1959 
1960   /* create our matrix */
1961   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1962          CHKERRQ(ierr);
1963   B = *A;
1964   a = (Mat_SeqBAIJ *) B->data;
1965 
1966   /* set matrix "i" values */
1967   a->i[0] = 0;
1968   for ( i=1; i<= mbs; i++ ) {
1969     a->i[i]      = a->i[i-1] + browlengths[i-1];
1970     a->ilen[i-1] = browlengths[i-1];
1971   }
1972   a->nz         = 0;
1973   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1974 
1975   /* read in nonzero values */
1976   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1977   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1978   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1979 
1980   /* set "a" and "j" values into matrix */
1981   nzcount = 0; jcount = 0;
1982   for ( i=0; i<mbs; i++ ) {
1983     nzcountb = nzcount;
1984     nmask    = 0;
1985     for ( j=0; j<bs; j++ ) {
1986       kmax = rowlengths[i*bs+j];
1987       for ( k=0; k<kmax; k++ ) {
1988         tmp = jj[nzcount++]/bs;
1989 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1990       }
1991       rowcount++;
1992     }
1993     /* sort the masked values */
1994     PetscSortInt(nmask,masked);
1995 
1996     /* set "j" values into matrix */
1997     maskcount = 1;
1998     for ( j=0; j<nmask; j++ ) {
1999       a->j[jcount++]  = masked[j];
2000       mask[masked[j]] = maskcount++;
2001     }
2002     /* set "a" values into matrix */
2003     ishift = bs2*a->i[i];
2004     for ( j=0; j<bs; j++ ) {
2005       kmax = rowlengths[i*bs+j];
2006       for ( k=0; k<kmax; k++ ) {
2007         tmp    = jj[nzcountb]/bs ;
2008         block  = mask[tmp] - 1;
2009         point  = jj[nzcountb] - bs*tmp;
2010         idx    = ishift + bs2*block + j + bs*point;
2011         a->a[idx] = aa[nzcountb++];
2012       }
2013     }
2014     /* zero out the mask elements we set */
2015     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2016   }
2017   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2018 
2019   PetscFree(rowlengths);
2020   PetscFree(browlengths);
2021   PetscFree(aa);
2022   PetscFree(jj);
2023   PetscFree(mask);
2024 
2025   B->assembled = PETSC_TRUE;
2026 
2027   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2028   if (flg) {
2029     Viewer tviewer;
2030     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2031     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
2032     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2033     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2034   }
2035   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2036   if (flg) {
2037     Viewer tviewer;
2038     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2039     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
2040     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2041     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2042   }
2043   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2044   if (flg) {
2045     Viewer tviewer;
2046     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2047     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2048     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2049   }
2050   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2051   if (flg) {
2052     Viewer tviewer;
2053     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2054     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
2055     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2056     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2057   }
2058   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2059   if (flg) {
2060     Viewer tviewer;
2061     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2062     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2063     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2064     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2065   }
2066   return 0;
2067 }
2068 
2069 
2070 
2071